1 //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolCollector.h"
10 #include "AST.h"
11 #include "CanonicalIncludes.h"
12 #include "CodeComplete.h"
13 #include "CodeCompletionStrings.h"
14 #include "ExpectedTypes.h"
15 #include "SourceCode.h"
16 #include "SymbolLocation.h"
17 #include "URI.h"
18 #include "index/Relation.h"
19 #include "index/SymbolID.h"
20 #include "support/Logger.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/DeclBase.h"
23 #include "clang/AST/DeclCXX.h"
24 #include "clang/AST/DeclObjC.h"
25 #include "clang/AST/DeclTemplate.h"
26 #include "clang/Basic/SourceLocation.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/Basic/Specifiers.h"
29 #include "clang/Index/IndexSymbol.h"
30 #include "clang/Index/IndexingAction.h"
31 #include "clang/Index/USRGeneration.h"
32 #include "clang/Lex/Preprocessor.h"
33 #include "clang/Tooling/Syntax/Tokens.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/Path.h"
38 
39 namespace clang {
40 namespace clangd {
41 namespace {
42 
43 /// If \p ND is a template specialization, returns the described template.
44 /// Otherwise, returns \p ND.
45 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
46   if (auto T = ND.getDescribedTemplate())
47     return *T;
48   return ND;
49 }
50 
51 // Checks whether the decl is a private symbol in a header generated by
52 // protobuf compiler.
53 // FIXME: make filtering extensible when there are more use cases for symbol
54 // filters.
55 bool isPrivateProtoDecl(const NamedDecl &ND) {
56   const auto &SM = ND.getASTContext().getSourceManager();
57   if (!isProtoFile(nameLocation(ND, SM), SM))
58     return false;
59 
60   // ND without identifier can be operators.
61   if (ND.getIdentifier() == nullptr)
62     return false;
63   auto Name = ND.getIdentifier()->getName();
64   if (!Name.contains('_'))
65     return false;
66   // Nested proto entities (e.g. Message::Nested) have top-level decls
67   // that shouldn't be used (Message_Nested). Ignore them completely.
68   // The nested entities are dangling type aliases, we may want to reconsider
69   // including them in the future.
70   // For enum constants, SOME_ENUM_CONSTANT is not private and should be
71   // indexed. Outer_INNER is private. This heuristic relies on naming style, it
72   // will include OUTER_INNER and exclude some_enum_constant.
73   // FIXME: the heuristic relies on naming style (i.e. no underscore in
74   // user-defined names) and can be improved.
75   return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
76 }
77 
78 // We only collect #include paths for symbols that are suitable for global code
79 // completion, except for namespaces since #include path for a namespace is hard
80 // to define.
81 bool shouldCollectIncludePath(index::SymbolKind Kind) {
82   using SK = index::SymbolKind;
83   switch (Kind) {
84   case SK::Macro:
85   case SK::Enum:
86   case SK::Struct:
87   case SK::Class:
88   case SK::Union:
89   case SK::TypeAlias:
90   case SK::Using:
91   case SK::Function:
92   case SK::Variable:
93   case SK::EnumConstant:
94     return true;
95   default:
96     return false;
97   }
98 }
99 
100 // Return the symbol range of the token at \p TokLoc.
101 std::pair<SymbolLocation::Position, SymbolLocation::Position>
102 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
103               const LangOptions &LangOpts) {
104   auto CreatePosition = [&SM](SourceLocation Loc) {
105     auto LSPLoc = sourceLocToPosition(SM, Loc);
106     SymbolLocation::Position Pos;
107     Pos.setLine(LSPLoc.line);
108     Pos.setColumn(LSPLoc.character);
109     return Pos;
110   };
111 
112   auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
113   return {CreatePosition(TokLoc),
114           CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
115 }
116 
117 // Checks whether \p ND is a good candidate to be the *canonical* declaration of
118 // its symbol (e.g. a go-to-declaration target). This overrides the default of
119 // using Clang's canonical declaration, which is the first in the TU.
120 //
121 // Example: preferring a class declaration over its forward declaration.
122 bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
123   const auto &SM = ND.getASTContext().getSourceManager();
124   if (isa<TagDecl>(ND))
125     return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
126            !isInsideMainFile(ND.getLocation(), SM);
127   if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(&ND))
128     return ID->isThisDeclarationADefinition();
129   if (const auto *PD = dyn_cast<ObjCProtocolDecl>(&ND))
130     return PD->isThisDeclarationADefinition();
131   return false;
132 }
133 
134 RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
135   RefKind Result = RefKind::Unknown;
136   if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))
137     Result |= RefKind::Declaration;
138   if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
139     Result |= RefKind::Definition;
140   if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))
141     Result |= RefKind::Reference;
142   if (Spelled)
143     Result |= RefKind::Spelled;
144   return Result;
145 }
146 
147 llvm::Optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {
148   if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf))
149     return RelationKind::BaseOf;
150   if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationOverrideOf))
151     return RelationKind::OverriddenBy;
152   return None;
153 }
154 
155 // Given a ref contained in enclosing decl `Enclosing`, return
156 // the decl that should be used as that ref's Ref::Container. This is
157 // usually `Enclosing` itself, but in cases where `Enclosing` is not
158 // indexed, we walk further up because Ref::Container should always be
159 // an indexed symbol.
160 // Note: we don't use DeclContext as the container as in some cases
161 // it's useful to use a Decl which is not a DeclContext. For example,
162 // for a ref occurring in the initializer of a namespace-scope variable,
163 // it's useful to use that variable as the container, as otherwise the
164 // next enclosing DeclContext would be a NamespaceDecl or TranslationUnitDecl,
165 // which are both not indexed and less granular than we'd like for use cases
166 // like call hierarchy.
167 const Decl *getRefContainer(const Decl *Enclosing,
168                             const SymbolCollector::Options &Opts) {
169   while (Enclosing) {
170     const auto *ND = dyn_cast<NamedDecl>(Enclosing);
171     if (ND && SymbolCollector::shouldCollectSymbol(*ND, ND->getASTContext(),
172                                                    Opts, true)) {
173       break;
174     }
175     Enclosing = dyn_cast_or_null<Decl>(Enclosing->getDeclContext());
176   }
177   return Enclosing;
178 }
179 
180 } // namespace
181 
182 // Encapsulates decisions about how to record header paths in the index,
183 // including filename normalization, URI conversion etc.
184 // Expensive checks are cached internally.
185 class SymbolCollector::HeaderFileURICache {
186   // Weird double-indirect access to PP, which might not be ready yet when
187   // HeaderFiles is created but will be by the time it's used.
188   // (IndexDataConsumer::setPreprocessor can happen before or after initialize)
189   const std::shared_ptr<Preprocessor> &PP;
190   const SourceManager &SM;
191   const CanonicalIncludes *Includes;
192   llvm::StringRef FallbackDir;
193   llvm::DenseMap<const FileEntry *, const std::string *> CacheFEToURI;
194   llvm::StringMap<std::string> CachePathToURI;
195   llvm::DenseMap<FileID, llvm::StringRef> CacheFIDToInclude;
196 
197 public:
198   HeaderFileURICache(const std::shared_ptr<Preprocessor> &PP,
199                      const SourceManager &SM,
200                      const SymbolCollector::Options &Opts)
201       : PP(PP), SM(SM), Includes(Opts.Includes), FallbackDir(Opts.FallbackDir) {
202   }
203 
204   // Returns a canonical URI for the file \p FE.
205   // We attempt to make the path absolute first.
206   const std::string &toURI(const FileEntry *FE) {
207     auto R = CacheFEToURI.try_emplace(FE);
208     if (R.second) {
209       auto CanonPath = getCanonicalPath(FE, SM);
210       R.first->second = &toURIInternal(CanonPath ? *CanonPath : FE->getName());
211     }
212     return *R.first->second;
213   }
214 
215   // Returns a canonical URI for \p Path.
216   // If the file is in the FileManager, use that to canonicalize the path.
217   // We attempt to make the path absolute in any case.
218   const std::string &toURI(llvm::StringRef Path) {
219     if (auto File = SM.getFileManager().getFile(Path))
220       return toURI(*File);
221     return toURIInternal(Path);
222   }
223 
224   // Gets a canonical include (URI of the header or <header> or "header") for
225   // header of \p FID (which should usually be the *expansion* file).
226   // This does not account for any per-symbol overrides!
227   // Returns "" if includes should not be inserted for this file.
228   llvm::StringRef getIncludeHeader(FileID FID) {
229     auto R = CacheFIDToInclude.try_emplace(FID);
230     if (R.second)
231       R.first->second = getIncludeHeaderUncached(FID);
232     return R.first->second;
233   }
234 
235 private:
236   // This takes care of making paths absolute and path->URI caching, but no
237   // FileManager-based canonicalization.
238   const std::string &toURIInternal(llvm::StringRef Path) {
239     auto R = CachePathToURI.try_emplace(Path);
240     if (R.second) {
241       llvm::SmallString<256> AbsPath = Path;
242       if (!llvm::sys::path::is_absolute(AbsPath) && !FallbackDir.empty())
243         llvm::sys::fs::make_absolute(FallbackDir, AbsPath);
244       assert(llvm::sys::path::is_absolute(AbsPath) &&
245              "If the VFS can't make paths absolute, a FallbackDir must be "
246              "provided");
247       llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
248       R.first->second = URI::create(AbsPath).toString();
249     }
250     return R.first->second;
251   }
252 
253   llvm::StringRef getIncludeHeaderUncached(FileID FID) {
254     const FileEntry *FE = SM.getFileEntryForID(FID);
255     if (!FE || FE->getName().empty())
256       return "";
257     llvm::StringRef Filename = FE->getName();
258     // If a file is mapped by canonical headers, use that mapping, regardless
259     // of whether it's an otherwise-good header (header guards etc).
260     if (Includes) {
261       llvm::StringRef Canonical = Includes->mapHeader(Filename);
262       if (!Canonical.empty()) {
263         // If we had a mapping, always use it.
264         if (Canonical.startswith("<") || Canonical.startswith("\""))
265           return Canonical;
266         return toURI(Canonical);
267       }
268     }
269     if (!isSelfContainedHeader(FID, FE)) {
270       // A .inc or .def file is often included into a real header to define
271       // symbols (e.g. LLVM tablegen files).
272       if (Filename.endswith(".inc") || Filename.endswith(".def"))
273         // Don't use cache reentrantly due to iterator invalidation.
274         return getIncludeHeaderUncached(SM.getFileID(SM.getIncludeLoc(FID)));
275       // Conservatively refuse to insert #includes to files without guards.
276       return "";
277     }
278     // Standard case: just insert the file itself.
279     return toURI(FE);
280   }
281 
282   bool isSelfContainedHeader(FileID FID, const FileEntry *FE) {
283     // FIXME: Should files that have been #import'd be considered
284     // self-contained? That's really a property of the includer,
285     // not of the file.
286     if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
287         !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
288       return false;
289     // This pattern indicates that a header can't be used without
290     // particular preprocessor state, usually set up by another header.
291     if (isDontIncludeMeHeader(SM.getBufferData(FID)))
292       return false;
293     return true;
294   }
295 
296   // Is Line an #if or #ifdef directive?
297   static bool isIf(llvm::StringRef Line) {
298     Line = Line.ltrim();
299     if (!Line.consume_front("#"))
300       return false;
301     Line = Line.ltrim();
302     return Line.startswith("if");
303   }
304 
305   // Is Line an #error directive mentioning includes?
306   static bool isErrorAboutInclude(llvm::StringRef Line) {
307     Line = Line.ltrim();
308     if (!Line.consume_front("#"))
309       return false;
310     Line = Line.ltrim();
311     if (!Line.startswith("error"))
312       return false;
313     return Line.contains_insensitive(
314         "includ"); // Matches "include" or "including".
315   }
316 
317   // Heuristically headers that only want to be included via an umbrella.
318   static bool isDontIncludeMeHeader(llvm::StringRef Content) {
319     llvm::StringRef Line;
320     // Only sniff up to 100 lines or 10KB.
321     Content = Content.take_front(100 * 100);
322     for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
323       std::tie(Line, Content) = Content.split('\n');
324       if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
325         return true;
326     }
327     return false;
328   }
329 };
330 
331 // Return the symbol location of the token at \p TokLoc.
332 llvm::Optional<SymbolLocation>
333 SymbolCollector::getTokenLocation(SourceLocation TokLoc) {
334   const auto &SM = ASTCtx->getSourceManager();
335   auto *FE = SM.getFileEntryForID(SM.getFileID(TokLoc));
336   if (!FE)
337     return None;
338 
339   SymbolLocation Result;
340   Result.FileURI = HeaderFileURIs->toURI(FE).c_str();
341   auto Range = getTokenRange(TokLoc, SM, ASTCtx->getLangOpts());
342   Result.Start = Range.first;
343   Result.End = Range.second;
344 
345   return Result;
346 }
347 
348 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
349 SymbolCollector::~SymbolCollector() = default;
350 
351 void SymbolCollector::initialize(ASTContext &Ctx) {
352   ASTCtx = &Ctx;
353   HeaderFileURIs = std::make_unique<HeaderFileURICache>(
354       PP, ASTCtx->getSourceManager(), Opts);
355   CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
356   CompletionTUInfo =
357       std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
358 }
359 
360 bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
361                                           const ASTContext &ASTCtx,
362                                           const Options &Opts,
363                                           bool IsMainFileOnly) {
364   // Skip anonymous declarations, e.g (anonymous enum/class/struct).
365   if (ND.getDeclName().isEmpty())
366     return false;
367 
368   // Skip main-file symbols if we are not collecting them.
369   if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
370     return false;
371 
372   // Skip symbols in anonymous namespaces in header files.
373   if (!IsMainFileOnly && ND.isInAnonymousNamespace())
374     return false;
375 
376   // For function local symbols, index only classes and its member functions.
377   if (index::isFunctionLocalSymbol(&ND))
378     return isa<RecordDecl>(ND) ||
379            (ND.isCXXInstanceMember() && ND.isFunctionOrFunctionTemplate());
380 
381   // We want most things but not "local" symbols such as symbols inside
382   // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
383   // FIXME: Need a matcher for ExportDecl in order to include symbols declared
384   // within an export.
385   const auto *DeclCtx = ND.getDeclContext();
386   switch (DeclCtx->getDeclKind()) {
387   case Decl::TranslationUnit:
388   case Decl::Namespace:
389   case Decl::LinkageSpec:
390   case Decl::Enum:
391   case Decl::ObjCProtocol:
392   case Decl::ObjCInterface:
393   case Decl::ObjCCategory:
394   case Decl::ObjCCategoryImpl:
395   case Decl::ObjCImplementation:
396     break;
397   default:
398     // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
399     // easier to cast.
400     if (!isa<RecordDecl>(DeclCtx))
401       return false;
402   }
403 
404   // Avoid indexing internal symbols in protobuf generated headers.
405   if (isPrivateProtoDecl(ND))
406     return false;
407   return true;
408 }
409 
410 // Always return true to continue indexing.
411 bool SymbolCollector::handleDeclOccurrence(
412     const Decl *D, index::SymbolRoleSet Roles,
413     llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
414     index::IndexDataConsumer::ASTNodeInfo ASTNode) {
415   assert(ASTCtx && PP.get() && HeaderFileURIs);
416   assert(CompletionAllocator && CompletionTUInfo);
417   assert(ASTNode.OrigD);
418   // Indexing API puts canonical decl into D, which might not have a valid
419   // source location for implicit/built-in decls. Fallback to original decl in
420   // such cases.
421   if (D->getLocation().isInvalid())
422     D = ASTNode.OrigD;
423   // If OrigD is an declaration associated with a friend declaration and it's
424   // not a definition, skip it. Note that OrigD is the occurrence that the
425   // collector is currently visiting.
426   if ((ASTNode.OrigD->getFriendObjectKind() !=
427        Decl::FriendObjectKind::FOK_None) &&
428       !(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
429     return true;
430   // A declaration created for a friend declaration should not be used as the
431   // canonical declaration in the index. Use OrigD instead, unless we've already
432   // picked a replacement for D
433   if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
434     D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
435   // Flag to mark that D should be considered canonical meaning its declaration
436   // will override any previous declaration for the Symbol.
437   bool DeclIsCanonical = false;
438   // Avoid treating ObjCImplementationDecl as a canonical declaration if it has
439   // a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.
440   if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {
441     DeclIsCanonical = true;
442     if (const auto *CID = IID->getClassInterface())
443       if (const auto *DD = CID->getDefinition())
444         if (!DD->isImplicitInterfaceDecl())
445           D = DD;
446   }
447   // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
448   // its ObjCCategoryDecl if it has one.
449   if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {
450     DeclIsCanonical = true;
451     if (const auto *CD = CID->getCategoryDecl())
452       D = CD;
453   }
454   const NamedDecl *ND = dyn_cast<NamedDecl>(D);
455   if (!ND)
456     return true;
457 
458   // Mark D as referenced if this is a reference coming from the main file.
459   // D may not be an interesting symbol, but it's cheaper to check at the end.
460   auto &SM = ASTCtx->getSourceManager();
461   if (Opts.CountReferences &&
462       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
463       SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
464     ReferencedDecls.insert(ND);
465 
466   auto ID = getSymbolID(ND);
467   if (!ID)
468     return true;
469 
470   // ND is the canonical (i.e. first) declaration. If it's in the main file
471   // (which is not a header), then no public declaration was visible, so assume
472   // it's main-file only.
473   bool IsMainFileOnly =
474       SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
475       !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
476                     ASTCtx->getLangOpts());
477   // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
478   if (ASTNode.OrigD->isImplicit() ||
479       !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
480     return true;
481 
482   // Note: we need to process relations for all decl occurrences, including
483   // refs, because the indexing code only populates relations for specific
484   // occurrences. For example, RelationBaseOf is only populated for the
485   // occurrence inside the base-specifier.
486   processRelations(*ND, ID, Relations);
487 
488   bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
489   bool IsOnlyRef =
490       !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
491                  static_cast<unsigned>(index::SymbolRole::Definition)));
492 
493   if (IsOnlyRef && !CollectRef)
494     return true;
495 
496   // Unlike other fields, e.g. Symbols (which use spelling locations), we use
497   // file locations for references (as it aligns the behavior of clangd's
498   // AST-based xref).
499   // FIXME: we should try to use the file locations for other fields.
500   if (CollectRef &&
501       (!IsMainFileOnly || Opts.CollectMainFileRefs ||
502        ND->isExternallyVisible()) &&
503       !isa<NamespaceDecl>(ND) &&
504       (Opts.RefsInHeaders ||
505        SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID()))
506     DeclRefs[ND].push_back(SymbolRef{SM.getFileLoc(Loc), Roles,
507                                      getRefContainer(ASTNode.Parent, Opts)});
508   // Don't continue indexing if this is a mere reference.
509   if (IsOnlyRef)
510     return true;
511 
512   // FIXME: ObjCPropertyDecl are not properly indexed here:
513   // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
514   // not a NamedDecl.
515   auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
516   if (!OriginalDecl)
517     return true;
518 
519   const Symbol *BasicSymbol = Symbols.find(ID);
520   if (isPreferredDeclaration(*OriginalDecl, Roles))
521     // If OriginalDecl is preferred, replace/create the existing canonical
522     // declaration (e.g. a class forward declaration). There should be at most
523     // one duplicate as we expect to see only one preferred declaration per
524     // TU, because in practice they are definitions.
525     BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);
526   else if (!BasicSymbol || DeclIsCanonical)
527     BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);
528 
529   if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
530     addDefinition(*OriginalDecl, *BasicSymbol);
531 
532   return true;
533 }
534 
535 void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
536   assert(HeaderFileURIs && PP.get());
537   const auto &SM = PP->getSourceManager();
538   const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
539   assert(MainFileEntry);
540 
541   const std::string &MainFileURI = HeaderFileURIs->toURI(MainFileEntry);
542   // Add macro references.
543   for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
544     for (const auto &MacroRef : IDToRefs.second) {
545       const auto &Range = MacroRef.Rng;
546       bool IsDefinition = MacroRef.IsDefinition;
547       Ref R;
548       R.Location.Start.setLine(Range.start.line);
549       R.Location.Start.setColumn(Range.start.character);
550       R.Location.End.setLine(Range.end.line);
551       R.Location.End.setColumn(Range.end.character);
552       R.Location.FileURI = MainFileURI.c_str();
553       R.Kind = IsDefinition ? RefKind::Definition : RefKind::Reference;
554       Refs.insert(IDToRefs.first, R);
555       if (IsDefinition) {
556         Symbol S;
557         S.ID = IDToRefs.first;
558         auto StartLoc = cantFail(sourceLocationInMainFile(SM, Range.start));
559         auto EndLoc = cantFail(sourceLocationInMainFile(SM, Range.end));
560         S.Name = toSourceCode(SM, SourceRange(StartLoc, EndLoc));
561         S.SymInfo.Kind = index::SymbolKind::Macro;
562         S.SymInfo.SubKind = index::SymbolSubKind::None;
563         S.SymInfo.Properties = index::SymbolPropertySet();
564         S.SymInfo.Lang = index::SymbolLanguage::C;
565         S.Origin = Opts.Origin;
566         S.CanonicalDeclaration = R.Location;
567         Symbols.insert(S);
568       }
569     }
570   }
571 }
572 
573 bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
574                                             const MacroInfo *MI,
575                                             index::SymbolRoleSet Roles,
576                                             SourceLocation Loc) {
577   assert(PP.get());
578   // Builtin macros don't have useful locations and aren't needed in completion.
579   if (MI->isBuiltinMacro())
580     return true;
581 
582   const auto &SM = PP->getSourceManager();
583   auto DefLoc = MI->getDefinitionLoc();
584   // Also avoid storing predefined macros like __DBL_MIN__.
585   if (SM.isWrittenInBuiltinFile(DefLoc) ||
586       Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM")
587     return true;
588 
589   auto ID = getSymbolID(Name->getName(), MI, SM);
590   if (!ID)
591     return true;
592 
593   auto SpellingLoc = SM.getSpellingLoc(Loc);
594   bool IsMainFileOnly =
595       SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&
596       !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
597                     ASTCtx->getLangOpts());
598   // Do not store references to main-file macros.
599   if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
600       (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID()))
601     // FIXME: Populate container information for macro references.
602     MacroRefs[ID].push_back({Loc, Roles, /*Container=*/nullptr});
603 
604   // Collect symbols.
605   if (!Opts.CollectMacro)
606     return true;
607 
608   // Skip main-file macros if we are not collecting them.
609   if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
610     return false;
611 
612   // Mark the macro as referenced if this is a reference coming from the main
613   // file. The macro may not be an interesting symbol, but it's cheaper to check
614   // at the end.
615   if (Opts.CountReferences &&
616       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
617       SM.getFileID(SpellingLoc) == SM.getMainFileID())
618     ReferencedMacros.insert(Name);
619 
620   // Don't continue indexing if this is a mere reference.
621   // FIXME: remove macro with ID if it is undefined.
622   if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
623         Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
624     return true;
625 
626   // Only collect one instance in case there are multiple.
627   if (Symbols.find(ID) != nullptr)
628     return true;
629 
630   Symbol S;
631   S.ID = std::move(ID);
632   S.Name = Name->getName();
633   if (!IsMainFileOnly) {
634     S.Flags |= Symbol::IndexedForCodeCompletion;
635     S.Flags |= Symbol::VisibleOutsideFile;
636   }
637   S.SymInfo = index::getSymbolInfoForMacro(*MI);
638   S.Origin = Opts.Origin;
639   // FIXME: use the result to filter out symbols.
640   shouldIndexFile(SM.getFileID(Loc));
641   if (auto DeclLoc = getTokenLocation(DefLoc))
642     S.CanonicalDeclaration = *DeclLoc;
643 
644   CodeCompletionResult SymbolCompletion(Name);
645   const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
646       *PP, *CompletionAllocator, *CompletionTUInfo);
647   std::string Signature;
648   std::string SnippetSuffix;
649   getSignature(*CCS, &Signature, &SnippetSuffix);
650   S.Signature = Signature;
651   S.CompletionSnippetSuffix = SnippetSuffix;
652 
653   IndexedMacros.insert(Name);
654   setIncludeLocation(S, DefLoc);
655   Symbols.insert(S);
656   return true;
657 }
658 
659 void SymbolCollector::processRelations(
660     const NamedDecl &ND, const SymbolID &ID,
661     ArrayRef<index::SymbolRelation> Relations) {
662   for (const auto &R : Relations) {
663     auto RKind = indexableRelation(R);
664     if (!RKind)
665       continue;
666     const Decl *Object = R.RelatedSymbol;
667 
668     auto ObjectID = getSymbolID(Object);
669     if (!ObjectID)
670       continue;
671 
672     // Record the relation.
673     // TODO: There may be cases where the object decl is not indexed for some
674     // reason. Those cases should probably be removed in due course, but for
675     // now there are two possible ways to handle it:
676     //   (A) Avoid storing the relation in such cases.
677     //   (B) Store it anyways. Clients will likely lookup() the SymbolID
678     //       in the index and find nothing, but that's a situation they
679     //       probably need to handle for other reasons anyways.
680     // We currently do (B) because it's simpler.
681     if (*RKind == RelationKind::BaseOf)
682       this->Relations.insert({ID, *RKind, ObjectID});
683     else if (*RKind == RelationKind::OverriddenBy)
684       this->Relations.insert({ObjectID, *RKind, ID});
685   }
686 }
687 
688 void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
689   if (Opts.CollectIncludePath)
690     if (shouldCollectIncludePath(S.SymInfo.Kind))
691       // Use the expansion location to get the #include header since this is
692       // where the symbol is exposed.
693       IncludeFiles[S.ID] =
694           PP->getSourceManager().getDecomposedExpansionLoc(Loc).first;
695 }
696 
697 void SymbolCollector::finish() {
698   // At the end of the TU, add 1 to the refcount of all referenced symbols.
699   auto IncRef = [this](const SymbolID &ID) {
700     if (const auto *S = Symbols.find(ID)) {
701       Symbol Inc = *S;
702       ++Inc.References;
703       Symbols.insert(Inc);
704     }
705   };
706   for (const NamedDecl *ND : ReferencedDecls) {
707     if (auto ID = getSymbolID(ND)) {
708       IncRef(ID);
709     }
710   }
711   if (Opts.CollectMacro) {
712     assert(PP);
713     // First, drop header guards. We can't identify these until EOF.
714     for (const IdentifierInfo *II : IndexedMacros) {
715       if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
716         if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
717           if (MI->isUsedForHeaderGuard())
718             Symbols.erase(ID);
719     }
720     // Now increment refcounts.
721     for (const IdentifierInfo *II : ReferencedMacros) {
722       if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
723         if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
724           IncRef(ID);
725     }
726   }
727   // Fill in IncludeHeaders.
728   // We delay this until end of TU so header guards are all resolved.
729   llvm::SmallString<128> QName;
730   for (const auto &Entry : IncludeFiles) {
731     if (const Symbol *S = Symbols.find(Entry.first)) {
732       llvm::StringRef IncludeHeader;
733       // Look for an overridden include header for this symbol specifically.
734       if (Opts.Includes) {
735         QName = S->Scope;
736         QName.append(S->Name);
737         IncludeHeader = Opts.Includes->mapSymbol(QName);
738         if (!IncludeHeader.empty()) {
739           if (IncludeHeader.front() != '"' && IncludeHeader.front() != '<')
740             IncludeHeader = HeaderFileURIs->toURI(IncludeHeader);
741           else if (IncludeHeader == "<utility>" && QName == "std::move" &&
742                    S->Signature.contains(','))
743             IncludeHeader = "<algorithm>";
744         }
745       }
746       // Otherwise find the approprate include header for the defining file.
747       if (IncludeHeader.empty())
748         IncludeHeader = HeaderFileURIs->getIncludeHeader(Entry.second);
749 
750       // Symbols in slabs aren't mutable, insert() has to walk all the strings
751       if (!IncludeHeader.empty()) {
752         Symbol NewSym = *S;
753         NewSym.IncludeHeaders.push_back({IncludeHeader, 1});
754         Symbols.insert(NewSym);
755       }
756     }
757   }
758 
759   const auto &SM = ASTCtx->getSourceManager();
760   auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole,
761                         bool Spelled = false) {
762     auto FileID = SM.getFileID(LocAndRole.Loc);
763     // FIXME: use the result to filter out references.
764     shouldIndexFile(FileID);
765     if (const auto *FE = SM.getFileEntryForID(FileID)) {
766       auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts());
767       Ref R;
768       R.Location.Start = Range.first;
769       R.Location.End = Range.second;
770       R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str();
771       R.Kind = toRefKind(LocAndRole.Roles, Spelled);
772       R.Container = getSymbolID(LocAndRole.Container);
773       Refs.insert(ID, R);
774     }
775   };
776   // Populate Refs slab from MacroRefs.
777   // FIXME: All MacroRefs are marked as Spelled now, but this should be checked.
778   for (const auto &IDAndRefs : MacroRefs)
779     for (const auto &LocAndRole : IDAndRefs.second)
780       CollectRef(IDAndRefs.first, LocAndRole, /*Spelled=*/true);
781   // Populate Refs slab from DeclRefs.
782   llvm::DenseMap<FileID, std::vector<syntax::Token>> FilesToTokensCache;
783   for (auto &DeclAndRef : DeclRefs) {
784     if (auto ID = getSymbolID(DeclAndRef.first)) {
785       for (auto &LocAndRole : DeclAndRef.second) {
786         const auto FileID = SM.getFileID(LocAndRole.Loc);
787         // FIXME: It's better to use TokenBuffer by passing spelled tokens from
788         // the caller of SymbolCollector.
789         if (!FilesToTokensCache.count(FileID))
790           FilesToTokensCache[FileID] =
791               syntax::tokenize(FileID, SM, ASTCtx->getLangOpts());
792         llvm::ArrayRef<syntax::Token> Tokens = FilesToTokensCache[FileID];
793         // Check if the referenced symbol is spelled exactly the same way the
794         // corresponding NamedDecl is. If it is, mark this reference as spelled.
795         const auto *IdentifierToken =
796             spelledIdentifierTouching(LocAndRole.Loc, Tokens);
797         DeclarationName Name = DeclAndRef.first->getDeclName();
798         const auto NameKind = Name.getNameKind();
799         bool IsTargetKind = NameKind == DeclarationName::Identifier ||
800                             NameKind == DeclarationName::CXXConstructorName;
801         bool Spelled = IdentifierToken && IsTargetKind &&
802                        Name.getAsString() == IdentifierToken->text(SM);
803         CollectRef(ID, LocAndRole, Spelled);
804       }
805     }
806   }
807 
808   ReferencedDecls.clear();
809   ReferencedMacros.clear();
810   DeclRefs.clear();
811   IncludeFiles.clear();
812 }
813 
814 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
815                                               bool IsMainFileOnly) {
816   auto &Ctx = ND.getASTContext();
817   auto &SM = Ctx.getSourceManager();
818 
819   Symbol S;
820   S.ID = std::move(ID);
821   std::string QName = printQualifiedName(ND);
822   // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
823   // for consistency with CodeCompletionString and a clean name/signature split.
824   std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
825   std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
826   S.TemplateSpecializationArgs = TemplateSpecializationArgs;
827 
828   // We collect main-file symbols, but do not use them for code completion.
829   if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
830     S.Flags |= Symbol::IndexedForCodeCompletion;
831   if (isImplementationDetail(&ND))
832     S.Flags |= Symbol::ImplementationDetail;
833   if (!IsMainFileOnly)
834     S.Flags |= Symbol::VisibleOutsideFile;
835   S.SymInfo = index::getSymbolInfo(&ND);
836   auto Loc = nameLocation(ND, SM);
837   assert(Loc.isValid() && "Invalid source location for NamedDecl");
838   // FIXME: use the result to filter out symbols.
839   shouldIndexFile(SM.getFileID(Loc));
840   if (auto DeclLoc = getTokenLocation(Loc))
841     S.CanonicalDeclaration = *DeclLoc;
842 
843   S.Origin = Opts.Origin;
844   if (ND.getAvailability() == AR_Deprecated)
845     S.Flags |= Symbol::Deprecated;
846 
847   // Add completion info.
848   // FIXME: we may want to choose a different redecl, or combine from several.
849   assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
850   // We use the primary template, as clang does during code completion.
851   CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
852   const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
853       *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
854       *CompletionTUInfo,
855       /*IncludeBriefComments*/ false);
856   std::string Documentation =
857       formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,
858                                               /*CommentsFromHeaders=*/true));
859   if (!(S.Flags & Symbol::IndexedForCodeCompletion)) {
860     if (Opts.StoreAllDocumentation)
861       S.Documentation = Documentation;
862     Symbols.insert(S);
863     return Symbols.find(S.ID);
864   }
865   S.Documentation = Documentation;
866   std::string Signature;
867   std::string SnippetSuffix;
868   getSignature(*CCS, &Signature, &SnippetSuffix);
869   S.Signature = Signature;
870   S.CompletionSnippetSuffix = SnippetSuffix;
871   std::string ReturnType = getReturnType(*CCS);
872   S.ReturnType = ReturnType;
873 
874   llvm::Optional<OpaqueType> TypeStorage;
875   if (S.Flags & Symbol::IndexedForCodeCompletion) {
876     TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
877     if (TypeStorage)
878       S.Type = TypeStorage->raw();
879   }
880 
881   Symbols.insert(S);
882   setIncludeLocation(S, ND.getLocation());
883   return Symbols.find(S.ID);
884 }
885 
886 void SymbolCollector::addDefinition(const NamedDecl &ND,
887                                     const Symbol &DeclSym) {
888   if (DeclSym.Definition)
889     return;
890   // If we saw some forward declaration, we end up copying the symbol.
891   // This is not ideal, but avoids duplicating the "is this a definition" check
892   // in clang::index. We should only see one definition.
893   Symbol S = DeclSym;
894   const auto &SM = ND.getASTContext().getSourceManager();
895   auto Loc = nameLocation(ND, SM);
896   // FIXME: use the result to filter out symbols.
897   shouldIndexFile(SM.getFileID(Loc));
898   if (auto DefLoc = getTokenLocation(Loc))
899     S.Definition = *DefLoc;
900   Symbols.insert(S);
901 }
902 
903 bool SymbolCollector::shouldIndexFile(FileID FID) {
904   if (!Opts.FileFilter)
905     return true;
906   auto I = FilesToIndexCache.try_emplace(FID);
907   if (I.second)
908     I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
909   return I.first->second;
910 }
911 
912 } // namespace clangd
913 } // namespace clang
914