1 //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolCollector.h"
10 #include "AST.h"
11 #include "CanonicalIncludes.h"
12 #include "CodeComplete.h"
13 #include "CodeCompletionStrings.h"
14 #include "ExpectedTypes.h"
15 #include "SourceCode.h"
16 #include "SymbolLocation.h"
17 #include "URI.h"
18 #include "index/SymbolID.h"
19 #include "support/Logger.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/DeclBase.h"
22 #include "clang/AST/DeclCXX.h"
23 #include "clang/AST/DeclObjC.h"
24 #include "clang/AST/DeclTemplate.h"
25 #include "clang/Basic/SourceLocation.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/Basic/Specifiers.h"
28 #include "clang/Index/IndexSymbol.h"
29 #include "clang/Index/IndexingAction.h"
30 #include "clang/Index/USRGeneration.h"
31 #include "clang/Lex/Preprocessor.h"
32 #include "clang/Tooling/Syntax/Tokens.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/FileSystem.h"
35 #include "llvm/Support/MemoryBuffer.h"
36 #include "llvm/Support/Path.h"
37 
38 namespace clang {
39 namespace clangd {
40 namespace {
41 
42 /// If \p ND is a template specialization, returns the described template.
43 /// Otherwise, returns \p ND.
getTemplateOrThis(const NamedDecl & ND)44 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
45   if (auto T = ND.getDescribedTemplate())
46     return *T;
47   return ND;
48 }
49 
50 // Returns a URI of \p Path. Firstly, this makes the \p Path absolute using the
51 // current working directory of the given SourceManager if the Path is not an
52 // absolute path. If failed, this resolves relative paths against \p FallbackDir
53 // to get an absolute path. Then, this tries creating an URI for the absolute
54 // path with schemes specified in \p Opts. This returns an URI with the first
55 // working scheme, if there is any; otherwise, this returns None.
56 //
57 // The Path can be a path relative to the build directory, or retrieved from
58 // the SourceManager.
toURI(const SourceManager & SM,llvm::StringRef Path,const SymbolCollector::Options & Opts)59 std::string toURI(const SourceManager &SM, llvm::StringRef Path,
60                   const SymbolCollector::Options &Opts) {
61   llvm::SmallString<128> AbsolutePath(Path);
62   if (auto File = SM.getFileManager().getFile(Path)) {
63     if (auto CanonPath = getCanonicalPath(*File, SM)) {
64       AbsolutePath = *CanonPath;
65     }
66   }
67   // We don't perform is_absolute check in an else branch because makeAbsolute
68   // might return a relative path on some InMemoryFileSystems.
69   if (!llvm::sys::path::is_absolute(AbsolutePath) && !Opts.FallbackDir.empty())
70     llvm::sys::fs::make_absolute(Opts.FallbackDir, AbsolutePath);
71   llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/true);
72   return URI::create(AbsolutePath).toString();
73 }
74 
75 // Checks whether the decl is a private symbol in a header generated by
76 // protobuf compiler.
77 // FIXME: make filtering extensible when there are more use cases for symbol
78 // filters.
isPrivateProtoDecl(const NamedDecl & ND)79 bool isPrivateProtoDecl(const NamedDecl &ND) {
80   const auto &SM = ND.getASTContext().getSourceManager();
81   if (!isProtoFile(nameLocation(ND, SM), SM))
82     return false;
83 
84   // ND without identifier can be operators.
85   if (ND.getIdentifier() == nullptr)
86     return false;
87   auto Name = ND.getIdentifier()->getName();
88   if (!Name.contains('_'))
89     return false;
90   // Nested proto entities (e.g. Message::Nested) have top-level decls
91   // that shouldn't be used (Message_Nested). Ignore them completely.
92   // The nested entities are dangling type aliases, we may want to reconsider
93   // including them in the future.
94   // For enum constants, SOME_ENUM_CONSTANT is not private and should be
95   // indexed. Outer_INNER is private. This heuristic relies on naming style, it
96   // will include OUTER_INNER and exclude some_enum_constant.
97   // FIXME: the heuristic relies on naming style (i.e. no underscore in
98   // user-defined names) and can be improved.
99   return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
100 }
101 
102 // We only collect #include paths for symbols that are suitable for global code
103 // completion, except for namespaces since #include path for a namespace is hard
104 // to define.
shouldCollectIncludePath(index::SymbolKind Kind)105 bool shouldCollectIncludePath(index::SymbolKind Kind) {
106   using SK = index::SymbolKind;
107   switch (Kind) {
108   case SK::Macro:
109   case SK::Enum:
110   case SK::Struct:
111   case SK::Class:
112   case SK::Union:
113   case SK::TypeAlias:
114   case SK::Using:
115   case SK::Function:
116   case SK::Variable:
117   case SK::EnumConstant:
118     return true;
119   default:
120     return false;
121   }
122 }
123 
124 // Return the symbol range of the token at \p TokLoc.
125 std::pair<SymbolLocation::Position, SymbolLocation::Position>
getTokenRange(SourceLocation TokLoc,const SourceManager & SM,const LangOptions & LangOpts)126 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
127               const LangOptions &LangOpts) {
128   auto CreatePosition = [&SM](SourceLocation Loc) {
129     auto LSPLoc = sourceLocToPosition(SM, Loc);
130     SymbolLocation::Position Pos;
131     Pos.setLine(LSPLoc.line);
132     Pos.setColumn(LSPLoc.character);
133     return Pos;
134   };
135 
136   auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
137   return {CreatePosition(TokLoc),
138           CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
139 }
140 
141 // Return the symbol location of the token at \p TokLoc.
142 llvm::Optional<SymbolLocation>
getTokenLocation(SourceLocation TokLoc,const SourceManager & SM,const SymbolCollector::Options & Opts,const clang::LangOptions & LangOpts,std::string & FileURIStorage)143 getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
144                  const SymbolCollector::Options &Opts,
145                  const clang::LangOptions &LangOpts,
146                  std::string &FileURIStorage) {
147   auto Path = SM.getFilename(TokLoc);
148   if (Path.empty())
149     return None;
150   FileURIStorage = toURI(SM, Path, Opts);
151   SymbolLocation Result;
152   Result.FileURI = FileURIStorage.c_str();
153   auto Range = getTokenRange(TokLoc, SM, LangOpts);
154   Result.Start = Range.first;
155   Result.End = Range.second;
156 
157   return Result;
158 }
159 
160 // Checks whether \p ND is a good candidate to be the *canonical* declaration of
161 // its symbol (e.g. a go-to-declaration target). This overrides the default of
162 // using Clang's canonical declaration, which is the first in the TU.
163 //
164 // Example: preferring a class declaration over its forward declaration.
isPreferredDeclaration(const NamedDecl & ND,index::SymbolRoleSet Roles)165 bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
166   const auto &SM = ND.getASTContext().getSourceManager();
167   if (isa<TagDecl>(ND))
168     return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
169            !isInsideMainFile(ND.getLocation(), SM);
170   if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(&ND))
171     return ID->isThisDeclarationADefinition();
172   if (const auto *PD = dyn_cast<ObjCProtocolDecl>(&ND))
173     return PD->isThisDeclarationADefinition();
174   return false;
175 }
176 
toRefKind(index::SymbolRoleSet Roles,bool Spelled=false)177 RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
178   RefKind Result = RefKind::Unknown;
179   if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))
180     Result |= RefKind::Declaration;
181   if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
182     Result |= RefKind::Definition;
183   if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))
184     Result |= RefKind::Reference;
185   if (Spelled)
186     Result |= RefKind::Spelled;
187   return Result;
188 }
189 
shouldIndexRelation(const index::SymbolRelation & R)190 bool shouldIndexRelation(const index::SymbolRelation &R) {
191   // We currently only index BaseOf relations, for type hierarchy subtypes.
192   return R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf);
193 }
194 
195 } // namespace
196 
SymbolCollector(Options Opts)197 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
198 
initialize(ASTContext & Ctx)199 void SymbolCollector::initialize(ASTContext &Ctx) {
200   ASTCtx = &Ctx;
201   CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
202   CompletionTUInfo =
203       std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
204 }
205 
shouldCollectSymbol(const NamedDecl & ND,const ASTContext & ASTCtx,const Options & Opts,bool IsMainFileOnly)206 bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
207                                           const ASTContext &ASTCtx,
208                                           const Options &Opts,
209                                           bool IsMainFileOnly) {
210   // Skip anonymous declarations, e.g (anonymous enum/class/struct).
211   if (ND.getDeclName().isEmpty())
212     return false;
213 
214   // Skip main-file symbols if we are not collecting them.
215   if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
216     return false;
217 
218   // Skip symbols in anonymous namespaces in header files.
219   if (!IsMainFileOnly && ND.isInAnonymousNamespace())
220     return false;
221 
222   // We want most things but not "local" symbols such as symbols inside
223   // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
224   // FIXME: Need a matcher for ExportDecl in order to include symbols declared
225   // within an export.
226   const auto *DeclCtx = ND.getDeclContext();
227   switch (DeclCtx->getDeclKind()) {
228   case Decl::TranslationUnit:
229   case Decl::Namespace:
230   case Decl::LinkageSpec:
231   case Decl::Enum:
232   case Decl::ObjCProtocol:
233   case Decl::ObjCInterface:
234   case Decl::ObjCCategory:
235   case Decl::ObjCCategoryImpl:
236   case Decl::ObjCImplementation:
237     break;
238   default:
239     // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
240     // easier to cast.
241     if (!isa<RecordDecl>(DeclCtx))
242       return false;
243   }
244 
245   // Avoid indexing internal symbols in protobuf generated headers.
246   if (isPrivateProtoDecl(ND))
247     return false;
248   return true;
249 }
250 
251 // Always return true to continue indexing.
handleDeclOccurrence(const Decl * D,index::SymbolRoleSet Roles,llvm::ArrayRef<index::SymbolRelation> Relations,SourceLocation Loc,index::IndexDataConsumer::ASTNodeInfo ASTNode)252 bool SymbolCollector::handleDeclOccurrence(
253     const Decl *D, index::SymbolRoleSet Roles,
254     llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
255     index::IndexDataConsumer::ASTNodeInfo ASTNode) {
256   assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
257   assert(CompletionAllocator && CompletionTUInfo);
258   assert(ASTNode.OrigD);
259   // Indexing API puts canonical decl into D, which might not have a valid
260   // source location for implicit/built-in decls. Fallback to original decl in
261   // such cases.
262   if (D->getLocation().isInvalid())
263     D = ASTNode.OrigD;
264   // If OrigD is an declaration associated with a friend declaration and it's
265   // not a definition, skip it. Note that OrigD is the occurrence that the
266   // collector is currently visiting.
267   if ((ASTNode.OrigD->getFriendObjectKind() !=
268        Decl::FriendObjectKind::FOK_None) &&
269       !(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
270     return true;
271   // A declaration created for a friend declaration should not be used as the
272   // canonical declaration in the index. Use OrigD instead, unless we've already
273   // picked a replacement for D
274   if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
275     D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
276   // Flag to mark that D should be considered canonical meaning its declaration
277   // will override any previous declaration for the Symbol.
278   bool DeclIsCanonical = false;
279   // Avoid treating ObjCImplementationDecl as a canonical declaration if it has
280   // a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.
281   if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {
282     DeclIsCanonical = true;
283     if (const auto *CID = IID->getClassInterface())
284       if (const auto *DD = CID->getDefinition())
285         if (!DD->isImplicitInterfaceDecl())
286           D = DD;
287   }
288   // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
289   // its ObjCCategoryDecl if it has one.
290   if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {
291     DeclIsCanonical = true;
292     if (const auto *CD = CID->getCategoryDecl())
293       D = CD;
294   }
295   const NamedDecl *ND = dyn_cast<NamedDecl>(D);
296   if (!ND)
297     return true;
298 
299   // Mark D as referenced if this is a reference coming from the main file.
300   // D may not be an interesting symbol, but it's cheaper to check at the end.
301   auto &SM = ASTCtx->getSourceManager();
302   if (Opts.CountReferences &&
303       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
304       SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
305     ReferencedDecls.insert(ND);
306 
307   auto ID = getSymbolID(ND);
308   if (!ID)
309     return true;
310 
311   // ND is the canonical (i.e. first) declaration. If it's in the main file
312   // (which is not a header), then no public declaration was visible, so assume
313   // it's main-file only.
314   bool IsMainFileOnly =
315       SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
316       !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
317                     ASTCtx->getLangOpts());
318   // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
319   if (ASTNode.OrigD->isImplicit() ||
320       !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
321     return true;
322 
323   // Note: we need to process relations for all decl occurrences, including
324   // refs, because the indexing code only populates relations for specific
325   // occurrences. For example, RelationBaseOf is only populated for the
326   // occurrence inside the base-specifier.
327   processRelations(*ND, ID, Relations);
328 
329   bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
330   bool IsOnlyRef =
331       !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
332                  static_cast<unsigned>(index::SymbolRole::Definition)));
333 
334   if (IsOnlyRef && !CollectRef)
335     return true;
336 
337   // Unlike other fields, e.g. Symbols (which use spelling locations), we use
338   // file locations for references (as it aligns the behavior of clangd's
339   // AST-based xref).
340   // FIXME: we should try to use the file locations for other fields.
341   if (CollectRef &&
342       (!IsMainFileOnly || Opts.CollectMainFileRefs ||
343        ND->isExternallyVisible()) &&
344       !isa<NamespaceDecl>(ND) &&
345       (Opts.RefsInHeaders ||
346        SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID()))
347     DeclRefs[ND].push_back(
348         SymbolRef{SM.getFileLoc(Loc), Roles, ASTNode.Parent});
349   // Don't continue indexing if this is a mere reference.
350   if (IsOnlyRef)
351     return true;
352 
353   // FIXME: ObjCPropertyDecl are not properly indexed here:
354   // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
355   // not a NamedDecl.
356   auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
357   if (!OriginalDecl)
358     return true;
359 
360   const Symbol *BasicSymbol = Symbols.find(ID);
361   if (isPreferredDeclaration(*OriginalDecl, Roles))
362     // If OriginalDecl is preferred, replace/create the existing canonical
363     // declaration (e.g. a class forward declaration). There should be at most
364     // one duplicate as we expect to see only one preferred declaration per
365     // TU, because in practice they are definitions.
366     BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);
367   else if (!BasicSymbol || DeclIsCanonical)
368     BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);
369 
370   if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
371     addDefinition(*OriginalDecl, *BasicSymbol);
372 
373   return true;
374 }
375 
handleMacros(const MainFileMacros & MacroRefsToIndex)376 void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
377   assert(PP.get());
378   const auto &SM = PP->getSourceManager();
379   const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
380   assert(MainFileEntry);
381 
382   const auto MainFileURI = toURI(SM, MainFileEntry->getName(), Opts);
383   // Add macro references.
384   for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
385     for (const auto &Range : IDToRefs.second) {
386       Ref R;
387       R.Location.Start.setLine(Range.start.line);
388       R.Location.Start.setColumn(Range.start.character);
389       R.Location.End.setLine(Range.end.line);
390       R.Location.End.setColumn(Range.end.character);
391       R.Location.FileURI = MainFileURI.c_str();
392       // FIXME: Add correct RefKind information to MainFileMacros.
393       R.Kind = RefKind::Reference;
394       Refs.insert(IDToRefs.first, R);
395     }
396   }
397 }
398 
handleMacroOccurrence(const IdentifierInfo * Name,const MacroInfo * MI,index::SymbolRoleSet Roles,SourceLocation Loc)399 bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
400                                             const MacroInfo *MI,
401                                             index::SymbolRoleSet Roles,
402                                             SourceLocation Loc) {
403   assert(PP.get());
404   // Builtin macros don't have useful locations and aren't needed in completion.
405   if (MI->isBuiltinMacro())
406     return true;
407 
408   const auto &SM = PP->getSourceManager();
409   auto DefLoc = MI->getDefinitionLoc();
410   // Also avoid storing predefined macros like __DBL_MIN__.
411   if (SM.isWrittenInBuiltinFile(DefLoc))
412     return true;
413 
414   auto ID = getSymbolID(Name->getName(), MI, SM);
415   if (!ID)
416     return true;
417 
418   auto SpellingLoc = SM.getSpellingLoc(Loc);
419   bool IsMainFileOnly =
420       SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&
421       !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
422                     ASTCtx->getLangOpts());
423   // Do not store references to main-file macros.
424   if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
425       (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID()))
426     // FIXME: Populate container information for macro references.
427     MacroRefs[ID].push_back({Loc, Roles, /*Container=*/nullptr});
428 
429   // Collect symbols.
430   if (!Opts.CollectMacro)
431     return true;
432 
433   // Skip main-file macros if we are not collecting them.
434   if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
435     return false;
436 
437   // Mark the macro as referenced if this is a reference coming from the main
438   // file. The macro may not be an interesting symbol, but it's cheaper to check
439   // at the end.
440   if (Opts.CountReferences &&
441       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
442       SM.getFileID(SpellingLoc) == SM.getMainFileID())
443     ReferencedMacros.insert(Name);
444 
445   // Don't continue indexing if this is a mere reference.
446   // FIXME: remove macro with ID if it is undefined.
447   if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
448         Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
449     return true;
450 
451   // Only collect one instance in case there are multiple.
452   if (Symbols.find(ID) != nullptr)
453     return true;
454 
455   Symbol S;
456   S.ID = std::move(ID);
457   S.Name = Name->getName();
458   if (!IsMainFileOnly) {
459     S.Flags |= Symbol::IndexedForCodeCompletion;
460     S.Flags |= Symbol::VisibleOutsideFile;
461   }
462   S.SymInfo = index::getSymbolInfoForMacro(*MI);
463   S.Origin = Opts.Origin;
464   std::string FileURI;
465   // FIXME: use the result to filter out symbols.
466   shouldIndexFile(SM.getFileID(Loc));
467   if (auto DeclLoc =
468           getTokenLocation(DefLoc, SM, Opts, PP->getLangOpts(), FileURI))
469     S.CanonicalDeclaration = *DeclLoc;
470 
471   CodeCompletionResult SymbolCompletion(Name);
472   const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
473       *PP, *CompletionAllocator, *CompletionTUInfo);
474   std::string Signature;
475   std::string SnippetSuffix;
476   getSignature(*CCS, &Signature, &SnippetSuffix);
477   S.Signature = Signature;
478   S.CompletionSnippetSuffix = SnippetSuffix;
479 
480   IndexedMacros.insert(Name);
481   setIncludeLocation(S, DefLoc);
482   Symbols.insert(S);
483   return true;
484 }
485 
processRelations(const NamedDecl & ND,const SymbolID & ID,ArrayRef<index::SymbolRelation> Relations)486 void SymbolCollector::processRelations(
487     const NamedDecl &ND, const SymbolID &ID,
488     ArrayRef<index::SymbolRelation> Relations) {
489   // Store subtype relations.
490   if (!dyn_cast<TagDecl>(&ND))
491     return;
492 
493   for (const auto &R : Relations) {
494     if (!shouldIndexRelation(R))
495       continue;
496 
497     const Decl *Object = R.RelatedSymbol;
498 
499     auto ObjectID = getSymbolID(Object);
500     if (!ObjectID)
501       continue;
502 
503     // Record the relation.
504     // TODO: There may be cases where the object decl is not indexed for some
505     // reason. Those cases should probably be removed in due course, but for
506     // now there are two possible ways to handle it:
507     //   (A) Avoid storing the relation in such cases.
508     //   (B) Store it anyways. Clients will likely lookup() the SymbolID
509     //       in the index and find nothing, but that's a situation they
510     //       probably need to handle for other reasons anyways.
511     // We currently do (B) because it's simpler.
512     this->Relations.insert(Relation{ID, RelationKind::BaseOf, ObjectID});
513   }
514 }
515 
setIncludeLocation(const Symbol & S,SourceLocation Loc)516 void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
517   if (Opts.CollectIncludePath)
518     if (shouldCollectIncludePath(S.SymInfo.Kind))
519       // Use the expansion location to get the #include header since this is
520       // where the symbol is exposed.
521       IncludeFiles[S.ID] =
522           PP->getSourceManager().getDecomposedExpansionLoc(Loc).first;
523 }
524 
finish()525 void SymbolCollector::finish() {
526   // At the end of the TU, add 1 to the refcount of all referenced symbols.
527   auto IncRef = [this](const SymbolID &ID) {
528     if (const auto *S = Symbols.find(ID)) {
529       Symbol Inc = *S;
530       ++Inc.References;
531       Symbols.insert(Inc);
532     }
533   };
534   for (const NamedDecl *ND : ReferencedDecls) {
535     if (auto ID = getSymbolID(ND)) {
536       IncRef(ID);
537     }
538   }
539   if (Opts.CollectMacro) {
540     assert(PP);
541     // First, drop header guards. We can't identify these until EOF.
542     for (const IdentifierInfo *II : IndexedMacros) {
543       if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
544         if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
545           if (MI->isUsedForHeaderGuard())
546             Symbols.erase(ID);
547     }
548     // Now increment refcounts.
549     for (const IdentifierInfo *II : ReferencedMacros) {
550       if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
551         if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
552           IncRef(ID);
553     }
554   }
555   // Fill in IncludeHeaders.
556   // We delay this until end of TU so header guards are all resolved.
557   // Symbols in slabs aren't mutable, so insert() has to walk all the strings
558   // :-(
559   for (const auto &Entry : IncludeFiles)
560     if (const Symbol *S = Symbols.find(Entry.first)) {
561       if (auto Header = getIncludeHeader(*S, Entry.second)) {
562         Symbol NewSym = *S;
563         NewSym.IncludeHeaders.push_back({std::move(*Header), 1});
564         Symbols.insert(NewSym);
565       }
566     }
567 
568   const auto &SM = ASTCtx->getSourceManager();
569   llvm::DenseMap<FileID, std::string> URICache;
570   auto GetURI = [&](FileID FID) -> llvm::Optional<std::string> {
571     auto Found = URICache.find(FID);
572     if (Found == URICache.end()) {
573       if (auto *FileEntry = SM.getFileEntryForID(FID)) {
574         auto FileURI = toURI(SM, FileEntry->getName(), Opts);
575         Found = URICache.insert({FID, FileURI}).first;
576       } else {
577         // Ignore cases where we can not find a corresponding file entry for
578         // given location, e.g. symbols formed via macro concatenation.
579         return None;
580       }
581     }
582     return Found->second;
583   };
584   auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole,
585                         bool Spelled = false) {
586     auto FileID = SM.getFileID(LocAndRole.Loc);
587     // FIXME: use the result to filter out references.
588     shouldIndexFile(FileID);
589     if (auto FileURI = GetURI(FileID)) {
590       auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts());
591       Ref R;
592       R.Location.Start = Range.first;
593       R.Location.End = Range.second;
594       R.Location.FileURI = FileURI->c_str();
595       R.Kind = toRefKind(LocAndRole.Roles, Spelled);
596       R.Container = getSymbolID(LocAndRole.Container);
597       Refs.insert(ID, R);
598     }
599   };
600   // Populate Refs slab from MacroRefs.
601   // FIXME: All MacroRefs are marked as Spelled now, but this should be checked.
602   for (const auto &IDAndRefs : MacroRefs)
603     for (const auto &LocAndRole : IDAndRefs.second)
604       CollectRef(IDAndRefs.first, LocAndRole, /*Spelled=*/true);
605   // Populate Refs slab from DeclRefs.
606   llvm::DenseMap<FileID, std::vector<syntax::Token>> FilesToTokensCache;
607   for (auto &DeclAndRef : DeclRefs) {
608     if (auto ID = getSymbolID(DeclAndRef.first)) {
609       for (auto &LocAndRole : DeclAndRef.second) {
610         const auto FileID = SM.getFileID(LocAndRole.Loc);
611         // FIXME: It's better to use TokenBuffer by passing spelled tokens from
612         // the caller of SymbolCollector.
613         if (!FilesToTokensCache.count(FileID))
614           FilesToTokensCache[FileID] =
615               syntax::tokenize(FileID, SM, ASTCtx->getLangOpts());
616         llvm::ArrayRef<syntax::Token> Tokens = FilesToTokensCache[FileID];
617         // Check if the referenced symbol is spelled exactly the same way the
618         // corresponding NamedDecl is. If it is, mark this reference as spelled.
619         const auto *IdentifierToken =
620             spelledIdentifierTouching(LocAndRole.Loc, Tokens);
621         DeclarationName Name = DeclAndRef.first->getDeclName();
622         const auto NameKind = Name.getNameKind();
623         bool IsTargetKind = NameKind == DeclarationName::Identifier ||
624                             NameKind == DeclarationName::CXXConstructorName;
625         bool Spelled = IdentifierToken && IsTargetKind &&
626                        Name.getAsString() == IdentifierToken->text(SM);
627         CollectRef(ID, LocAndRole, Spelled);
628       }
629     }
630   }
631 
632   ReferencedDecls.clear();
633   ReferencedMacros.clear();
634   DeclRefs.clear();
635   FilesToIndexCache.clear();
636   HeaderIsSelfContainedCache.clear();
637   IncludeFiles.clear();
638 }
639 
addDeclaration(const NamedDecl & ND,SymbolID ID,bool IsMainFileOnly)640 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
641                                               bool IsMainFileOnly) {
642   auto &Ctx = ND.getASTContext();
643   auto &SM = Ctx.getSourceManager();
644 
645   Symbol S;
646   S.ID = std::move(ID);
647   std::string QName = printQualifiedName(ND);
648   // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
649   // for consistency with CodeCompletionString and a clean name/signature split.
650   std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
651   std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
652   S.TemplateSpecializationArgs = TemplateSpecializationArgs;
653 
654   // We collect main-file symbols, but do not use them for code completion.
655   if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
656     S.Flags |= Symbol::IndexedForCodeCompletion;
657   if (isImplementationDetail(&ND))
658     S.Flags |= Symbol::ImplementationDetail;
659   if (!IsMainFileOnly)
660     S.Flags |= Symbol::VisibleOutsideFile;
661   S.SymInfo = index::getSymbolInfo(&ND);
662   std::string FileURI;
663   auto Loc = nameLocation(ND, SM);
664   assert(Loc.isValid() && "Invalid source location for NamedDecl");
665   // FIXME: use the result to filter out symbols.
666   shouldIndexFile(SM.getFileID(Loc));
667   if (auto DeclLoc =
668           getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
669     S.CanonicalDeclaration = *DeclLoc;
670 
671   S.Origin = Opts.Origin;
672   if (ND.getAvailability() == AR_Deprecated)
673     S.Flags |= Symbol::Deprecated;
674 
675   // Add completion info.
676   // FIXME: we may want to choose a different redecl, or combine from several.
677   assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
678   // We use the primary template, as clang does during code completion.
679   CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
680   const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
681       *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
682       *CompletionTUInfo,
683       /*IncludeBriefComments*/ false);
684   std::string Documentation =
685       formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,
686                                               /*CommentsFromHeaders=*/true));
687   if (!(S.Flags & Symbol::IndexedForCodeCompletion)) {
688     if (Opts.StoreAllDocumentation)
689       S.Documentation = Documentation;
690     Symbols.insert(S);
691     return Symbols.find(S.ID);
692   }
693   S.Documentation = Documentation;
694   std::string Signature;
695   std::string SnippetSuffix;
696   getSignature(*CCS, &Signature, &SnippetSuffix);
697   S.Signature = Signature;
698   S.CompletionSnippetSuffix = SnippetSuffix;
699   std::string ReturnType = getReturnType(*CCS);
700   S.ReturnType = ReturnType;
701 
702   llvm::Optional<OpaqueType> TypeStorage;
703   if (S.Flags & Symbol::IndexedForCodeCompletion) {
704     TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
705     if (TypeStorage)
706       S.Type = TypeStorage->raw();
707   }
708 
709   Symbols.insert(S);
710   setIncludeLocation(S, ND.getLocation());
711   return Symbols.find(S.ID);
712 }
713 
addDefinition(const NamedDecl & ND,const Symbol & DeclSym)714 void SymbolCollector::addDefinition(const NamedDecl &ND,
715                                     const Symbol &DeclSym) {
716   if (DeclSym.Definition)
717     return;
718   // If we saw some forward declaration, we end up copying the symbol.
719   // This is not ideal, but avoids duplicating the "is this a definition" check
720   // in clang::index. We should only see one definition.
721   Symbol S = DeclSym;
722   std::string FileURI;
723   const auto &SM = ND.getASTContext().getSourceManager();
724   auto Loc = nameLocation(ND, SM);
725   // FIXME: use the result to filter out symbols.
726   shouldIndexFile(SM.getFileID(Loc));
727   if (auto DefLoc =
728           getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
729     S.Definition = *DefLoc;
730   Symbols.insert(S);
731 }
732 
733 /// Gets a canonical include (URI of the header or <header> or "header") for
734 /// header of \p FID (which should usually be the *expansion* file).
735 /// Returns None if includes should not be inserted for this file.
getIncludeHeader(const Symbol & S,FileID FID)736 llvm::Optional<std::string> SymbolCollector::getIncludeHeader(const Symbol &S,
737                                                               FileID FID) {
738   const SourceManager &SM = ASTCtx->getSourceManager();
739   const FileEntry *FE = SM.getFileEntryForID(FID);
740   if (!FE || FE->getName().empty())
741     return llvm::None;
742   llvm::StringRef Filename = FE->getName();
743   // If a file is mapped by canonical headers, use that mapping, regardless
744   // of whether it's an otherwise-good header (header guards etc).
745   if (Opts.Includes) {
746     llvm::SmallString<256> QName = S.Scope;
747     QName.append(S.Name);
748     llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
749     // If we had a mapping, always use it.
750     if (Canonical.startswith("<") || Canonical.startswith("\"")) {
751       // Hack: there are two std::move() overloads from different headers.
752       // CanonicalIncludes returns the common one-arg one from <utility>.
753       if (Canonical == "<utility>" && S.Name == "move" &&
754           S.Signature.contains(','))
755         Canonical = "<algorithm>";
756       return Canonical.str();
757     }
758     if (Canonical != Filename)
759       return toURI(SM, Canonical, Opts);
760   }
761   if (!isSelfContainedHeader(FID)) {
762     // A .inc or .def file is often included into a real header to define
763     // symbols (e.g. LLVM tablegen files).
764     if (Filename.endswith(".inc") || Filename.endswith(".def"))
765       return getIncludeHeader(S, SM.getFileID(SM.getIncludeLoc(FID)));
766     // Conservatively refuse to insert #includes to files without guards.
767     return llvm::None;
768   }
769   // Standard case: just insert the file itself.
770   return toURI(SM, Filename, Opts);
771 }
772 
isSelfContainedHeader(FileID FID)773 bool SymbolCollector::isSelfContainedHeader(FileID FID) {
774   // The real computation (which will be memoized).
775   auto Compute = [&] {
776     const SourceManager &SM = ASTCtx->getSourceManager();
777     const FileEntry *FE = SM.getFileEntryForID(FID);
778     if (!FE)
779       return false;
780     // FIXME: Should files that have been #import'd be considered
781     // self-contained? That's really a property of the includer,
782     // not of the file.
783     if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
784         !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
785       return false;
786     // This pattern indicates that a header can't be used without
787     // particular preprocessor state, usually set up by another header.
788     if (isDontIncludeMeHeader(SM.getBufferData(FID)))
789       return false;
790     return true;
791   };
792 
793   auto R = HeaderIsSelfContainedCache.try_emplace(FID, false);
794   if (R.second)
795     R.first->second = Compute();
796   return R.first->second;
797 }
798 
799 // Is Line an #if or #ifdef directive?
isIf(llvm::StringRef Line)800 static bool isIf(llvm::StringRef Line) {
801   Line = Line.ltrim();
802   if (!Line.consume_front("#"))
803     return false;
804   Line = Line.ltrim();
805   return Line.startswith("if");
806 }
807 // Is Line an #error directive mentioning includes?
isErrorAboutInclude(llvm::StringRef Line)808 static bool isErrorAboutInclude(llvm::StringRef Line) {
809   Line = Line.ltrim();
810   if (!Line.consume_front("#"))
811     return false;
812   Line = Line.ltrim();
813   if (!Line.startswith("error"))
814     return false;
815   return Line.contains_lower("includ"); // Matches "include" or "including".
816 }
817 
isDontIncludeMeHeader(llvm::StringRef Content)818 bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) {
819   llvm::StringRef Line;
820   // Only sniff up to 100 lines or 10KB.
821   Content = Content.take_front(100 * 100);
822   for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
823     std::tie(Line, Content) = Content.split('\n');
824     if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
825       return true;
826   }
827   return false;
828 }
829 
shouldIndexFile(FileID FID)830 bool SymbolCollector::shouldIndexFile(FileID FID) {
831   if (!Opts.FileFilter)
832     return true;
833   auto I = FilesToIndexCache.try_emplace(FID);
834   if (I.second)
835     I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
836   return I.first->second;
837 }
838 
839 } // namespace clangd
840 } // namespace clang
841