1 //===--- CodeComplete.cpp ----------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Code completion has several moving parts:
10 // - AST-based completions are provided using the completion hooks in Sema.
11 // - external completions are retrieved from the index (using hints from Sema)
12 // - the two sources overlap, and must be merged and overloads bundled
13 // - results must be scored and ranked (see Quality.h) before rendering
14 //
15 // Signature help works in a similar way as code completion, but it is simpler:
16 // it's purely AST-based, and there are few candidates.
17 //
18 //===----------------------------------------------------------------------===//
19
20 #include "CodeComplete.h"
21 #include "AST.h"
22 #include "CodeCompletionStrings.h"
23 #include "Compiler.h"
24 #include "Diagnostics.h"
25 #include "ExpectedTypes.h"
26 #include "FileDistance.h"
27 #include "FuzzyMatch.h"
28 #include "Headers.h"
29 #include "Hover.h"
30 #include "Preamble.h"
31 #include "Protocol.h"
32 #include "Quality.h"
33 #include "SourceCode.h"
34 #include "TUScheduler.h"
35 #include "URI.h"
36 #include "index/Index.h"
37 #include "index/Symbol.h"
38 #include "index/SymbolOrigin.h"
39 #include "support/Logger.h"
40 #include "support/Threading.h"
41 #include "support/ThreadsafeFS.h"
42 #include "support/Trace.h"
43 #include "clang/AST/Decl.h"
44 #include "clang/AST/DeclBase.h"
45 #include "clang/Basic/CharInfo.h"
46 #include "clang/Basic/LangOptions.h"
47 #include "clang/Basic/SourceLocation.h"
48 #include "clang/Basic/TokenKinds.h"
49 #include "clang/Format/Format.h"
50 #include "clang/Frontend/CompilerInstance.h"
51 #include "clang/Frontend/FrontendActions.h"
52 #include "clang/Lex/ExternalPreprocessorSource.h"
53 #include "clang/Lex/Lexer.h"
54 #include "clang/Lex/Preprocessor.h"
55 #include "clang/Lex/PreprocessorOptions.h"
56 #include "clang/Sema/CodeCompleteConsumer.h"
57 #include "clang/Sema/DeclSpec.h"
58 #include "clang/Sema/Sema.h"
59 #include "llvm/ADT/ArrayRef.h"
60 #include "llvm/ADT/None.h"
61 #include "llvm/ADT/Optional.h"
62 #include "llvm/ADT/SmallVector.h"
63 #include "llvm/ADT/StringExtras.h"
64 #include "llvm/ADT/StringRef.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/Debug.h"
67 #include "llvm/Support/Error.h"
68 #include "llvm/Support/Format.h"
69 #include "llvm/Support/FormatVariadic.h"
70 #include "llvm/Support/ScopedPrinter.h"
71 #include <algorithm>
72 #include <iterator>
73
// We log detailed candidates here if you run with -debug-only=CodeComplete.
75 #define DEBUG_TYPE "CodeComplete"
76
77 namespace clang {
78 namespace clangd {
79 namespace {
80
toCompletionItemKind(index::SymbolKind Kind)81 CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
82 using SK = index::SymbolKind;
83 switch (Kind) {
84 case SK::Unknown:
85 return CompletionItemKind::Missing;
86 case SK::Module:
87 case SK::Namespace:
88 case SK::NamespaceAlias:
89 return CompletionItemKind::Module;
90 case SK::Macro:
91 return CompletionItemKind::Text;
92 case SK::Enum:
93 return CompletionItemKind::Enum;
94 case SK::Struct:
95 return CompletionItemKind::Struct;
96 case SK::Class:
97 case SK::Protocol:
98 case SK::Extension:
99 case SK::Union:
100 return CompletionItemKind::Class;
101 case SK::TypeAlias:
102 // We use the same kind as the VSCode C++ extension.
103 // FIXME: pick a better option when we have one.
104 return CompletionItemKind::Interface;
105 case SK::Using:
106 return CompletionItemKind::Reference;
107 case SK::Function:
108 case SK::ConversionFunction:
109 return CompletionItemKind::Function;
110 case SK::Variable:
111 case SK::Parameter:
112 case SK::NonTypeTemplateParm:
113 return CompletionItemKind::Variable;
114 case SK::Field:
115 return CompletionItemKind::Field;
116 case SK::EnumConstant:
117 return CompletionItemKind::EnumMember;
118 case SK::InstanceMethod:
119 case SK::ClassMethod:
120 case SK::StaticMethod:
121 case SK::Destructor:
122 return CompletionItemKind::Method;
123 case SK::InstanceProperty:
124 case SK::ClassProperty:
125 case SK::StaticProperty:
126 return CompletionItemKind::Property;
127 case SK::Constructor:
128 return CompletionItemKind::Constructor;
129 case SK::TemplateTypeParm:
130 case SK::TemplateTemplateParm:
131 return CompletionItemKind::TypeParameter;
132 }
133 llvm_unreachable("Unhandled clang::index::SymbolKind.");
134 }
135
136 CompletionItemKind
toCompletionItemKind(CodeCompletionResult::ResultKind ResKind,const NamedDecl * Decl,CodeCompletionContext::Kind CtxKind)137 toCompletionItemKind(CodeCompletionResult::ResultKind ResKind,
138 const NamedDecl *Decl,
139 CodeCompletionContext::Kind CtxKind) {
140 if (Decl)
141 return toCompletionItemKind(index::getSymbolInfo(Decl).Kind);
142 if (CtxKind == CodeCompletionContext::CCC_IncludedFile)
143 return CompletionItemKind::File;
144 switch (ResKind) {
145 case CodeCompletionResult::RK_Declaration:
146 llvm_unreachable("RK_Declaration without Decl");
147 case CodeCompletionResult::RK_Keyword:
148 return CompletionItemKind::Keyword;
149 case CodeCompletionResult::RK_Macro:
150 return CompletionItemKind::Text; // unfortunately, there's no 'Macro'
151 // completion items in LSP.
152 case CodeCompletionResult::RK_Pattern:
153 return CompletionItemKind::Snippet;
154 }
155 llvm_unreachable("Unhandled CodeCompletionResult::ResultKind.");
156 }
157
158 // Identifier code completion result.
159 struct RawIdentifier {
160 llvm::StringRef Name;
161 unsigned References; // # of usages in file.
162 };
163
164 /// A code completion result, in clang-native form.
165 /// It may be promoted to a CompletionItem if it's among the top-ranked results.
166 struct CompletionCandidate {
167 llvm::StringRef Name; // Used for filtering and sorting.
168 // We may have a result from Sema, from the index, or both.
169 const CodeCompletionResult *SemaResult = nullptr;
170 const Symbol *IndexResult = nullptr;
171 const RawIdentifier *IdentifierResult = nullptr;
172 llvm::SmallVector<llvm::StringRef, 1> RankedIncludeHeaders;
173
174 // Returns a token identifying the overload set this is part of.
175 // 0 indicates it's not part of any overload set.
overloadSetclang::clangd::__anon726c0d1d0111::CompletionCandidate176 size_t overloadSet(const CodeCompleteOptions &Opts) const {
177 if (!Opts.BundleOverloads.getValueOr(false))
178 return 0;
179 llvm::SmallString<256> Scratch;
180 if (IndexResult) {
181 switch (IndexResult->SymInfo.Kind) {
182 case index::SymbolKind::ClassMethod:
183 case index::SymbolKind::InstanceMethod:
184 case index::SymbolKind::StaticMethod:
185 #ifndef NDEBUG
186 llvm_unreachable("Don't expect members from index in code completion");
187 #else
188 LLVM_FALLTHROUGH;
189 #endif
190 case index::SymbolKind::Function:
191 // We can't group overloads together that need different #includes.
192 // This could break #include insertion.
193 return llvm::hash_combine(
194 (IndexResult->Scope + IndexResult->Name).toStringRef(Scratch),
195 headerToInsertIfAllowed(Opts).getValueOr(""));
196 default:
197 return 0;
198 }
199 }
200 if (SemaResult) {
201 // We need to make sure we're consistent with the IndexResult case!
202 const NamedDecl *D = SemaResult->Declaration;
203 if (!D || !D->isFunctionOrFunctionTemplate())
204 return 0;
205 {
206 llvm::raw_svector_ostream OS(Scratch);
207 D->printQualifiedName(OS);
208 }
209 return llvm::hash_combine(Scratch,
210 headerToInsertIfAllowed(Opts).getValueOr(""));
211 }
212 assert(IdentifierResult);
213 return 0;
214 }
215
216 // The best header to include if include insertion is allowed.
217 llvm::Optional<llvm::StringRef>
headerToInsertIfAllowedclang::clangd::__anon726c0d1d0111::CompletionCandidate218 headerToInsertIfAllowed(const CodeCompleteOptions &Opts) const {
219 if (Opts.InsertIncludes == CodeCompleteOptions::NeverInsert ||
220 RankedIncludeHeaders.empty())
221 return None;
222 if (SemaResult && SemaResult->Declaration) {
223 // Avoid inserting new #include if the declaration is found in the current
224 // file e.g. the symbol is forward declared.
225 auto &SM = SemaResult->Declaration->getASTContext().getSourceManager();
226 for (const Decl *RD : SemaResult->Declaration->redecls())
227 if (SM.isInMainFile(SM.getExpansionLoc(RD->getBeginLoc())))
228 return None;
229 }
230 return RankedIncludeHeaders[0];
231 }
232
233 using Bundle = llvm::SmallVector<CompletionCandidate, 4>;
234 };
235 using ScoredBundle =
236 std::pair<CompletionCandidate::Bundle, CodeCompletion::Scores>;
237 struct ScoredBundleGreater {
operator ()clang::clangd::__anon726c0d1d0111::ScoredBundleGreater238 bool operator()(const ScoredBundle &L, const ScoredBundle &R) {
239 if (L.second.Total != R.second.Total)
240 return L.second.Total > R.second.Total;
241 return L.first.front().Name <
242 R.first.front().Name; // Earlier name is better.
243 }
244 };
245
246 // Assembles a code completion out of a bundle of >=1 completion candidates.
247 // Many of the expensive strings are only computed at this point, once we know
248 // the candidate bundle is going to be returned.
249 //
250 // Many fields are the same for all candidates in a bundle (e.g. name), and are
251 // computed from the first candidate, in the constructor.
252 // Others vary per candidate, so add() must be called for remaining candidates.
253 struct CodeCompletionBuilder {
CodeCompletionBuilderclang::clangd::__anon726c0d1d0111::CodeCompletionBuilder254 CodeCompletionBuilder(ASTContext *ASTCtx, const CompletionCandidate &C,
255 CodeCompletionString *SemaCCS,
256 llvm::ArrayRef<std::string> QueryScopes,
257 const IncludeInserter &Includes,
258 llvm::StringRef FileName,
259 CodeCompletionContext::Kind ContextKind,
260 const CodeCompleteOptions &Opts,
261 bool IsUsingDeclaration, tok::TokenKind NextTokenKind)
262 : ASTCtx(ASTCtx), ExtractDocumentation(Opts.IncludeComments),
263 EnableFunctionArgSnippets(Opts.EnableFunctionArgSnippets),
264 IsUsingDeclaration(IsUsingDeclaration), NextTokenKind(NextTokenKind) {
265 add(C, SemaCCS);
266 if (C.SemaResult) {
267 assert(ASTCtx);
268 Completion.Origin |= SymbolOrigin::AST;
269 Completion.Name = std::string(llvm::StringRef(SemaCCS->getTypedText()));
270 if (Completion.Scope.empty()) {
271 if ((C.SemaResult->Kind == CodeCompletionResult::RK_Declaration) ||
272 (C.SemaResult->Kind == CodeCompletionResult::RK_Pattern))
273 if (const auto *D = C.SemaResult->getDeclaration())
274 if (const auto *ND = dyn_cast<NamedDecl>(D))
275 Completion.Scope = std::string(
276 splitQualifiedName(printQualifiedName(*ND)).first);
277 }
278 Completion.Kind = toCompletionItemKind(
279 C.SemaResult->Kind, C.SemaResult->Declaration, ContextKind);
280 // Sema could provide more info on whether the completion was a file or
281 // folder.
282 if (Completion.Kind == CompletionItemKind::File &&
283 Completion.Name.back() == '/')
284 Completion.Kind = CompletionItemKind::Folder;
285 for (const auto &FixIt : C.SemaResult->FixIts) {
286 Completion.FixIts.push_back(toTextEdit(
287 FixIt, ASTCtx->getSourceManager(), ASTCtx->getLangOpts()));
288 }
289 llvm::sort(Completion.FixIts, [](const TextEdit &X, const TextEdit &Y) {
290 return std::tie(X.range.start.line, X.range.start.character) <
291 std::tie(Y.range.start.line, Y.range.start.character);
292 });
293 Completion.Deprecated |=
294 (C.SemaResult->Availability == CXAvailability_Deprecated);
295 }
296 if (C.IndexResult) {
297 Completion.Origin |= C.IndexResult->Origin;
298 if (Completion.Scope.empty())
299 Completion.Scope = std::string(C.IndexResult->Scope);
300 if (Completion.Kind == CompletionItemKind::Missing)
301 Completion.Kind = toCompletionItemKind(C.IndexResult->SymInfo.Kind);
302 if (Completion.Name.empty())
303 Completion.Name = std::string(C.IndexResult->Name);
304 // If the completion was visible to Sema, no qualifier is needed. This
305 // avoids unneeded qualifiers in cases like with `using ns::X`.
306 if (Completion.RequiredQualifier.empty() && !C.SemaResult) {
307 llvm::StringRef ShortestQualifier = C.IndexResult->Scope;
308 for (llvm::StringRef Scope : QueryScopes) {
309 llvm::StringRef Qualifier = C.IndexResult->Scope;
310 if (Qualifier.consume_front(Scope) &&
311 Qualifier.size() < ShortestQualifier.size())
312 ShortestQualifier = Qualifier;
313 }
314 Completion.RequiredQualifier = std::string(ShortestQualifier);
315 }
316 Completion.Deprecated |= (C.IndexResult->Flags & Symbol::Deprecated);
317 }
318 if (C.IdentifierResult) {
319 Completion.Origin |= SymbolOrigin::Identifier;
320 Completion.Kind = CompletionItemKind::Text;
321 Completion.Name = std::string(C.IdentifierResult->Name);
322 }
323
324 // Turn absolute path into a literal string that can be #included.
325 auto Inserted = [&](llvm::StringRef Header)
326 -> llvm::Expected<std::pair<std::string, bool>> {
327 auto ResolvedDeclaring =
328 URI::resolve(C.IndexResult->CanonicalDeclaration.FileURI, FileName);
329 if (!ResolvedDeclaring)
330 return ResolvedDeclaring.takeError();
331 auto ResolvedInserted = toHeaderFile(Header, FileName);
332 if (!ResolvedInserted)
333 return ResolvedInserted.takeError();
334 auto Spelled = Includes.calculateIncludePath(*ResolvedInserted, FileName);
335 if (!Spelled)
336 return llvm::createStringError(llvm::inconvertibleErrorCode(),
337 "Header not on include path");
338 return std::make_pair(
339 std::move(*Spelled),
340 Includes.shouldInsertInclude(*ResolvedDeclaring, *ResolvedInserted));
341 };
342 bool ShouldInsert = C.headerToInsertIfAllowed(Opts).hasValue();
343 // Calculate include paths and edits for all possible headers.
344 for (const auto &Inc : C.RankedIncludeHeaders) {
345 if (auto ToInclude = Inserted(Inc)) {
346 CodeCompletion::IncludeCandidate Include;
347 Include.Header = ToInclude->first;
348 if (ToInclude->second && ShouldInsert)
349 Include.Insertion = Includes.insert(ToInclude->first);
350 Completion.Includes.push_back(std::move(Include));
351 } else
352 log("Failed to generate include insertion edits for adding header "
353 "(FileURI='{0}', IncludeHeader='{1}') into {2}: {3}",
354 C.IndexResult->CanonicalDeclaration.FileURI, Inc, FileName,
355 ToInclude.takeError());
356 }
357 // Prefer includes that do not need edits (i.e. already exist).
358 std::stable_partition(Completion.Includes.begin(),
359 Completion.Includes.end(),
360 [](const CodeCompletion::IncludeCandidate &I) {
361 return !I.Insertion.hasValue();
362 });
363 }
364
addclang::clangd::__anon726c0d1d0111::CodeCompletionBuilder365 void add(const CompletionCandidate &C, CodeCompletionString *SemaCCS) {
366 assert(bool(C.SemaResult) == bool(SemaCCS));
367 Bundled.emplace_back();
368 BundledEntry &S = Bundled.back();
369 if (C.SemaResult) {
370 bool IsPattern = C.SemaResult->Kind == CodeCompletionResult::RK_Pattern;
371 getSignature(*SemaCCS, &S.Signature, &S.SnippetSuffix,
372 &Completion.RequiredQualifier, IsPattern);
373 S.ReturnType = getReturnType(*SemaCCS);
374 } else if (C.IndexResult) {
375 S.Signature = std::string(C.IndexResult->Signature);
376 S.SnippetSuffix = std::string(C.IndexResult->CompletionSnippetSuffix);
377 S.ReturnType = std::string(C.IndexResult->ReturnType);
378 }
379 if (ExtractDocumentation && !Completion.Documentation) {
380 auto SetDoc = [&](llvm::StringRef Doc) {
381 if (!Doc.empty()) {
382 Completion.Documentation.emplace();
383 parseDocumentation(Doc, *Completion.Documentation);
384 }
385 };
386 if (C.IndexResult) {
387 SetDoc(C.IndexResult->Documentation);
388 } else if (C.SemaResult) {
389 SetDoc(getDocComment(*ASTCtx, *C.SemaResult,
390 /*CommentsFromHeader=*/false));
391 }
392 }
393 }
394
buildclang::clangd::__anon726c0d1d0111::CodeCompletionBuilder395 CodeCompletion build() {
396 Completion.ReturnType = summarizeReturnType();
397 Completion.Signature = summarizeSignature();
398 Completion.SnippetSuffix = summarizeSnippet();
399 Completion.BundleSize = Bundled.size();
400 return std::move(Completion);
401 }
402
403 private:
404 struct BundledEntry {
405 std::string SnippetSuffix;
406 std::string Signature;
407 std::string ReturnType;
408 };
409
410 // If all BundledEntries have the same value for a property, return it.
411 template <std::string BundledEntry::*Member>
onlyValueclang::clangd::__anon726c0d1d0111::CodeCompletionBuilder412 const std::string *onlyValue() const {
413 auto B = Bundled.begin(), E = Bundled.end();
414 for (auto I = B + 1; I != E; ++I)
415 if (I->*Member != B->*Member)
416 return nullptr;
417 return &(B->*Member);
418 }
419
onlyValueclang::clangd::__anon726c0d1d0111::CodeCompletionBuilder420 template <bool BundledEntry::*Member> const bool *onlyValue() const {
421 auto B = Bundled.begin(), E = Bundled.end();
422 for (auto I = B + 1; I != E; ++I)
423 if (I->*Member != B->*Member)
424 return nullptr;
425 return &(B->*Member);
426 }
427
summarizeReturnTypeclang::clangd::__anon726c0d1d0111::CodeCompletionBuilder428 std::string summarizeReturnType() const {
429 if (auto *RT = onlyValue<&BundledEntry::ReturnType>())
430 return *RT;
431 return "";
432 }
433
summarizeSnippetclang::clangd::__anon726c0d1d0111::CodeCompletionBuilder434 std::string summarizeSnippet() const {
435 if (IsUsingDeclaration)
436 return "";
437 // Suppress function argument snippets if args are already present.
438 if ((Completion.Kind == CompletionItemKind::Function ||
439 Completion.Kind == CompletionItemKind::Method ||
440 Completion.Kind == CompletionItemKind::Constructor) &&
441 NextTokenKind == tok::l_paren)
442 return "";
443 auto *Snippet = onlyValue<&BundledEntry::SnippetSuffix>();
444 if (!Snippet)
445 // All bundles are function calls.
446 // FIXME(ibiryukov): sometimes add template arguments to a snippet, e.g.
447 // we need to complete 'forward<$1>($0)'.
448 return "($0)";
449 if (EnableFunctionArgSnippets)
450 return *Snippet;
451
452 // Replace argument snippets with a simplified pattern.
453 if (Snippet->empty())
454 return "";
455 if (Completion.Kind == CompletionItemKind::Function ||
456 Completion.Kind == CompletionItemKind::Method) {
457 // Functions snippets can be of 2 types:
458 // - containing only function arguments, e.g.
459 // foo(${1:int p1}, ${2:int p2});
460 // We transform this pattern to '($0)' or '()'.
461 // - template arguments and function arguments, e.g.
462 // foo<${1:class}>(${2:int p1}).
463 // We transform this pattern to '<$1>()$0' or '<$0>()'.
464
465 bool EmptyArgs = llvm::StringRef(*Snippet).endswith("()");
466 if (Snippet->front() == '<')
467 return EmptyArgs ? "<$1>()$0" : "<$1>($0)";
468 if (Snippet->front() == '(')
469 return EmptyArgs ? "()" : "($0)";
470 return *Snippet; // Not an arg snippet?
471 }
472 // 'CompletionItemKind::Interface' matches template type aliases.
473 if (Completion.Kind == CompletionItemKind::Interface ||
474 Completion.Kind == CompletionItemKind::Class) {
475 if (Snippet->front() != '<')
476 return *Snippet; // Not an arg snippet?
477
478 // Classes and template using aliases can only have template arguments,
479 // e.g. Foo<${1:class}>.
480 if (llvm::StringRef(*Snippet).endswith("<>"))
481 return "<>"; // can happen with defaulted template arguments.
482 return "<$0>";
483 }
484 return *Snippet;
485 }
486
summarizeSignatureclang::clangd::__anon726c0d1d0111::CodeCompletionBuilder487 std::string summarizeSignature() const {
488 if (auto *Signature = onlyValue<&BundledEntry::Signature>())
489 return *Signature;
490 // All bundles are function calls.
491 return "(…)";
492 }
493
494 // ASTCtx can be nullptr if not run with sema.
495 ASTContext *ASTCtx;
496 CodeCompletion Completion;
497 llvm::SmallVector<BundledEntry, 1> Bundled;
498 bool ExtractDocumentation;
499 bool EnableFunctionArgSnippets;
500 // No snippets will be generated for using declarations and when the function
501 // arguments are already present.
502 bool IsUsingDeclaration;
503 tok::TokenKind NextTokenKind;
504 };
505
506 // Determine the symbol ID for a Sema code completion result, if possible.
getSymbolID(const CodeCompletionResult & R,const SourceManager & SM)507 llvm::Optional<SymbolID> getSymbolID(const CodeCompletionResult &R,
508 const SourceManager &SM) {
509 switch (R.Kind) {
510 case CodeCompletionResult::RK_Declaration:
511 case CodeCompletionResult::RK_Pattern: {
512 // Computing USR caches linkage, which may change after code completion.
513 if (hasUnstableLinkage(R.Declaration))
514 return llvm::None;
515 return clang::clangd::getSymbolID(R.Declaration);
516 }
517 case CodeCompletionResult::RK_Macro:
518 return clang::clangd::getSymbolID(R.Macro->getName(), R.MacroDefInfo, SM);
519 case CodeCompletionResult::RK_Keyword:
520 return None;
521 }
522 llvm_unreachable("unknown CodeCompletionResult kind");
523 }
524
525 // Scopes of the partial identifier we're trying to complete.
526 // It is used when we query the index for more completion results.
527 struct SpecifiedScope {
528 // The scopes we should look in, determined by Sema.
529 //
530 // If the qualifier was fully resolved, we look for completions in these
531 // scopes; if there is an unresolved part of the qualifier, it should be
532 // resolved within these scopes.
533 //
534 // Examples of qualified completion:
535 //
536 // "::vec" => {""}
537 // "using namespace std; ::vec^" => {"", "std::"}
538 // "namespace ns {using namespace std;} ns::^" => {"ns::", "std::"}
539 // "std::vec^" => {""} // "std" unresolved
540 //
541 // Examples of unqualified completion:
542 //
543 // "vec^" => {""}
544 // "using namespace std; vec^" => {"", "std::"}
545 // "using namespace std; namespace ns { vec^ }" => {"ns::", "std::", ""}
546 //
547 // "" for global namespace, "ns::" for normal namespace.
548 std::vector<std::string> AccessibleScopes;
549 // The full scope qualifier as typed by the user (without the leading "::").
550 // Set if the qualifier is not fully resolved by Sema.
551 llvm::Optional<std::string> UnresolvedQualifier;
552
553 // Construct scopes being queried in indexes. The results are deduplicated.
554 // This method format the scopes to match the index request representation.
scopesForIndexQueryclang::clangd::__anon726c0d1d0111::SpecifiedScope555 std::vector<std::string> scopesForIndexQuery() {
556 std::set<std::string> Results;
557 for (llvm::StringRef AS : AccessibleScopes)
558 Results.insert(
559 (AS + (UnresolvedQualifier ? *UnresolvedQualifier : "")).str());
560 return {Results.begin(), Results.end()};
561 }
562 };
563
564 // Get all scopes that will be queried in indexes and whether symbols from
565 // any scope is allowed. The first scope in the list is the preferred scope
566 // (e.g. enclosing namespace).
567 std::pair<std::vector<std::string>, bool>
getQueryScopes(CodeCompletionContext & CCContext,const Sema & CCSema,const CompletionPrefix & HeuristicPrefix,const CodeCompleteOptions & Opts)568 getQueryScopes(CodeCompletionContext &CCContext, const Sema &CCSema,
569 const CompletionPrefix &HeuristicPrefix,
570 const CodeCompleteOptions &Opts) {
571 SpecifiedScope Scopes;
572 for (auto *Context : CCContext.getVisitedContexts()) {
573 if (isa<TranslationUnitDecl>(Context))
574 Scopes.AccessibleScopes.push_back(""); // global namespace
575 else if (isa<NamespaceDecl>(Context))
576 Scopes.AccessibleScopes.push_back(printNamespaceScope(*Context));
577 }
578
579 const CXXScopeSpec *SemaSpecifier =
580 CCContext.getCXXScopeSpecifier().getValueOr(nullptr);
581 // Case 1: unqualified completion.
582 if (!SemaSpecifier) {
583 // Case 2 (exception): sema saw no qualifier, but there appears to be one!
584 // This can happen e.g. in incomplete macro expansions. Use heuristics.
585 if (!HeuristicPrefix.Qualifier.empty()) {
586 vlog("Sema said no scope specifier, but we saw {0} in the source code",
587 HeuristicPrefix.Qualifier);
588 StringRef SpelledSpecifier = HeuristicPrefix.Qualifier;
589 if (SpelledSpecifier.consume_front("::"))
590 Scopes.AccessibleScopes = {""};
591 Scopes.UnresolvedQualifier = std::string(SpelledSpecifier);
592 return {Scopes.scopesForIndexQuery(), false};
593 }
594 // The enclosing namespace must be first, it gets a quality boost.
595 std::vector<std::string> EnclosingAtFront;
596 std::string EnclosingScope = printNamespaceScope(*CCSema.CurContext);
597 EnclosingAtFront.push_back(EnclosingScope);
598 for (auto &S : Scopes.scopesForIndexQuery()) {
599 if (EnclosingScope != S)
600 EnclosingAtFront.push_back(std::move(S));
601 }
602 // Allow AllScopes completion as there is no explicit scope qualifier.
603 return {EnclosingAtFront, Opts.AllScopes};
604 }
605 // Case 3: sema saw and resolved a scope qualifier.
606 if (SemaSpecifier && SemaSpecifier->isValid())
607 return {Scopes.scopesForIndexQuery(), false};
608
609 // Case 4: There was a qualifier, and Sema didn't resolve it.
610 Scopes.AccessibleScopes.push_back(""); // Make sure global scope is included.
611 llvm::StringRef SpelledSpecifier = Lexer::getSourceText(
612 CharSourceRange::getCharRange(SemaSpecifier->getRange()),
613 CCSema.SourceMgr, clang::LangOptions());
614 if (SpelledSpecifier.consume_front("::"))
615 Scopes.AccessibleScopes = {""};
616 Scopes.UnresolvedQualifier = std::string(SpelledSpecifier);
617 // Sema excludes the trailing "::".
618 if (!Scopes.UnresolvedQualifier->empty())
619 *Scopes.UnresolvedQualifier += "::";
620
621 return {Scopes.scopesForIndexQuery(), false};
622 }
623
624 // Should we perform index-based completion in a context of the specified kind?
625 // FIXME: consider allowing completion, but restricting the result types.
contextAllowsIndex(enum CodeCompletionContext::Kind K)626 bool contextAllowsIndex(enum CodeCompletionContext::Kind K) {
627 switch (K) {
628 case CodeCompletionContext::CCC_TopLevel:
629 case CodeCompletionContext::CCC_ObjCInterface:
630 case CodeCompletionContext::CCC_ObjCImplementation:
631 case CodeCompletionContext::CCC_ObjCIvarList:
632 case CodeCompletionContext::CCC_ClassStructUnion:
633 case CodeCompletionContext::CCC_Statement:
634 case CodeCompletionContext::CCC_Expression:
635 case CodeCompletionContext::CCC_ObjCMessageReceiver:
636 case CodeCompletionContext::CCC_EnumTag:
637 case CodeCompletionContext::CCC_UnionTag:
638 case CodeCompletionContext::CCC_ClassOrStructTag:
639 case CodeCompletionContext::CCC_ObjCProtocolName:
640 case CodeCompletionContext::CCC_Namespace:
641 case CodeCompletionContext::CCC_Type:
642 case CodeCompletionContext::CCC_ParenthesizedExpression:
643 case CodeCompletionContext::CCC_ObjCInterfaceName:
644 case CodeCompletionContext::CCC_ObjCCategoryName:
645 case CodeCompletionContext::CCC_Symbol:
646 case CodeCompletionContext::CCC_SymbolOrNewName:
647 return true;
648 case CodeCompletionContext::CCC_OtherWithMacros:
649 case CodeCompletionContext::CCC_DotMemberAccess:
650 case CodeCompletionContext::CCC_ArrowMemberAccess:
651 case CodeCompletionContext::CCC_ObjCPropertyAccess:
652 case CodeCompletionContext::CCC_MacroName:
653 case CodeCompletionContext::CCC_MacroNameUse:
654 case CodeCompletionContext::CCC_PreprocessorExpression:
655 case CodeCompletionContext::CCC_PreprocessorDirective:
656 case CodeCompletionContext::CCC_SelectorName:
657 case CodeCompletionContext::CCC_TypeQualifiers:
658 case CodeCompletionContext::CCC_ObjCInstanceMessage:
659 case CodeCompletionContext::CCC_ObjCClassMessage:
660 case CodeCompletionContext::CCC_IncludedFile:
661 // FIXME: Provide identifier based completions for the following contexts:
662 case CodeCompletionContext::CCC_Other: // Be conservative.
663 case CodeCompletionContext::CCC_NaturalLanguage:
664 case CodeCompletionContext::CCC_Recovery:
665 case CodeCompletionContext::CCC_NewName:
666 return false;
667 }
668 llvm_unreachable("unknown code completion context");
669 }
670
isInjectedClass(const NamedDecl & D)671 static bool isInjectedClass(const NamedDecl &D) {
672 if (auto *R = dyn_cast_or_null<RecordDecl>(&D))
673 if (R->isInjectedClassName())
674 return true;
675 return false;
676 }
677
678 // Some member calls are excluded because they're so rarely useful.
isExcludedMember(const NamedDecl & D)679 static bool isExcludedMember(const NamedDecl &D) {
680 // Destructor completion is rarely useful, and works inconsistently.
681 // (s.^ completes ~string, but s.~st^ is an error).
682 if (D.getKind() == Decl::CXXDestructor)
683 return true;
684 // Injected name may be useful for A::foo(), but who writes A::A::foo()?
685 if (isInjectedClass(D))
686 return true;
687 // Explicit calls to operators are also rare.
688 auto NameKind = D.getDeclName().getNameKind();
689 if (NameKind == DeclarationName::CXXOperatorName ||
690 NameKind == DeclarationName::CXXLiteralOperatorName ||
691 NameKind == DeclarationName::CXXConversionFunctionName)
692 return true;
693 return false;
694 }
695
696 // The CompletionRecorder captures Sema code-complete output, including context.
697 // It filters out ignored results (but doesn't apply fuzzy-filtering yet).
698 // It doesn't do scoring or conversion to CompletionItem yet, as we want to
699 // merge with index results first.
700 // Generally the fields and methods of this object should only be used from
701 // within the callback.
702 struct CompletionRecorder : public CodeCompleteConsumer {
CompletionRecorderclang::clangd::__anon726c0d1d0111::CompletionRecorder703 CompletionRecorder(const CodeCompleteOptions &Opts,
704 llvm::unique_function<void()> ResultsCallback)
705 : CodeCompleteConsumer(Opts.getClangCompleteOpts()),
706 CCContext(CodeCompletionContext::CCC_Other), Opts(Opts),
707 CCAllocator(std::make_shared<GlobalCodeCompletionAllocator>()),
708 CCTUInfo(CCAllocator), ResultsCallback(std::move(ResultsCallback)) {
709 assert(this->ResultsCallback);
710 }
711
712 std::vector<CodeCompletionResult> Results;
713 CodeCompletionContext CCContext;
714 Sema *CCSema = nullptr; // Sema that created the results.
715 // FIXME: Sema is scary. Can we store ASTContext and Preprocessor, instead?
716
ProcessCodeCompleteResultsclang::clangd::__anon726c0d1d0111::CompletionRecorder717 void ProcessCodeCompleteResults(class Sema &S, CodeCompletionContext Context,
718 CodeCompletionResult *InResults,
719 unsigned NumResults) override final {
720 // Results from recovery mode are generally useless, and the callback after
721 // recovery (if any) is usually more interesting. To make sure we handle the
722 // future callback from sema, we just ignore all callbacks in recovery mode,
723 // as taking only results from recovery mode results in poor completion
724 // results.
725 // FIXME: in case there is no future sema completion callback after the
726 // recovery mode, we might still want to provide some results (e.g. trivial
727 // identifier-based completion).
728 if (Context.getKind() == CodeCompletionContext::CCC_Recovery) {
729 log("Code complete: Ignoring sema code complete callback with Recovery "
730 "context.");
731 return;
732 }
733 // If a callback is called without any sema result and the context does not
734 // support index-based completion, we simply skip it to give way to
735 // potential future callbacks with results.
736 if (NumResults == 0 && !contextAllowsIndex(Context.getKind()))
737 return;
738 if (CCSema) {
739 log("Multiple code complete callbacks (parser backtracked?). "
740 "Dropping results from context {0}, keeping results from {1}.",
741 getCompletionKindString(Context.getKind()),
742 getCompletionKindString(this->CCContext.getKind()));
743 return;
744 }
745 // Record the completion context.
746 CCSema = &S;
747 CCContext = Context;
748
749 // Retain the results we might want.
750 for (unsigned I = 0; I < NumResults; ++I) {
751 auto &Result = InResults[I];
752 // Class members that are shadowed by subclasses are usually noise.
753 if (Result.Hidden && Result.Declaration &&
754 Result.Declaration->isCXXClassMember())
755 continue;
756 if (!Opts.IncludeIneligibleResults &&
757 (Result.Availability == CXAvailability_NotAvailable ||
758 Result.Availability == CXAvailability_NotAccessible))
759 continue;
760 if (Result.Declaration &&
761 !Context.getBaseType().isNull() // is this a member-access context?
762 && isExcludedMember(*Result.Declaration))
763 continue;
764 // Skip injected class name when no class scope is not explicitly set.
765 // E.g. show injected A::A in `using A::A^` but not in "A^".
766 if (Result.Declaration && !Context.getCXXScopeSpecifier().hasValue() &&
767 isInjectedClass(*Result.Declaration))
768 continue;
769 // We choose to never append '::' to completion results in clangd.
770 Result.StartsNestedNameSpecifier = false;
771 Results.push_back(Result);
772 }
773 ResultsCallback();
774 }
775
getAllocatorclang::clangd::__anon726c0d1d0111::CompletionRecorder776 CodeCompletionAllocator &getAllocator() override { return *CCAllocator; }
getCodeCompletionTUInfoclang::clangd::__anon726c0d1d0111::CompletionRecorder777 CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
778
779 // Returns the filtering/sorting name for Result, which must be from Results.
780 // Returned string is owned by this recorder (or the AST).
getNameclang::clangd::__anon726c0d1d0111::CompletionRecorder781 llvm::StringRef getName(const CodeCompletionResult &Result) {
782 switch (Result.Kind) {
783 case CodeCompletionResult::RK_Declaration:
784 if (auto *ID = Result.Declaration->getIdentifier())
785 return ID->getName();
786 break;
787 case CodeCompletionResult::RK_Keyword:
788 return Result.Keyword;
789 case CodeCompletionResult::RK_Macro:
790 return Result.Macro->getName();
791 case CodeCompletionResult::RK_Pattern:
792 return Result.Pattern->getTypedText();
793 }
794 auto *CCS = codeCompletionString(Result);
795 return CCS->getTypedText();
796 }
797
798 // Build a CodeCompletion string for R, which must be from Results.
799 // The CCS will be owned by this recorder.
codeCompletionStringclang::clangd::__anon726c0d1d0111::CompletionRecorder800 CodeCompletionString *codeCompletionString(const CodeCompletionResult &R) {
801 // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway.
802 return const_cast<CodeCompletionResult &>(R).CreateCodeCompletionString(
803 *CCSema, CCContext, *CCAllocator, CCTUInfo,
804 /*IncludeBriefComments=*/false);
805 }
806
807 private:
808 CodeCompleteOptions Opts;
809 std::shared_ptr<GlobalCodeCompletionAllocator> CCAllocator;
810 CodeCompletionTUInfo CCTUInfo;
811 llvm::unique_function<void()> ResultsCallback;
812 };
813
814 struct ScoredSignature {
815 // When set, requires documentation to be requested from the index with this
816 // ID.
817 llvm::Optional<SymbolID> IDForDoc;
818 SignatureInformation Signature;
819 SignatureQualitySignals Quality;
820 };
821
822 class SignatureHelpCollector final : public CodeCompleteConsumer {
823 public:
SignatureHelpCollector(const clang::CodeCompleteOptions & CodeCompleteOpts,const SymbolIndex * Index,SignatureHelp & SigHelp)824 SignatureHelpCollector(const clang::CodeCompleteOptions &CodeCompleteOpts,
825 const SymbolIndex *Index, SignatureHelp &SigHelp)
826 : CodeCompleteConsumer(CodeCompleteOpts), SigHelp(SigHelp),
827 Allocator(std::make_shared<clang::GlobalCodeCompletionAllocator>()),
828 CCTUInfo(Allocator), Index(Index) {}
829
ProcessOverloadCandidates(Sema & S,unsigned CurrentArg,OverloadCandidate * Candidates,unsigned NumCandidates,SourceLocation OpenParLoc)830 void ProcessOverloadCandidates(Sema &S, unsigned CurrentArg,
831 OverloadCandidate *Candidates,
832 unsigned NumCandidates,
833 SourceLocation OpenParLoc) override {
834 assert(!OpenParLoc.isInvalid());
835 SourceManager &SrcMgr = S.getSourceManager();
836 OpenParLoc = SrcMgr.getFileLoc(OpenParLoc);
837 if (SrcMgr.isInMainFile(OpenParLoc))
838 SigHelp.argListStart = sourceLocToPosition(SrcMgr, OpenParLoc);
839 else
840 elog("Location oustide main file in signature help: {0}",
841 OpenParLoc.printToString(SrcMgr));
842
843 std::vector<ScoredSignature> ScoredSignatures;
844 SigHelp.signatures.reserve(NumCandidates);
845 ScoredSignatures.reserve(NumCandidates);
846 // FIXME(rwols): How can we determine the "active overload candidate"?
847 // Right now the overloaded candidates seem to be provided in a "best fit"
848 // order, so I'm not too worried about this.
849 SigHelp.activeSignature = 0;
850 assert(CurrentArg <= (unsigned)std::numeric_limits<int>::max() &&
851 "too many arguments");
852 SigHelp.activeParameter = static_cast<int>(CurrentArg);
853 for (unsigned I = 0; I < NumCandidates; ++I) {
854 OverloadCandidate Candidate = Candidates[I];
855 // We want to avoid showing instantiated signatures, because they may be
856 // long in some cases (e.g. when 'T' is substituted with 'std::string', we
857 // would get 'std::basic_string<char>').
858 if (auto *Func = Candidate.getFunction()) {
859 if (auto *Pattern = Func->getTemplateInstantiationPattern())
860 Candidate = OverloadCandidate(Pattern);
861 }
862
863 const auto *CCS = Candidate.CreateSignatureString(
864 CurrentArg, S, *Allocator, CCTUInfo, true);
865 assert(CCS && "Expected the CodeCompletionString to be non-null");
866 ScoredSignatures.push_back(processOverloadCandidate(
867 Candidate, *CCS,
868 Candidate.getFunction()
869 ? getDeclComment(S.getASTContext(), *Candidate.getFunction())
870 : ""));
871 }
872
873 // Sema does not load the docs from the preamble, so we need to fetch extra
874 // docs from the index instead.
875 llvm::DenseMap<SymbolID, std::string> FetchedDocs;
876 if (Index) {
877 LookupRequest IndexRequest;
878 for (const auto &S : ScoredSignatures) {
879 if (!S.IDForDoc)
880 continue;
881 IndexRequest.IDs.insert(*S.IDForDoc);
882 }
883 Index->lookup(IndexRequest, [&](const Symbol &S) {
884 if (!S.Documentation.empty())
885 FetchedDocs[S.ID] = std::string(S.Documentation);
886 });
887 log("SigHelp: requested docs for {0} symbols from the index, got {1} "
888 "symbols with non-empty docs in the response",
889 IndexRequest.IDs.size(), FetchedDocs.size());
890 }
891
892 llvm::sort(ScoredSignatures, [](const ScoredSignature &L,
893 const ScoredSignature &R) {
894 // Ordering follows:
895 // - Less number of parameters is better.
896 // - Function is better than FunctionType which is better than
897 // Function Template.
898 // - High score is better.
899 // - Shorter signature is better.
900 // - Alphabetically smaller is better.
901 if (L.Quality.NumberOfParameters != R.Quality.NumberOfParameters)
902 return L.Quality.NumberOfParameters < R.Quality.NumberOfParameters;
903 if (L.Quality.NumberOfOptionalParameters !=
904 R.Quality.NumberOfOptionalParameters)
905 return L.Quality.NumberOfOptionalParameters <
906 R.Quality.NumberOfOptionalParameters;
907 if (L.Quality.Kind != R.Quality.Kind) {
908 using OC = CodeCompleteConsumer::OverloadCandidate;
909 switch (L.Quality.Kind) {
910 case OC::CK_Function:
911 return true;
912 case OC::CK_FunctionType:
913 return R.Quality.Kind != OC::CK_Function;
914 case OC::CK_FunctionTemplate:
915 return false;
916 }
917 llvm_unreachable("Unknown overload candidate type.");
918 }
919 if (L.Signature.label.size() != R.Signature.label.size())
920 return L.Signature.label.size() < R.Signature.label.size();
921 return L.Signature.label < R.Signature.label;
922 });
923
924 for (auto &SS : ScoredSignatures) {
925 auto IndexDocIt =
926 SS.IDForDoc ? FetchedDocs.find(*SS.IDForDoc) : FetchedDocs.end();
927 if (IndexDocIt != FetchedDocs.end())
928 SS.Signature.documentation = IndexDocIt->second;
929
930 SigHelp.signatures.push_back(std::move(SS.Signature));
931 }
932 }
933
getAllocator()934 GlobalCodeCompletionAllocator &getAllocator() override { return *Allocator; }
935
getCodeCompletionTUInfo()936 CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
937
938 private:
processParameterChunk(llvm::StringRef ChunkText,SignatureInformation & Signature) const939 void processParameterChunk(llvm::StringRef ChunkText,
940 SignatureInformation &Signature) const {
941 // (!) this is O(n), should still be fast compared to building ASTs.
942 unsigned ParamStartOffset = lspLength(Signature.label);
943 unsigned ParamEndOffset = ParamStartOffset + lspLength(ChunkText);
944 // A piece of text that describes the parameter that corresponds to
945 // the code-completion location within a function call, message send,
946 // macro invocation, etc.
947 Signature.label += ChunkText;
948 ParameterInformation Info;
949 Info.labelOffsets.emplace(ParamStartOffset, ParamEndOffset);
950 // FIXME: only set 'labelOffsets' when all clients migrate out of it.
951 Info.labelString = std::string(ChunkText);
952
953 Signature.parameters.push_back(std::move(Info));
954 }
955
processOptionalChunk(const CodeCompletionString & CCS,SignatureInformation & Signature,SignatureQualitySignals & Signal) const956 void processOptionalChunk(const CodeCompletionString &CCS,
957 SignatureInformation &Signature,
958 SignatureQualitySignals &Signal) const {
959 for (const auto &Chunk : CCS) {
960 switch (Chunk.Kind) {
961 case CodeCompletionString::CK_Optional:
962 assert(Chunk.Optional &&
963 "Expected the optional code completion string to be non-null.");
964 processOptionalChunk(*Chunk.Optional, Signature, Signal);
965 break;
966 case CodeCompletionString::CK_VerticalSpace:
967 break;
968 case CodeCompletionString::CK_CurrentParameter:
969 case CodeCompletionString::CK_Placeholder:
970 processParameterChunk(Chunk.Text, Signature);
971 Signal.NumberOfOptionalParameters++;
972 break;
973 default:
974 Signature.label += Chunk.Text;
975 break;
976 }
977 }
978 }
979
980 // FIXME(ioeric): consider moving CodeCompletionString logic here to
981 // CompletionString.h.
processOverloadCandidate(const OverloadCandidate & Candidate,const CodeCompletionString & CCS,llvm::StringRef DocComment) const982 ScoredSignature processOverloadCandidate(const OverloadCandidate &Candidate,
983 const CodeCompletionString &CCS,
984 llvm::StringRef DocComment) const {
985 SignatureInformation Signature;
986 SignatureQualitySignals Signal;
987 const char *ReturnType = nullptr;
988
989 Signature.documentation = formatDocumentation(CCS, DocComment);
990 Signal.Kind = Candidate.getKind();
991
992 for (const auto &Chunk : CCS) {
993 switch (Chunk.Kind) {
994 case CodeCompletionString::CK_ResultType:
995 // A piece of text that describes the type of an entity or,
996 // for functions and methods, the return type.
997 assert(!ReturnType && "Unexpected CK_ResultType");
998 ReturnType = Chunk.Text;
999 break;
1000 case CodeCompletionString::CK_CurrentParameter:
1001 case CodeCompletionString::CK_Placeholder:
1002 processParameterChunk(Chunk.Text, Signature);
1003 Signal.NumberOfParameters++;
1004 break;
1005 case CodeCompletionString::CK_Optional: {
1006 // The rest of the parameters are defaulted/optional.
1007 assert(Chunk.Optional &&
1008 "Expected the optional code completion string to be non-null.");
1009 processOptionalChunk(*Chunk.Optional, Signature, Signal);
1010 break;
1011 }
1012 case CodeCompletionString::CK_VerticalSpace:
1013 break;
1014 default:
1015 Signature.label += Chunk.Text;
1016 break;
1017 }
1018 }
1019 if (ReturnType) {
1020 Signature.label += " -> ";
1021 Signature.label += ReturnType;
1022 }
1023 dlog("Signal for {0}: {1}", Signature, Signal);
1024 ScoredSignature Result;
1025 Result.Signature = std::move(Signature);
1026 Result.Quality = Signal;
1027 const FunctionDecl *Func = Candidate.getFunction();
1028 if (Func && Result.Signature.documentation.empty()) {
1029 // Computing USR caches linkage, which may change after code completion.
1030 if (!hasUnstableLinkage(Func))
1031 Result.IDForDoc = clangd::getSymbolID(Func);
1032 }
1033 return Result;
1034 }
1035
1036 SignatureHelp &SigHelp;
1037 std::shared_ptr<clang::GlobalCodeCompletionAllocator> Allocator;
1038 CodeCompletionTUInfo CCTUInfo;
1039 const SymbolIndex *Index;
1040 }; // SignatureHelpCollector
1041
1042 struct SemaCompleteInput {
1043 PathRef FileName;
1044 size_t Offset;
1045 const PreambleData &Preamble;
1046 const llvm::Optional<PreamblePatch> Patch;
1047 const ParseInputs &ParseInput;
1048 };
1049
loadMainFilePreambleMacros(const Preprocessor & PP,const PreambleData & Preamble)1050 void loadMainFilePreambleMacros(const Preprocessor &PP,
1051 const PreambleData &Preamble) {
1052 // The ExternalPreprocessorSource has our macros, if we know where to look.
1053 // We can read all the macros using PreambleMacros->ReadDefinedMacros(),
1054 // but this includes transitively included files, so may deserialize a lot.
1055 ExternalPreprocessorSource *PreambleMacros = PP.getExternalSource();
1056 // As we have the names of the macros, we can look up their IdentifierInfo
1057 // and then use this to load just the macros we want.
1058 IdentifierInfoLookup *PreambleIdentifiers =
1059 PP.getIdentifierTable().getExternalIdentifierLookup();
1060 if (!PreambleIdentifiers || !PreambleMacros)
1061 return;
1062 for (const auto &MacroName : Preamble.Macros.Names)
1063 if (auto *II = PreambleIdentifiers->get(MacroName.getKey()))
1064 if (II->isOutOfDate())
1065 PreambleMacros->updateOutOfDateIdentifier(*II);
1066 }
1067
1068 // Invokes Sema code completion on a file.
1069 // If \p Includes is set, it will be updated based on the compiler invocation.
semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,const clang::CodeCompleteOptions & Options,const SemaCompleteInput & Input,IncludeStructure * Includes=nullptr)1070 bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
1071 const clang::CodeCompleteOptions &Options,
1072 const SemaCompleteInput &Input,
1073 IncludeStructure *Includes = nullptr) {
1074 trace::Span Tracer("Sema completion");
1075
1076 IgnoreDiagnostics IgnoreDiags;
1077 auto CI = buildCompilerInvocation(Input.ParseInput, IgnoreDiags);
1078 if (!CI) {
1079 elog("Couldn't create CompilerInvocation");
1080 return false;
1081 }
1082 auto &FrontendOpts = CI->getFrontendOpts();
1083 FrontendOpts.SkipFunctionBodies = true;
1084 // Disable typo correction in Sema.
1085 CI->getLangOpts()->SpellChecking = false;
1086 // Code completion won't trigger in delayed template bodies.
1087 // This is on-by-default in windows to allow parsing SDK headers; we're only
1088 // disabling it for the main-file (not preamble).
1089 CI->getLangOpts()->DelayedTemplateParsing = false;
1090 // Setup code completion.
1091 FrontendOpts.CodeCompleteOpts = Options;
1092 FrontendOpts.CodeCompletionAt.FileName = std::string(Input.FileName);
1093 std::tie(FrontendOpts.CodeCompletionAt.Line,
1094 FrontendOpts.CodeCompletionAt.Column) =
1095 offsetToClangLineColumn(Input.ParseInput.Contents, Input.Offset);
1096
1097 std::unique_ptr<llvm::MemoryBuffer> ContentsBuffer =
1098 llvm::MemoryBuffer::getMemBufferCopy(Input.ParseInput.Contents,
1099 Input.FileName);
1100 // The diagnostic options must be set before creating a CompilerInstance.
1101 CI->getDiagnosticOpts().IgnoreWarnings = true;
1102 // We reuse the preamble whether it's valid or not. This is a
1103 // correctness/performance tradeoff: building without a preamble is slow, and
1104 // completion is latency-sensitive.
1105 // However, if we're completing *inside* the preamble section of the draft,
1106 // overriding the preamble will break sema completion. Fortunately we can just
1107 // skip all includes in this case; these completions are really simple.
1108 PreambleBounds PreambleRegion =
1109 ComputePreambleBounds(*CI->getLangOpts(), ContentsBuffer.get(), 0);
1110 bool CompletingInPreamble = PreambleRegion.Size > Input.Offset;
1111 if (Input.Patch)
1112 Input.Patch->apply(*CI);
1113 // NOTE: we must call BeginSourceFile after prepareCompilerInstance. Otherwise
1114 // the remapped buffers do not get freed.
1115 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS =
1116 Input.ParseInput.TFS->view(Input.ParseInput.CompileCommand.Directory);
1117 if (Input.Preamble.StatCache)
1118 VFS = Input.Preamble.StatCache->getConsumingFS(std::move(VFS));
1119 auto Clang = prepareCompilerInstance(
1120 std::move(CI), !CompletingInPreamble ? &Input.Preamble.Preamble : nullptr,
1121 std::move(ContentsBuffer), std::move(VFS), IgnoreDiags);
1122 Clang->getPreprocessorOpts().SingleFileParseMode = CompletingInPreamble;
1123 Clang->setCodeCompletionConsumer(Consumer.release());
1124
1125 SyntaxOnlyAction Action;
1126 if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) {
1127 log("BeginSourceFile() failed when running codeComplete for {0}",
1128 Input.FileName);
1129 return false;
1130 }
1131 // Macros can be defined within the preamble region of the main file.
1132 // They don't fall nicely into our index/Sema dichotomy:
1133 // - they're not indexed for completion (they're not available across files)
1134 // - but Sema code complete won't see them: as part of the preamble, they're
1135 // deserialized only when mentioned.
1136 // Force them to be deserialized so SemaCodeComplete sees them.
1137 loadMainFilePreambleMacros(Clang->getPreprocessor(), Input.Preamble);
1138 if (Includes)
1139 Clang->getPreprocessor().addPPCallbacks(
1140 collectIncludeStructureCallback(Clang->getSourceManager(), Includes));
1141 if (llvm::Error Err = Action.Execute()) {
1142 log("Execute() failed when running codeComplete for {0}: {1}",
1143 Input.FileName, toString(std::move(Err)));
1144 return false;
1145 }
1146 Action.EndSourceFile();
1147
1148 return true;
1149 }
1150
1151 // Should we allow index completions in the specified context?
allowIndex(CodeCompletionContext & CC)1152 bool allowIndex(CodeCompletionContext &CC) {
1153 if (!contextAllowsIndex(CC.getKind()))
1154 return false;
1155 // We also avoid ClassName::bar (but allow namespace::bar).
1156 auto Scope = CC.getCXXScopeSpecifier();
1157 if (!Scope)
1158 return true;
1159 NestedNameSpecifier *NameSpec = (*Scope)->getScopeRep();
1160 if (!NameSpec)
1161 return true;
1162 // We only query the index when qualifier is a namespace.
1163 // If it's a class, we rely solely on sema completions.
1164 switch (NameSpec->getKind()) {
1165 case NestedNameSpecifier::Global:
1166 case NestedNameSpecifier::Namespace:
1167 case NestedNameSpecifier::NamespaceAlias:
1168 return true;
1169 case NestedNameSpecifier::Super:
1170 case NestedNameSpecifier::TypeSpec:
1171 case NestedNameSpecifier::TypeSpecWithTemplate:
1172 // Unresolved inside a template.
1173 case NestedNameSpecifier::Identifier:
1174 return false;
1175 }
1176 llvm_unreachable("invalid NestedNameSpecifier kind");
1177 }
1178
startAsyncFuzzyFind(const SymbolIndex & Index,const FuzzyFindRequest & Req)1179 std::future<SymbolSlab> startAsyncFuzzyFind(const SymbolIndex &Index,
1180 const FuzzyFindRequest &Req) {
1181 return runAsync<SymbolSlab>([&Index, Req]() {
1182 trace::Span Tracer("Async fuzzyFind");
1183 SymbolSlab::Builder Syms;
1184 Index.fuzzyFind(Req, [&Syms](const Symbol &Sym) { Syms.insert(Sym); });
1185 return std::move(Syms).build();
1186 });
1187 }
1188
1189 // Creates a `FuzzyFindRequest` based on the cached index request from the
1190 // last completion, if any, and the speculated completion filter text in the
1191 // source code.
speculativeFuzzyFindRequestForCompletion(FuzzyFindRequest CachedReq,const CompletionPrefix & HeuristicPrefix)1192 FuzzyFindRequest speculativeFuzzyFindRequestForCompletion(
1193 FuzzyFindRequest CachedReq, const CompletionPrefix &HeuristicPrefix) {
1194 CachedReq.Query = std::string(HeuristicPrefix.Name);
1195 return CachedReq;
1196 }
1197
1198 // Runs Sema-based (AST) and Index-based completion, returns merged results.
1199 //
1200 // There are a few tricky considerations:
1201 // - the AST provides information needed for the index query (e.g. which
1202 // namespaces to search in). So Sema must start first.
1203 // - we only want to return the top results (Opts.Limit).
1204 // Building CompletionItems for everything else is wasteful, so we want to
1205 // preserve the "native" format until we're done with scoring.
1206 // - the data underlying Sema completion items is owned by the AST and various
1207 // other arenas, which must stay alive for us to build CompletionItems.
1208 // - we may get duplicate results from Sema and the Index, we need to merge.
1209 //
1210 // So we start Sema completion first, and do all our work in its callback.
1211 // We use the Sema context information to query the index.
1212 // Then we merge the two result sets, producing items that are Sema/Index/Both.
1213 // These items are scored, and the top N are synthesized into the LSP response.
1214 // Finally, we can clean up the data structures created by Sema completion.
1215 //
1216 // Main collaborators are:
1217 // - semaCodeComplete sets up the compiler machinery to run code completion.
1218 // - CompletionRecorder captures Sema completion results, including context.
1219 // - SymbolIndex (Opts.Index) provides index completion results as Symbols
1220 // - CompletionCandidates are the result of merging Sema and Index results.
1221 // Each candidate points to an underlying CodeCompletionResult (Sema), a
1222 // Symbol (Index), or both. It computes the result quality score.
1223 // CompletionCandidate also does conversion to CompletionItem (at the end).
1224 // - FuzzyMatcher scores how the candidate matches the partial identifier.
1225 // This score is combined with the result quality score for the final score.
1226 // - TopN determines the results with the best score.
1227 class CodeCompleteFlow {
1228 PathRef FileName;
1229 IncludeStructure Includes; // Complete once the compiler runs.
1230 SpeculativeFuzzyFind *SpecFuzzyFind; // Can be nullptr.
1231 const CodeCompleteOptions &Opts;
1232
1233 // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
1234 CompletionRecorder *Recorder = nullptr;
1235 CodeCompletionContext::Kind CCContextKind = CodeCompletionContext::CCC_Other;
1236 bool IsUsingDeclaration = false;
1237 // The snippets will not be generated if the token following completion
1238 // location is an opening parenthesis (tok::l_paren) because this would add
1239 // extra parenthesis.
1240 tok::TokenKind NextTokenKind = tok::eof;
1241 // Counters for logging.
1242 int NSema = 0, NIndex = 0, NSemaAndIndex = 0, NIdent = 0;
1243 bool Incomplete = false; // Would more be available with a higher limit?
1244 CompletionPrefix HeuristicPrefix;
1245 llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
1246 Range ReplacedRange;
1247 std::vector<std::string> QueryScopes; // Initialized once Sema runs.
1248 // Initialized once QueryScopes is initialized, if there are scopes.
1249 llvm::Optional<ScopeDistance> ScopeProximity;
1250 llvm::Optional<OpaqueType> PreferredType; // Initialized once Sema runs.
1251 // Whether to query symbols from any scope. Initialized once Sema runs.
1252 bool AllScopes = false;
1253 llvm::StringSet<> ContextWords;
1254 // Include-insertion and proximity scoring rely on the include structure.
1255 // This is available after Sema has run.
1256 llvm::Optional<IncludeInserter> Inserter; // Available during runWithSema.
1257 llvm::Optional<URIDistance> FileProximity; // Initialized once Sema runs.
1258 /// Speculative request based on the cached request and the filter text before
1259 /// the cursor.
1260 /// Initialized right before sema run. This is only set if `SpecFuzzyFind` is
1261 /// set and contains a cached request.
1262 llvm::Optional<FuzzyFindRequest> SpecReq;
1263
1264 public:
1265 // A CodeCompleteFlow object is only useful for calling run() exactly once.
// Includes is copied into the flow. Opts is held by reference, and
// SpecFuzzyFind (which can be nullptr) by pointer.
CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes,
                 SpeculativeFuzzyFind *SpecFuzzyFind,
                 const CodeCompleteOptions &Opts)
    : FileName(FileName),
      Includes(Includes),
      SpecFuzzyFind(SpecFuzzyFind),
      Opts(Opts) {}
1271
run(const SemaCompleteInput & SemaCCInput)1272 CodeCompleteResult run(const SemaCompleteInput &SemaCCInput) && {
1273 trace::Span Tracer("CodeCompleteFlow");
1274 HeuristicPrefix = guessCompletionPrefix(SemaCCInput.ParseInput.Contents,
1275 SemaCCInput.Offset);
1276 populateContextWords(SemaCCInput.ParseInput.Contents);
1277 if (Opts.Index && SpecFuzzyFind && SpecFuzzyFind->CachedReq.hasValue()) {
1278 assert(!SpecFuzzyFind->Result.valid());
1279 SpecReq = speculativeFuzzyFindRequestForCompletion(
1280 *SpecFuzzyFind->CachedReq, HeuristicPrefix);
1281 SpecFuzzyFind->Result = startAsyncFuzzyFind(*Opts.Index, *SpecReq);
1282 }
1283
1284 // We run Sema code completion first. It builds an AST and calculates:
1285 // - completion results based on the AST.
1286 // - partial identifier and context. We need these for the index query.
1287 CodeCompleteResult Output;
1288 auto RecorderOwner = std::make_unique<CompletionRecorder>(Opts, [&]() {
1289 assert(Recorder && "Recorder is not set");
1290 CCContextKind = Recorder->CCContext.getKind();
1291 IsUsingDeclaration = Recorder->CCContext.isUsingDeclaration();
1292 auto Style = getFormatStyleForFile(SemaCCInput.FileName,
1293 SemaCCInput.ParseInput.Contents,
1294 *SemaCCInput.ParseInput.TFS);
1295 const auto NextToken = Lexer::findNextToken(
1296 Recorder->CCSema->getPreprocessor().getCodeCompletionLoc(),
1297 Recorder->CCSema->getSourceManager(), Recorder->CCSema->LangOpts);
1298 if (NextToken)
1299 NextTokenKind = NextToken->getKind();
1300 // If preprocessor was run, inclusions from preprocessor callback should
1301 // already be added to Includes.
1302 Inserter.emplace(
1303 SemaCCInput.FileName, SemaCCInput.ParseInput.Contents, Style,
1304 SemaCCInput.ParseInput.CompileCommand.Directory,
1305 &Recorder->CCSema->getPreprocessor().getHeaderSearchInfo());
1306 for (const auto &Inc : Includes.MainFileIncludes)
1307 Inserter->addExisting(Inc);
1308
1309 // Most of the cost of file proximity is in initializing the FileDistance
1310 // structures based on the observed includes, once per query. Conceptually
1311 // that happens here (though the per-URI-scheme initialization is lazy).
1312 // The per-result proximity scoring is (amortized) very cheap.
1313 FileDistanceOptions ProxOpts{}; // Use defaults.
1314 const auto &SM = Recorder->CCSema->getSourceManager();
1315 llvm::StringMap<SourceParams> ProxSources;
1316 for (auto &Entry : Includes.includeDepth(
1317 SM.getFileEntryForID(SM.getMainFileID())->getName())) {
1318 auto &Source = ProxSources[Entry.getKey()];
1319 Source.Cost = Entry.getValue() * ProxOpts.IncludeCost;
1320 // Symbols near our transitive includes are good, but only consider
1321 // things in the same directory or below it. Otherwise there can be
1322 // many false positives.
1323 if (Entry.getValue() > 0)
1324 Source.MaxUpTraversals = 1;
1325 }
1326 FileProximity.emplace(ProxSources, ProxOpts);
1327
1328 Output = runWithSema();
1329 Inserter.reset(); // Make sure this doesn't out-live Clang.
1330 SPAN_ATTACH(Tracer, "sema_completion_kind",
1331 getCompletionKindString(CCContextKind));
1332 log("Code complete: sema context {0}, query scopes [{1}] (AnyScope={2}), "
1333 "expected type {3}{4}",
1334 getCompletionKindString(CCContextKind),
1335 llvm::join(QueryScopes.begin(), QueryScopes.end(), ","), AllScopes,
1336 PreferredType ? Recorder->CCContext.getPreferredType().getAsString()
1337 : "<none>",
1338 IsUsingDeclaration ? ", inside using declaration" : "");
1339 });
1340
1341 Recorder = RecorderOwner.get();
1342
1343 semaCodeComplete(std::move(RecorderOwner), Opts.getClangCompleteOpts(),
1344 SemaCCInput, &Includes);
1345 logResults(Output, Tracer);
1346 return Output;
1347 }
1348
logResults(const CodeCompleteResult & Output,const trace::Span & Tracer)1349 void logResults(const CodeCompleteResult &Output, const trace::Span &Tracer) {
1350 SPAN_ATTACH(Tracer, "sema_results", NSema);
1351 SPAN_ATTACH(Tracer, "index_results", NIndex);
1352 SPAN_ATTACH(Tracer, "merged_results", NSemaAndIndex);
1353 SPAN_ATTACH(Tracer, "identifier_results", NIdent);
1354 SPAN_ATTACH(Tracer, "returned_results", int64_t(Output.Completions.size()));
1355 SPAN_ATTACH(Tracer, "incomplete", Output.HasMore);
1356 log("Code complete: {0} results from Sema, {1} from Index, "
1357 "{2} matched, {3} from identifiers, {4} returned{5}.",
1358 NSema, NIndex, NSemaAndIndex, NIdent, Output.Completions.size(),
1359 Output.HasMore ? " (incomplete)" : "");
1360 assert(!Opts.Limit || Output.Completions.size() <= Opts.Limit);
1361 // We don't assert that isIncomplete means we hit a limit.
1362 // Indexes may choose to impose their own limits even if we don't have one.
1363 }
1364
runWithoutSema(llvm::StringRef Content,size_t Offset,const ThreadsafeFS & TFS)1365 CodeCompleteResult runWithoutSema(llvm::StringRef Content, size_t Offset,
1366 const ThreadsafeFS &TFS) && {
1367 trace::Span Tracer("CodeCompleteWithoutSema");
1368 // Fill in fields normally set by runWithSema()
1369 HeuristicPrefix = guessCompletionPrefix(Content, Offset);
1370 populateContextWords(Content);
1371 CCContextKind = CodeCompletionContext::CCC_Recovery;
1372 IsUsingDeclaration = false;
1373 Filter = FuzzyMatcher(HeuristicPrefix.Name);
1374 auto Pos = offsetToPosition(Content, Offset);
1375 ReplacedRange.start = ReplacedRange.end = Pos;
1376 ReplacedRange.start.character -= HeuristicPrefix.Name.size();
1377
1378 llvm::StringMap<SourceParams> ProxSources;
1379 ProxSources[FileName].Cost = 0;
1380 FileProximity.emplace(ProxSources);
1381
1382 auto Style = getFormatStyleForFile(FileName, Content, TFS);
1383 // This will only insert verbatim headers.
1384 Inserter.emplace(FileName, Content, Style,
1385 /*BuildDir=*/"", /*HeaderSearchInfo=*/nullptr);
1386
1387 auto Identifiers = collectIdentifiers(Content, Style);
1388 std::vector<RawIdentifier> IdentifierResults;
1389 for (const auto &IDAndCount : Identifiers) {
1390 RawIdentifier ID;
1391 ID.Name = IDAndCount.first();
1392 ID.References = IDAndCount.second;
1393 // Avoid treating typed filter as an identifier.
1394 if (ID.Name == HeuristicPrefix.Name)
1395 --ID.References;
1396 if (ID.References > 0)
1397 IdentifierResults.push_back(std::move(ID));
1398 }
1399
1400 // Simplified version of getQueryScopes():
1401 // - accessible scopes are determined heuristically.
1402 // - all-scopes query if no qualifier was typed (and it's allowed).
1403 SpecifiedScope Scopes;
1404 Scopes.AccessibleScopes = visibleNamespaces(
1405 Content.take_front(Offset), format::getFormattingLangOpts(Style));
1406 for (std::string &S : Scopes.AccessibleScopes)
1407 if (!S.empty())
1408 S.append("::"); // visibleNamespaces doesn't include trailing ::.
1409 if (HeuristicPrefix.Qualifier.empty())
1410 AllScopes = Opts.AllScopes;
1411 else if (HeuristicPrefix.Qualifier.startswith("::")) {
1412 Scopes.AccessibleScopes = {""};
1413 Scopes.UnresolvedQualifier =
1414 std::string(HeuristicPrefix.Qualifier.drop_front(2));
1415 } else
1416 Scopes.UnresolvedQualifier = std::string(HeuristicPrefix.Qualifier);
1417 // First scope is the (modified) enclosing scope.
1418 QueryScopes = Scopes.scopesForIndexQuery();
1419 ScopeProximity.emplace(QueryScopes);
1420
1421 SymbolSlab IndexResults = Opts.Index ? queryIndex() : SymbolSlab();
1422
1423 CodeCompleteResult Output = toCodeCompleteResult(mergeResults(
1424 /*SemaResults=*/{}, IndexResults, IdentifierResults));
1425 Output.RanParser = false;
1426 logResults(Output, Tracer);
1427 return Output;
1428 }
1429
1430 private:
populateContextWords(llvm::StringRef Content)1431 void populateContextWords(llvm::StringRef Content) {
1432 // Take last 3 lines before the completion point.
1433 unsigned RangeEnd = HeuristicPrefix.Qualifier.begin() - Content.data(),
1434 RangeBegin = RangeEnd;
1435 for (size_t I = 0; I < 3 && RangeBegin > 0; ++I) {
1436 auto PrevNL = Content.rfind('\n', RangeBegin);
1437 if (PrevNL == StringRef::npos) {
1438 RangeBegin = 0;
1439 break;
1440 }
1441 RangeBegin = PrevNL;
1442 }
1443
1444 ContextWords = collectWords(Content.slice(RangeBegin, RangeEnd));
1445 dlog("Completion context words: {0}",
1446 llvm::join(ContextWords.keys(), ", "));
1447 }
1448
1449 // This is called by run() once Sema code completion is done, but before the
1450 // Sema data structures are torn down. It does all the real work.
runWithSema()1451 CodeCompleteResult runWithSema() {
1452 const auto &CodeCompletionRange = CharSourceRange::getCharRange(
1453 Recorder->CCSema->getPreprocessor().getCodeCompletionTokenRange());
1454 // When we are getting completions with an empty identifier, for example
1455 // std::vector<int> asdf;
1456 // asdf.^;
1457 // Then the range will be invalid and we will be doing insertion, use
1458 // current cursor position in such cases as range.
1459 if (CodeCompletionRange.isValid()) {
1460 ReplacedRange = halfOpenToRange(Recorder->CCSema->getSourceManager(),
1461 CodeCompletionRange);
1462 } else {
1463 const auto &Pos = sourceLocToPosition(
1464 Recorder->CCSema->getSourceManager(),
1465 Recorder->CCSema->getPreprocessor().getCodeCompletionLoc());
1466 ReplacedRange.start = ReplacedRange.end = Pos;
1467 }
1468 Filter = FuzzyMatcher(
1469 Recorder->CCSema->getPreprocessor().getCodeCompletionFilter());
1470 std::tie(QueryScopes, AllScopes) = getQueryScopes(
1471 Recorder->CCContext, *Recorder->CCSema, HeuristicPrefix, Opts);
1472 if (!QueryScopes.empty())
1473 ScopeProximity.emplace(QueryScopes);
1474 PreferredType =
1475 OpaqueType::fromType(Recorder->CCSema->getASTContext(),
1476 Recorder->CCContext.getPreferredType());
1477 // Sema provides the needed context to query the index.
1478 // FIXME: in addition to querying for extra/overlapping symbols, we should
1479 // explicitly request symbols corresponding to Sema results.
1480 // We can use their signals even if the index can't suggest them.
1481 // We must copy index results to preserve them, but there are at most Limit.
1482 auto IndexResults = (Opts.Index && allowIndex(Recorder->CCContext))
1483 ? queryIndex()
1484 : SymbolSlab();
1485 trace::Span Tracer("Populate CodeCompleteResult");
1486 // Merge Sema and Index results, score them, and pick the winners.
1487 auto Top =
1488 mergeResults(Recorder->Results, IndexResults, /*Identifiers*/ {});
1489 return toCodeCompleteResult(Top);
1490 }
1491
1492 CodeCompleteResult
toCodeCompleteResult(const std::vector<ScoredBundle> & Scored)1493 toCodeCompleteResult(const std::vector<ScoredBundle> &Scored) {
1494 CodeCompleteResult Output;
1495
1496 // Convert the results to final form, assembling the expensive strings.
1497 for (auto &C : Scored) {
1498 Output.Completions.push_back(toCodeCompletion(C.first));
1499 Output.Completions.back().Score = C.second;
1500 Output.Completions.back().CompletionTokenRange = ReplacedRange;
1501 }
1502 Output.HasMore = Incomplete;
1503 Output.Context = CCContextKind;
1504 Output.CompletionRange = ReplacedRange;
1505 return Output;
1506 }
1507
queryIndex()1508 SymbolSlab queryIndex() {
1509 trace::Span Tracer("Query index");
1510 SPAN_ATTACH(Tracer, "limit", int64_t(Opts.Limit));
1511
1512 // Build the query.
1513 FuzzyFindRequest Req;
1514 if (Opts.Limit)
1515 Req.Limit = Opts.Limit;
1516 Req.Query = std::string(Filter->pattern());
1517 Req.RestrictForCodeCompletion = true;
1518 Req.Scopes = QueryScopes;
1519 Req.AnyScope = AllScopes;
1520 // FIXME: we should send multiple weighted paths here.
1521 Req.ProximityPaths.push_back(std::string(FileName));
1522 if (PreferredType)
1523 Req.PreferredTypes.push_back(std::string(PreferredType->raw()));
1524 vlog("Code complete: fuzzyFind({0:2})", toJSON(Req));
1525
1526 if (SpecFuzzyFind)
1527 SpecFuzzyFind->NewReq = Req;
1528 if (SpecFuzzyFind && SpecFuzzyFind->Result.valid() && (*SpecReq == Req)) {
1529 vlog("Code complete: speculative fuzzy request matches the actual index "
1530 "request. Waiting for the speculative index results.");
1531 SPAN_ATTACH(Tracer, "Speculative results", true);
1532
1533 trace::Span WaitSpec("Wait speculative results");
1534 return SpecFuzzyFind->Result.get();
1535 }
1536
1537 SPAN_ATTACH(Tracer, "Speculative results", false);
1538
1539 // Run the query against the index.
1540 SymbolSlab::Builder ResultsBuilder;
1541 if (Opts.Index->fuzzyFind(
1542 Req, [&](const Symbol &Sym) { ResultsBuilder.insert(Sym); }))
1543 Incomplete = true;
1544 return std::move(ResultsBuilder).build();
1545 }
1546
1547 // Merges Sema and Index results where possible, to form CompletionCandidates.
1548 // \p Identifiers is raw identifiers that can also be completion candidates.
1549 // Identifiers are not merged with results from index or sema.
1550 // Groups overloads if desired, to form CompletionCandidate::Bundles. The
1551 // bundles are scored and top results are returned, best to worst.
1552 std::vector<ScoredBundle>
mergeResults(const std::vector<CodeCompletionResult> & SemaResults,const SymbolSlab & IndexResults,const std::vector<RawIdentifier> & IdentifierResults)1553 mergeResults(const std::vector<CodeCompletionResult> &SemaResults,
1554 const SymbolSlab &IndexResults,
1555 const std::vector<RawIdentifier> &IdentifierResults) {
1556 trace::Span Tracer("Merge and score results");
1557 std::vector<CompletionCandidate::Bundle> Bundles;
1558 llvm::DenseMap<size_t, size_t> BundleLookup;
1559 auto AddToBundles = [&](const CodeCompletionResult *SemaResult,
1560 const Symbol *IndexResult,
1561 const RawIdentifier *IdentifierResult) {
1562 CompletionCandidate C;
1563 C.SemaResult = SemaResult;
1564 C.IndexResult = IndexResult;
1565 C.IdentifierResult = IdentifierResult;
1566 if (C.IndexResult) {
1567 C.Name = IndexResult->Name;
1568 C.RankedIncludeHeaders = getRankedIncludes(*C.IndexResult);
1569 } else if (C.SemaResult) {
1570 C.Name = Recorder->getName(*SemaResult);
1571 } else {
1572 assert(IdentifierResult);
1573 C.Name = IdentifierResult->Name;
1574 }
1575 if (auto OverloadSet = C.overloadSet(Opts)) {
1576 auto Ret = BundleLookup.try_emplace(OverloadSet, Bundles.size());
1577 if (Ret.second)
1578 Bundles.emplace_back();
1579 Bundles[Ret.first->second].push_back(std::move(C));
1580 } else {
1581 Bundles.emplace_back();
1582 Bundles.back().push_back(std::move(C));
1583 }
1584 };
1585 llvm::DenseSet<const Symbol *> UsedIndexResults;
1586 auto CorrespondingIndexResult =
1587 [&](const CodeCompletionResult &SemaResult) -> const Symbol * {
1588 if (auto SymID =
1589 getSymbolID(SemaResult, Recorder->CCSema->getSourceManager())) {
1590 auto I = IndexResults.find(*SymID);
1591 if (I != IndexResults.end()) {
1592 UsedIndexResults.insert(&*I);
1593 return &*I;
1594 }
1595 }
1596 return nullptr;
1597 };
1598 // Emit all Sema results, merging them with Index results if possible.
1599 for (auto &SemaResult : SemaResults)
1600 AddToBundles(&SemaResult, CorrespondingIndexResult(SemaResult), nullptr);
1601 // Now emit any Index-only results.
1602 for (const auto &IndexResult : IndexResults) {
1603 if (UsedIndexResults.count(&IndexResult))
1604 continue;
1605 AddToBundles(/*SemaResult=*/nullptr, &IndexResult, nullptr);
1606 }
1607 // Emit identifier results.
1608 for (const auto &Ident : IdentifierResults)
1609 AddToBundles(/*SemaResult=*/nullptr, /*IndexResult=*/nullptr, &Ident);
1610 // We only keep the best N results at any time, in "native" format.
1611 TopN<ScoredBundle, ScoredBundleGreater> Top(
1612 Opts.Limit == 0 ? std::numeric_limits<size_t>::max() : Opts.Limit);
1613 for (auto &Bundle : Bundles)
1614 addCandidate(Top, std::move(Bundle));
1615 return std::move(Top).items();
1616 }
1617
fuzzyScore(const CompletionCandidate & C)1618 llvm::Optional<float> fuzzyScore(const CompletionCandidate &C) {
1619 // Macros can be very spammy, so we only support prefix completion.
1620 // We won't end up with underfull index results, as macros are sema-only.
1621 if (C.SemaResult && C.SemaResult->Kind == CodeCompletionResult::RK_Macro &&
1622 !C.Name.startswith_lower(Filter->pattern()))
1623 return None;
1624 return Filter->match(C.Name);
1625 }
1626
1627 // Scores a candidate and adds it to the TopN structure.
addCandidate(TopN<ScoredBundle,ScoredBundleGreater> & Candidates,CompletionCandidate::Bundle Bundle)1628 void addCandidate(TopN<ScoredBundle, ScoredBundleGreater> &Candidates,
1629 CompletionCandidate::Bundle Bundle) {
1630 SymbolQualitySignals Quality;
1631 SymbolRelevanceSignals Relevance;
1632 Relevance.Context = CCContextKind;
1633 Relevance.Name = Bundle.front().Name;
1634 Relevance.Query = SymbolRelevanceSignals::CodeComplete;
1635 Relevance.FileProximityMatch = FileProximity.getPointer();
1636 if (ScopeProximity)
1637 Relevance.ScopeProximityMatch = ScopeProximity.getPointer();
1638 if (PreferredType)
1639 Relevance.HadContextType = true;
1640 Relevance.ContextWords = &ContextWords;
1641
1642 auto &First = Bundle.front();
1643 if (auto FuzzyScore = fuzzyScore(First))
1644 Relevance.NameMatch = *FuzzyScore;
1645 else
1646 return;
1647 SymbolOrigin Origin = SymbolOrigin::Unknown;
1648 bool FromIndex = false;
1649 for (const auto &Candidate : Bundle) {
1650 if (Candidate.IndexResult) {
1651 Quality.merge(*Candidate.IndexResult);
1652 Relevance.merge(*Candidate.IndexResult);
1653 Origin |= Candidate.IndexResult->Origin;
1654 FromIndex = true;
1655 if (!Candidate.IndexResult->Type.empty())
1656 Relevance.HadSymbolType |= true;
1657 if (PreferredType &&
1658 PreferredType->raw() == Candidate.IndexResult->Type) {
1659 Relevance.TypeMatchesPreferred = true;
1660 }
1661 }
1662 if (Candidate.SemaResult) {
1663 Quality.merge(*Candidate.SemaResult);
1664 Relevance.merge(*Candidate.SemaResult);
1665 if (PreferredType) {
1666 if (auto CompletionType = OpaqueType::fromCompletionResult(
1667 Recorder->CCSema->getASTContext(), *Candidate.SemaResult)) {
1668 Relevance.HadSymbolType |= true;
1669 if (PreferredType == CompletionType)
1670 Relevance.TypeMatchesPreferred = true;
1671 }
1672 }
1673 Origin |= SymbolOrigin::AST;
1674 }
1675 if (Candidate.IdentifierResult) {
1676 Quality.References = Candidate.IdentifierResult->References;
1677 Relevance.Scope = SymbolRelevanceSignals::FileScope;
1678 Origin |= SymbolOrigin::Identifier;
1679 }
1680 }
1681
1682 CodeCompletion::Scores Scores;
1683 Scores.Quality = Quality.evaluate();
1684 Scores.Relevance = Relevance.evaluate();
1685 Scores.Total = evaluateSymbolAndRelevance(Scores.Quality, Scores.Relevance);
1686 // NameMatch is in fact a multiplier on total score, so rescoring is sound.
1687 Scores.ExcludingName = Relevance.NameMatch
1688 ? Scores.Total / Relevance.NameMatch
1689 : Scores.Quality;
1690
1691 if (Opts.RecordCCResult)
1692 Opts.RecordCCResult(toCodeCompletion(Bundle), Quality, Relevance,
1693 Scores.Total);
1694
1695 dlog("CodeComplete: {0} ({1}) = {2}\n{3}{4}\n", First.Name,
1696 llvm::to_string(Origin), Scores.Total, llvm::to_string(Quality),
1697 llvm::to_string(Relevance));
1698
1699 NSema += bool(Origin & SymbolOrigin::AST);
1700 NIndex += FromIndex;
1701 NSemaAndIndex += bool(Origin & SymbolOrigin::AST) && FromIndex;
1702 NIdent += bool(Origin & SymbolOrigin::Identifier);
1703 if (Candidates.push({std::move(Bundle), Scores}))
1704 Incomplete = true;
1705 }
1706
toCodeCompletion(const CompletionCandidate::Bundle & Bundle)1707 CodeCompletion toCodeCompletion(const CompletionCandidate::Bundle &Bundle) {
1708 llvm::Optional<CodeCompletionBuilder> Builder;
1709 for (const auto &Item : Bundle) {
1710 CodeCompletionString *SemaCCS =
1711 Item.SemaResult ? Recorder->codeCompletionString(*Item.SemaResult)
1712 : nullptr;
1713 if (!Builder)
1714 Builder.emplace(Recorder ? &Recorder->CCSema->getASTContext() : nullptr,
1715 Item, SemaCCS, QueryScopes, *Inserter, FileName,
1716 CCContextKind, Opts, IsUsingDeclaration, NextTokenKind);
1717 else
1718 Builder->add(Item, SemaCCS);
1719 }
1720 return Builder->build();
1721 }
1722 };
1723
1724 } // namespace
1725
getClangCompleteOpts() const1726 clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const {
1727 clang::CodeCompleteOptions Result;
1728 Result.IncludeCodePatterns = EnableSnippets && IncludeCodePatterns;
1729 Result.IncludeMacros = IncludeMacros;
1730 Result.IncludeGlobals = true;
1731 // We choose to include full comments and not do doxygen parsing in
1732 // completion.
1733 // FIXME: ideally, we should support doxygen in some form, e.g. do markdown
1734 // formatting of the comments.
1735 Result.IncludeBriefComments = false;
1736
1737 // When an is used, Sema is responsible for completing the main file,
1738 // the index can provide results from the preamble.
1739 // Tell Sema not to deserialize the preamble to look for results.
1740 Result.LoadExternal = !Index;
1741 Result.IncludeFixIts = IncludeFixIts;
1742
1743 return Result;
1744 }
1745
guessCompletionPrefix(llvm::StringRef Content,unsigned Offset)1746 CompletionPrefix guessCompletionPrefix(llvm::StringRef Content,
1747 unsigned Offset) {
1748 assert(Offset <= Content.size());
1749 StringRef Rest = Content.take_front(Offset);
1750 CompletionPrefix Result;
1751
1752 // Consume the unqualified name. We only handle ASCII characters.
1753 // isIdentifierBody will let us match "0invalid", but we don't mind.
1754 while (!Rest.empty() && isIdentifierBody(Rest.back()))
1755 Rest = Rest.drop_back();
1756 Result.Name = Content.slice(Rest.size(), Offset);
1757
1758 // Consume qualifiers.
1759 while (Rest.consume_back("::") && !Rest.endswith(":")) // reject ::::
1760 while (!Rest.empty() && isIdentifierBody(Rest.back()))
1761 Rest = Rest.drop_back();
1762 Result.Qualifier =
1763 Content.slice(Rest.size(), Result.Name.begin() - Content.begin());
1764
1765 return Result;
1766 }
1767
codeComplete(PathRef FileName,Position Pos,const PreambleData * Preamble,const ParseInputs & ParseInput,CodeCompleteOptions Opts,SpeculativeFuzzyFind * SpecFuzzyFind)1768 CodeCompleteResult codeComplete(PathRef FileName, Position Pos,
1769 const PreambleData *Preamble,
1770 const ParseInputs &ParseInput,
1771 CodeCompleteOptions Opts,
1772 SpeculativeFuzzyFind *SpecFuzzyFind) {
1773 auto Offset = positionToOffset(ParseInput.Contents, Pos);
1774 if (!Offset) {
1775 elog("Code completion position was invalid {0}", Offset.takeError());
1776 return CodeCompleteResult();
1777 }
1778 auto Flow = CodeCompleteFlow(
1779 FileName, Preamble ? Preamble->Includes : IncludeStructure(),
1780 SpecFuzzyFind, Opts);
1781 return (!Preamble || Opts.RunParser == CodeCompleteOptions::NeverParse)
1782 ? std::move(Flow).runWithoutSema(ParseInput.Contents, *Offset,
1783 *ParseInput.TFS)
1784 : std::move(Flow).run({FileName, *Offset, *Preamble,
1785 // We want to serve code completions with
1786 // low latency, so don't bother patching.
1787 /*PreamblePatch=*/llvm::None, ParseInput});
1788 }
1789
signatureHelp(PathRef FileName,Position Pos,const PreambleData & Preamble,const ParseInputs & ParseInput)1790 SignatureHelp signatureHelp(PathRef FileName, Position Pos,
1791 const PreambleData &Preamble,
1792 const ParseInputs &ParseInput) {
1793 auto Offset = positionToOffset(ParseInput.Contents, Pos);
1794 if (!Offset) {
1795 elog("Signature help position was invalid {0}", Offset.takeError());
1796 return SignatureHelp();
1797 }
1798 SignatureHelp Result;
1799 clang::CodeCompleteOptions Options;
1800 Options.IncludeGlobals = false;
1801 Options.IncludeMacros = false;
1802 Options.IncludeCodePatterns = false;
1803 Options.IncludeBriefComments = false;
1804 semaCodeComplete(
1805 std::make_unique<SignatureHelpCollector>(Options, ParseInput.Index,
1806 Result),
1807 Options,
1808 {FileName, *Offset, Preamble,
1809 PreamblePatch::create(FileName, ParseInput, Preamble), ParseInput});
1810 return Result;
1811 }
1812
isIndexedForCodeCompletion(const NamedDecl & ND,ASTContext & ASTCtx)1813 bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) {
1814 auto InTopLevelScope = [](const NamedDecl &ND) {
1815 switch (ND.getDeclContext()->getDeclKind()) {
1816 case Decl::TranslationUnit:
1817 case Decl::Namespace:
1818 case Decl::LinkageSpec:
1819 return true;
1820 default:
1821 break;
1822 };
1823 return false;
1824 };
1825 // We only complete symbol's name, which is the same as the name of the
1826 // *primary* template in case of template specializations.
1827 if (isExplicitTemplateSpecialization(&ND))
1828 return false;
1829
1830 if (InTopLevelScope(ND))
1831 return true;
1832
1833 if (const auto *EnumDecl = dyn_cast<clang::EnumDecl>(ND.getDeclContext()))
1834 return InTopLevelScope(*EnumDecl) && !EnumDecl->isScoped();
1835
1836 return false;
1837 }
1838
1839 // FIXME: find a home for this (that can depend on both markup and Protocol).
renderDoc(const markup::Document & Doc,MarkupKind Kind)1840 static MarkupContent renderDoc(const markup::Document &Doc, MarkupKind Kind) {
1841 MarkupContent Result;
1842 Result.kind = Kind;
1843 switch (Kind) {
1844 case MarkupKind::PlainText:
1845 Result.value.append(Doc.asPlainText());
1846 break;
1847 case MarkupKind::Markdown:
1848 Result.value.append(Doc.asMarkdown());
1849 break;
1850 }
1851 return Result;
1852 }
1853
render(const CodeCompleteOptions & Opts) const1854 CompletionItem CodeCompletion::render(const CodeCompleteOptions &Opts) const {
1855 CompletionItem LSP;
1856 const auto *InsertInclude = Includes.empty() ? nullptr : &Includes[0];
1857 LSP.label = ((InsertInclude && InsertInclude->Insertion)
1858 ? Opts.IncludeIndicator.Insert
1859 : Opts.IncludeIndicator.NoInsert) +
1860 (Opts.ShowOrigins ? "[" + llvm::to_string(Origin) + "]" : "") +
1861 RequiredQualifier + Name + Signature;
1862
1863 LSP.kind = Kind;
1864 LSP.detail = BundleSize > 1
1865 ? std::string(llvm::formatv("[{0} overloads]", BundleSize))
1866 : ReturnType;
1867 LSP.deprecated = Deprecated;
1868 // Combine header information and documentation in LSP `documentation` field.
1869 // This is not quite right semantically, but tends to display well in editors.
1870 if (InsertInclude || Documentation) {
1871 markup::Document Doc;
1872 if (InsertInclude)
1873 Doc.addParagraph().appendText("From ").appendCode(InsertInclude->Header);
1874 if (Documentation)
1875 Doc.append(*Documentation);
1876 LSP.documentation = renderDoc(Doc, Opts.DocumentationFormat);
1877 }
1878 LSP.sortText = sortText(Score.Total, Name);
1879 LSP.filterText = Name;
1880 LSP.textEdit = {CompletionTokenRange, RequiredQualifier + Name};
1881 // Merge continuous additionalTextEdits into main edit. The main motivation
1882 // behind this is to help LSP clients, it seems most of them are confused when
1883 // they are provided with additionalTextEdits that are consecutive to main
1884 // edit.
1885 // Note that we store additional text edits from back to front in a line. That
1886 // is mainly to help LSP clients again, so that changes do not effect each
1887 // other.
1888 for (const auto &FixIt : FixIts) {
1889 if (FixIt.range.end == LSP.textEdit->range.start) {
1890 LSP.textEdit->newText = FixIt.newText + LSP.textEdit->newText;
1891 LSP.textEdit->range.start = FixIt.range.start;
1892 } else {
1893 LSP.additionalTextEdits.push_back(FixIt);
1894 }
1895 }
1896 if (Opts.EnableSnippets)
1897 LSP.textEdit->newText += SnippetSuffix;
1898
1899 // FIXME(kadircet): Do not even fill insertText after making sure textEdit is
1900 // compatible with most of the editors.
1901 LSP.insertText = LSP.textEdit->newText;
1902 LSP.insertTextFormat = Opts.EnableSnippets ? InsertTextFormat::Snippet
1903 : InsertTextFormat::PlainText;
1904 if (InsertInclude && InsertInclude->Insertion)
1905 LSP.additionalTextEdits.push_back(*InsertInclude->Insertion);
1906
1907 LSP.score = Score.ExcludingName;
1908
1909 return LSP;
1910 }
1911
operator <<(llvm::raw_ostream & OS,const CodeCompletion & C)1912 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const CodeCompletion &C) {
1913 // For now just lean on CompletionItem.
1914 return OS << C.render(CodeCompleteOptions());
1915 }
1916
operator <<(llvm::raw_ostream & OS,const CodeCompleteResult & R)1917 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
1918 const CodeCompleteResult &R) {
1919 OS << "CodeCompleteResult: " << R.Completions.size() << (R.HasMore ? "+" : "")
1920 << " (" << getCompletionKindString(R.Context) << ")"
1921 << " items:\n";
1922 for (const auto &C : R.Completions)
1923 OS << C << "\n";
1924 return OS;
1925 }
1926
1927 // Heuristically detect whether the `Line` is an unterminated include filename.
isIncludeFile(llvm::StringRef Line)1928 bool isIncludeFile(llvm::StringRef Line) {
1929 Line = Line.ltrim();
1930 if (!Line.consume_front("#"))
1931 return false;
1932 Line = Line.ltrim();
1933 if (!(Line.consume_front("include_next") || Line.consume_front("include") ||
1934 Line.consume_front("import")))
1935 return false;
1936 Line = Line.ltrim();
1937 if (Line.consume_front("<"))
1938 return Line.count('>') == 0;
1939 if (Line.consume_front("\""))
1940 return Line.count('"') == 0;
1941 return false;
1942 }
1943
allowImplicitCompletion(llvm::StringRef Content,unsigned Offset)1944 bool allowImplicitCompletion(llvm::StringRef Content, unsigned Offset) {
1945 // Look at last line before completion point only.
1946 Content = Content.take_front(Offset);
1947 auto Pos = Content.rfind('\n');
1948 if (Pos != llvm::StringRef::npos)
1949 Content = Content.substr(Pos + 1);
1950
1951 // Complete after scope operators.
1952 if (Content.endswith(".") || Content.endswith("->") || Content.endswith("::"))
1953 return true;
1954 // Complete after `#include <` and #include `<foo/`.
1955 if ((Content.endswith("<") || Content.endswith("\"") ||
1956 Content.endswith("/")) &&
1957 isIncludeFile(Content))
1958 return true;
1959
1960 // Complete words. Give non-ascii characters the benefit of the doubt.
1961 return !Content.empty() &&
1962 (isIdentifierBody(Content.back()) || !llvm::isASCII(Content.back()));
1963 }
1964
1965 } // namespace clangd
1966 } // namespace clang
1967