1 //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// Some operations such as code completion produce a set of candidates. 10 /// Usually the user can choose between them, but we should put the best options 11 /// at the top (they're easier to select, and more likely to be seen). 12 /// 13 /// This file defines building blocks for ranking candidates. 14 /// It's used by the features directly and also in the implementation of 15 /// indexes, as indexes also need to heuristically limit their results. 16 /// 17 /// The facilities here are: 18 /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString 19 /// These are structured in a way that they can be debugged, and are fairly 20 /// consistent regardless of the source. 21 /// - compute scores from scoring signals. These are suitable for sorting. 22 /// - sorting utilities like the TopN container. 23 /// These could be split up further to isolate dependencies if we care. 24 /// 25 //===----------------------------------------------------------------------===// 26 27 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 28 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 29 30 #include "ExpectedTypes.h" 31 #include "FileDistance.h" 32 #include "TUScheduler.h" 33 #include "clang/Sema/CodeCompleteConsumer.h" 34 #include "llvm/ADT/ArrayRef.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/ADT/StringSet.h" 37 #include <algorithm> 38 #include <functional> 39 #include <vector> 40 41 namespace llvm { 42 class raw_ostream; 43 } // namespace llvm 44 45 namespace clang { 46 class CodeCompletionResult; 47 48 namespace clangd { 49 50 struct Symbol; 51 class URIDistance; 52 53 // Signals structs are designed to be aggregated from 0 or more sources. 54 // A default instance has neutral signals, and sources are merged into it. 55 // They can be dumped for debugging, and evaluate()d into a score. 56 57 /// Attributes of a symbol that affect how much we like it. 58 struct SymbolQualitySignals { 59 bool Deprecated = false; 60 bool ReservedName = false; // __foo, _Foo are usually implementation details. 61 // FIXME: make these findable once user types _. 62 bool ImplementationDetail = false; 63 unsigned References = 0; 64 65 enum SymbolCategory { 66 Unknown = 0, 67 Variable, 68 Macro, 69 Type, 70 Function, 71 Constructor, 72 Destructor, 73 Namespace, 74 Keyword, 75 Operator, 76 } Category = Unknown; 77 78 void merge(const CodeCompletionResult &SemaCCResult); 79 void merge(const Symbol &IndexResult); 80 81 // Condense these signals down to a single number, higher is better. 82 float evaluateHeuristics() const; 83 }; 84 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 85 const SymbolQualitySignals &); 86 87 /// Attributes of a symbol-query pair that affect how much we like it. 88 struct SymbolRelevanceSignals { 89 /// The name of the symbol (for ContextWords). Must be explicitly assigned. 90 llvm::StringRef Name; 91 /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. 92 float NameMatch = 1; 93 /// Lowercase words relevant to the context (e.g. near the completion point). 94 llvm::StringSet<>* ContextWords = nullptr; 95 bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). 96 /// Whether fixits needs to be applied for that completion or not. 97 bool NeedsFixIts = false; 98 bool InBaseClass = false; // A member from base class of the accessed class. 99 100 URIDistance *FileProximityMatch = nullptr; 101 /// These are used to calculate proximity between the index symbol and the 102 /// query. 103 llvm::StringRef SymbolURI; 104 /// FIXME: unify with index proximity score - signals should be 105 /// source-independent. 106 /// Proximity between best declaration and the query. [0-1], 1 is closest. 107 float SemaFileProximityScore = 0; 108 109 // Scope proximity is only considered (both index and sema) when this is set. 110 ScopeDistance *ScopeProximityMatch = nullptr; 111 llvm::Optional<llvm::StringRef> SymbolScope; 112 // A symbol from sema should be accessible from the current scope. 113 bool SemaSaysInScope = false; 114 115 // An approximate measure of where we expect the symbol to be used. 116 enum AccessibleScope { 117 FunctionScope, 118 ClassScope, 119 FileScope, 120 GlobalScope, 121 } Scope = GlobalScope; 122 123 enum QueryType { 124 CodeComplete, 125 Generic, 126 } Query = Generic; 127 128 CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other; 129 130 // Whether symbol is an instance member of a class. 131 bool IsInstanceMember = false; 132 133 // Whether clang provided a preferred type in the completion context. 134 bool HadContextType = false; 135 // Whether a source completion item or a symbol had a type information. 136 bool HadSymbolType = false; 137 // Whether the item matches the type expected in the completion context. 138 bool TypeMatchesPreferred = false; 139 140 /// Length of the unqualified partial name of Symbol typed in 141 /// CompletionPrefix. 142 unsigned FilterLength = 0; 143 144 const ASTSignals *MainFileSignals = nullptr; 145 /// Number of references to the candidate in the main file. 146 unsigned MainFileRefs = 0; 147 /// Number of unique symbols in the main file which belongs to candidate's 148 /// namespace. This indicates how relevant the namespace is in the current 149 /// file. 150 unsigned ScopeRefsInFile = 0; 151 152 /// Set of derived signals computed by calculateDerivedSignals(). Must not be 153 /// set explicitly. 154 struct DerivedSignals { 155 /// Whether Name contains some word from context. 156 bool NameMatchesContext = false; 157 /// Min distance between SymbolURI and all the headers included by the TU. 158 unsigned FileProximityDistance = FileDistance::Unreachable; 159 /// Min distance between SymbolScope and all the available scopes. 160 unsigned ScopeProximityDistance = FileDistance::Unreachable; 161 }; 162 163 DerivedSignals calculateDerivedSignals() const; 164 165 void merge(const CodeCompletionResult &SemaResult); 166 void merge(const Symbol &IndexResult); 167 void computeASTSignals(const CodeCompletionResult &SemaResult); 168 169 // Condense these signals down to a single number, higher is better. 170 float evaluateHeuristics() const; 171 }; 172 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 173 const SymbolRelevanceSignals &); 174 175 /// Combine symbol quality and relevance into a single score. 176 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); 177 178 /// Same semantics as CodeComplete::Score. Quality score and Relevance score 179 /// have been removed since DecisionForest cannot assign individual scores to 180 /// Quality and Relevance signals. 181 struct DecisionForestScores { 182 float Total = 0.f; 183 float ExcludingName = 0.f; 184 }; 185 186 DecisionForestScores 187 evaluateDecisionForest(const SymbolQualitySignals &Quality, 188 const SymbolRelevanceSignals &Relevance, float Base); 189 190 /// TopN<T> is a lossy container that preserves only the "best" N elements. 191 template <typename T, typename Compare = std::greater<T>> class TopN { 192 public: 193 using value_type = T; 194 TopN(size_t N, Compare Greater = Compare()) N(N)195 : N(N), Greater(std::move(Greater)) {} 196 197 // Adds a candidate to the set. 198 // Returns true if a candidate was dropped to get back under N. push(value_type && V)199 bool push(value_type &&V) { 200 bool Dropped = false; 201 if (Heap.size() >= N) { 202 Dropped = true; 203 if (N > 0 && Greater(V, Heap.front())) { 204 std::pop_heap(Heap.begin(), Heap.end(), Greater); 205 Heap.back() = std::move(V); 206 std::push_heap(Heap.begin(), Heap.end(), Greater); 207 } 208 } else { 209 Heap.push_back(std::move(V)); 210 std::push_heap(Heap.begin(), Heap.end(), Greater); 211 } 212 assert(Heap.size() <= N); 213 assert(std::is_heap(Heap.begin(), Heap.end(), Greater)); 214 return Dropped; 215 } 216 217 // Returns candidates from best to worst. items()218 std::vector<value_type> items() && { 219 std::sort_heap(Heap.begin(), Heap.end(), Greater); 220 assert(Heap.size() <= N); 221 return std::move(Heap); 222 } 223 224 private: 225 const size_t N; 226 std::vector<value_type> Heap; // Min-heap, comparator is Greater. 227 Compare Greater; 228 }; 229 230 /// Returns a string that sorts in the same order as (-Score, Tiebreak), for 231 /// LSP. (The highest score compares smallest so it sorts at the top). 232 std::string sortText(float Score, llvm::StringRef Tiebreak = ""); 233 234 struct SignatureQualitySignals { 235 uint32_t NumberOfParameters = 0; 236 uint32_t NumberOfOptionalParameters = 0; 237 CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind = 238 CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function; 239 }; 240 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 241 const SignatureQualitySignals &); 242 243 } // namespace clangd 244 } // namespace clang 245 246 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 247