1 //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// Some operations such as code completion produce a set of candidates. 10 /// Usually the user can choose between them, but we should put the best options 11 /// at the top (they're easier to select, and more likely to be seen). 12 /// 13 /// This file defines building blocks for ranking candidates. 14 /// It's used by the features directly and also in the implementation of 15 /// indexes, as indexes also need to heuristically limit their results. 16 /// 17 /// The facilities here are: 18 /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString 19 /// These are structured in a way that they can be debugged, and are fairly 20 /// consistent regardless of the source. 21 /// - compute scores from scoring signals. These are suitable for sorting. 22 /// - sorting utilities like the TopN container. 23 /// These could be split up further to isolate dependencies if we care. 24 /// 25 //===----------------------------------------------------------------------===// 26 27 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 28 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 29 30 #include "ExpectedTypes.h" 31 #include "FileDistance.h" 32 #include "clang/Sema/CodeCompleteConsumer.h" 33 #include "llvm/ADT/ArrayRef.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/ADT/StringSet.h" 36 #include <algorithm> 37 #include <functional> 38 #include <vector> 39 40 namespace llvm { 41 class raw_ostream; 42 } // namespace llvm 43 44 namespace clang { 45 class CodeCompletionResult; 46 47 namespace clangd { 48 49 struct Symbol; 50 class URIDistance; 51 52 // Signals structs are designed to be aggregated from 0 or more sources. 53 // A default instance has neutral signals, and sources are merged into it. 54 // They can be dumped for debugging, and evaluate()d into a score. 55 56 /// Attributes of a symbol that affect how much we like it. 57 struct SymbolQualitySignals { 58 bool Deprecated = false; 59 bool ReservedName = false; // __foo, _Foo are usually implementation details. 60 // FIXME: make these findable once user types _. 61 bool ImplementationDetail = false; 62 unsigned References = 0; 63 64 enum SymbolCategory { 65 Unknown = 0, 66 Variable, 67 Macro, 68 Type, 69 Function, 70 Constructor, 71 Destructor, 72 Namespace, 73 Keyword, 74 Operator, 75 } Category = Unknown; 76 77 void merge(const CodeCompletionResult &SemaCCResult); 78 void merge(const Symbol &IndexResult); 79 80 // Condense these signals down to a single number, higher is better. 81 float evaluate() const; 82 }; 83 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 84 const SymbolQualitySignals &); 85 86 /// Attributes of a symbol-query pair that affect how much we like it. 87 struct SymbolRelevanceSignals { 88 /// The name of the symbol (for ContextWords). Must be explicitly assigned. 89 llvm::StringRef Name; 90 /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. 91 float NameMatch = 1; 92 /// Lowercase words relevant to the context (e.g. near the completion point). 93 llvm::StringSet<>* ContextWords = nullptr; 94 bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). 95 /// Whether fixits needs to be applied for that completion or not. 96 bool NeedsFixIts = false; 97 bool InBaseClass = false; // A member from base class of the accessed class. 98 99 URIDistance *FileProximityMatch = nullptr; 100 /// These are used to calculate proximity between the index symbol and the 101 /// query. 102 llvm::StringRef SymbolURI; 103 /// FIXME: unify with index proximity score - signals should be 104 /// source-independent. 105 /// Proximity between best declaration and the query. [0-1], 1 is closest. 106 float SemaFileProximityScore = 0; 107 108 // Scope proximity is only considered (both index and sema) when this is set. 109 ScopeDistance *ScopeProximityMatch = nullptr; 110 llvm::Optional<llvm::StringRef> SymbolScope; 111 // A symbol from sema should be accessible from the current scope. 112 bool SemaSaysInScope = false; 113 114 // An approximate measure of where we expect the symbol to be used. 115 enum AccessibleScope { 116 FunctionScope, 117 ClassScope, 118 FileScope, 119 GlobalScope, 120 } Scope = GlobalScope; 121 122 enum QueryType { 123 CodeComplete, 124 Generic, 125 } Query = Generic; 126 127 CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other; 128 129 // Whether symbol is an instance member of a class. 130 bool IsInstanceMember = false; 131 132 // Whether clang provided a preferred type in the completion context. 133 bool HadContextType = false; 134 // Whether a source completion item or a symbol had a type information. 135 bool HadSymbolType = false; 136 // Whether the item matches the type expected in the completion context. 137 bool TypeMatchesPreferred = false; 138 139 void merge(const CodeCompletionResult &SemaResult); 140 void merge(const Symbol &IndexResult); 141 142 // Condense these signals down to a single number, higher is better. 143 float evaluate() const; 144 }; 145 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 146 const SymbolRelevanceSignals &); 147 148 /// Combine symbol quality and relevance into a single score. 149 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); 150 151 /// TopN<T> is a lossy container that preserves only the "best" N elements. 152 template <typename T, typename Compare = std::greater<T>> class TopN { 153 public: 154 using value_type = T; 155 TopN(size_t N, Compare Greater = Compare()) N(N)156 : N(N), Greater(std::move(Greater)) {} 157 158 // Adds a candidate to the set. 159 // Returns true if a candidate was dropped to get back under N. push(value_type && V)160 bool push(value_type &&V) { 161 bool Dropped = false; 162 if (Heap.size() >= N) { 163 Dropped = true; 164 if (N > 0 && Greater(V, Heap.front())) { 165 std::pop_heap(Heap.begin(), Heap.end(), Greater); 166 Heap.back() = std::move(V); 167 std::push_heap(Heap.begin(), Heap.end(), Greater); 168 } 169 } else { 170 Heap.push_back(std::move(V)); 171 std::push_heap(Heap.begin(), Heap.end(), Greater); 172 } 173 assert(Heap.size() <= N); 174 assert(std::is_heap(Heap.begin(), Heap.end(), Greater)); 175 return Dropped; 176 } 177 178 // Returns candidates from best to worst. items()179 std::vector<value_type> items() && { 180 std::sort_heap(Heap.begin(), Heap.end(), Greater); 181 assert(Heap.size() <= N); 182 return std::move(Heap); 183 } 184 185 private: 186 const size_t N; 187 std::vector<value_type> Heap; // Min-heap, comparator is Greater. 188 Compare Greater; 189 }; 190 191 /// Returns a string that sorts in the same order as (-Score, Tiebreak), for 192 /// LSP. (The highest score compares smallest so it sorts at the top). 193 std::string sortText(float Score, llvm::StringRef Tiebreak = ""); 194 195 struct SignatureQualitySignals { 196 uint32_t NumberOfParameters = 0; 197 uint32_t NumberOfOptionalParameters = 0; 198 CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind = 199 CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function; 200 }; 201 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 202 const SignatureQualitySignals &); 203 204 } // namespace clangd 205 } // namespace clang 206 207 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 208