1 //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// Some operations such as code completion produce a set of candidates.
10 /// Usually the user can choose between them, but we should put the best options
11 /// at the top (they're easier to select, and more likely to be seen).
12 ///
13 /// This file defines building blocks for ranking candidates.
14 /// It's used by the features directly and also in the implementation of
15 /// indexes, as indexes also need to heuristically limit their results.
16 ///
17 /// The facilities here are:
18 ///   - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
19 ///     These are structured in a way that they can be debugged, and are fairly
20 ///     consistent regardless of the source.
21 ///   - compute scores from scoring signals. These are suitable for sorting.
22 ///   - sorting utilities like the TopN container.
23 /// These could be split up further to isolate dependencies if we care.
24 ///
25 //===----------------------------------------------------------------------===//
26 
27 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
28 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
29 
30 #include "ExpectedTypes.h"
31 #include "FileDistance.h"
32 #include "TUScheduler.h"
33 #include "clang/Sema/CodeCompleteConsumer.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSet.h"
37 #include <algorithm>
38 #include <functional>
39 #include <vector>
40 
41 namespace llvm {
42 class raw_ostream;
43 } // namespace llvm
44 
45 namespace clang {
46 class CodeCompletionResult;
47 
48 namespace clangd {
49 
50 struct Symbol;
51 class URIDistance;
52 
53 // Signals structs are designed to be aggregated from 0 or more sources.
54 // A default instance has neutral signals, and sources are merged into it.
55 // They can be dumped for debugging, and evaluate()d into a score.
56 
57 /// Attributes of a symbol that affect how much we like it.
58 struct SymbolQualitySignals {
59   bool Deprecated = false;
60   bool ReservedName = false; // __foo, _Foo are usually implementation details.
61                              // FIXME: make these findable once user types _.
62   bool ImplementationDetail = false;
63   unsigned References = 0;
64 
65   enum SymbolCategory {
66     Unknown = 0,
67     Variable,
68     Macro,
69     Type,
70     Function,
71     Constructor,
72     Destructor,
73     Namespace,
74     Keyword,
75     Operator,
76   } Category = Unknown;
77 
78   void merge(const CodeCompletionResult &SemaCCResult);
79   void merge(const Symbol &IndexResult);
80 
81   // Condense these signals down to a single number, higher is better.
82   float evaluateHeuristics() const;
83 };
84 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
85                               const SymbolQualitySignals &);
86 
87 /// Attributes of a symbol-query pair that affect how much we like it.
88 struct SymbolRelevanceSignals {
89   /// The name of the symbol (for ContextWords). Must be explicitly assigned.
90   llvm::StringRef Name;
91   /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
92   float NameMatch = 1;
93   /// Lowercase words relevant to the context (e.g. near the completion point).
94   llvm::StringSet<>* ContextWords = nullptr;
95   bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
96   /// Whether fixits needs to be applied for that completion or not.
97   bool NeedsFixIts = false;
98   bool InBaseClass = false; // A member from base class of the accessed class.
99 
100   URIDistance *FileProximityMatch = nullptr;
101   /// These are used to calculate proximity between the index symbol and the
102   /// query.
103   llvm::StringRef SymbolURI;
104   /// FIXME: unify with index proximity score - signals should be
105   /// source-independent.
106   /// Proximity between best declaration and the query. [0-1], 1 is closest.
107   float SemaFileProximityScore = 0;
108 
109   // Scope proximity is only considered (both index and sema) when this is set.
110   ScopeDistance *ScopeProximityMatch = nullptr;
111   llvm::Optional<llvm::StringRef> SymbolScope;
112   // A symbol from sema should be accessible from the current scope.
113   bool SemaSaysInScope = false;
114 
115   // An approximate measure of where we expect the symbol to be used.
116   enum AccessibleScope {
117     FunctionScope,
118     ClassScope,
119     FileScope,
120     GlobalScope,
121   } Scope = GlobalScope;
122 
123   enum QueryType {
124     CodeComplete,
125     Generic,
126   } Query = Generic;
127 
128   CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other;
129 
130   // Whether symbol is an instance member of a class.
131   bool IsInstanceMember = false;
132 
133   // Whether clang provided a preferred type in the completion context.
134   bool HadContextType = false;
135   // Whether a source completion item or a symbol had a type information.
136   bool HadSymbolType = false;
137   // Whether the item matches the type expected in the completion context.
138   bool TypeMatchesPreferred = false;
139 
140   /// Length of the unqualified partial name of Symbol typed in
141   /// CompletionPrefix.
142   unsigned FilterLength = 0;
143 
144   const ASTSignals *MainFileSignals = nullptr;
145   /// Number of references to the candidate in the main file.
146   unsigned MainFileRefs = 0;
147   /// Number of unique symbols in the main file which belongs to candidate's
148   /// namespace. This indicates how relevant the namespace is in the current
149   /// file.
150   unsigned ScopeRefsInFile = 0;
151 
152   /// Set of derived signals computed by calculateDerivedSignals(). Must not be
153   /// set explicitly.
154   struct DerivedSignals {
155     /// Whether Name contains some word from context.
156     bool NameMatchesContext = false;
157     /// Min distance between SymbolURI and all the headers included by the TU.
158     unsigned FileProximityDistance = FileDistance::Unreachable;
159     /// Min distance between SymbolScope and all the available scopes.
160     unsigned ScopeProximityDistance = FileDistance::Unreachable;
161   };
162 
163   DerivedSignals calculateDerivedSignals() const;
164 
165   void merge(const CodeCompletionResult &SemaResult);
166   void merge(const Symbol &IndexResult);
167   void computeASTSignals(const CodeCompletionResult &SemaResult);
168 
169   // Condense these signals down to a single number, higher is better.
170   float evaluateHeuristics() const;
171 };
172 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
173                               const SymbolRelevanceSignals &);
174 
175 /// Combine symbol quality and relevance into a single score.
176 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
177 
178 /// Same semantics as CodeComplete::Score. Quality score and Relevance score
179 /// have been removed since DecisionForest cannot assign individual scores to
180 /// Quality and Relevance signals.
181 struct DecisionForestScores {
182   float Total = 0.f;
183   float ExcludingName = 0.f;
184 };
185 
186 DecisionForestScores
187 evaluateDecisionForest(const SymbolQualitySignals &Quality,
188                        const SymbolRelevanceSignals &Relevance, float Base);
189 
190 /// TopN<T> is a lossy container that preserves only the "best" N elements.
191 template <typename T, typename Compare = std::greater<T>> class TopN {
192 public:
193   using value_type = T;
194   TopN(size_t N, Compare Greater = Compare())
N(N)195       : N(N), Greater(std::move(Greater)) {}
196 
197   // Adds a candidate to the set.
198   // Returns true if a candidate was dropped to get back under N.
push(value_type && V)199   bool push(value_type &&V) {
200     bool Dropped = false;
201     if (Heap.size() >= N) {
202       Dropped = true;
203       if (N > 0 && Greater(V, Heap.front())) {
204         std::pop_heap(Heap.begin(), Heap.end(), Greater);
205         Heap.back() = std::move(V);
206         std::push_heap(Heap.begin(), Heap.end(), Greater);
207       }
208     } else {
209       Heap.push_back(std::move(V));
210       std::push_heap(Heap.begin(), Heap.end(), Greater);
211     }
212     assert(Heap.size() <= N);
213     assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
214     return Dropped;
215   }
216 
217   // Returns candidates from best to worst.
items()218   std::vector<value_type> items() && {
219     std::sort_heap(Heap.begin(), Heap.end(), Greater);
220     assert(Heap.size() <= N);
221     return std::move(Heap);
222   }
223 
224 private:
225   const size_t N;
226   std::vector<value_type> Heap; // Min-heap, comparator is Greater.
227   Compare Greater;
228 };
229 
230 /// Returns a string that sorts in the same order as (-Score, Tiebreak), for
231 /// LSP. (The highest score compares smallest so it sorts at the top).
232 std::string sortText(float Score, llvm::StringRef Tiebreak = "");
233 
234 struct SignatureQualitySignals {
235   uint32_t NumberOfParameters = 0;
236   uint32_t NumberOfOptionalParameters = 0;
237   CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind =
238       CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function;
239 };
240 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
241                               const SignatureQualitySignals &);
242 
243 } // namespace clangd
244 } // namespace clang
245 
246 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
247