1 //===--- Quality.h - Ranking alternatives for ambiguous queries -*- C++-*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===---------------------------------------------------------------------===//
9 ///
10 /// Some operations such as code completion produce a set of candidates.
11 /// Usually the user can choose between them, but we should put the best options
12 /// at the top (they're easier to select, and more likely to be seen).
13 ///
14 /// This file defines building blocks for ranking candidates.
15 /// It's used by the features directly and also in the implementation of
16 /// indexes, as indexes also need to heuristically limit their results.
17 ///
18 /// The facilities here are:
19 ///   - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
20 ///     These are structured in a way that they can be debugged, and are fairly
21 ///     consistent regardless of the source.
22 ///   - compute scores from scoring signals. These are suitable for sorting.
23 ///   - sorting utilities like the TopN container.
24 /// These could be split up further to isolate dependencies if we care.
25 ///
26 //===---------------------------------------------------------------------===//
27 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
28 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
29 #include "clang/Sema/CodeCompleteConsumer.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/StringRef.h"
32 #include <algorithm>
33 #include <functional>
34 #include <vector>
35 namespace llvm {
36 class raw_ostream;
37 }
38 namespace clang {
39 class CodeCompletionResult;
40 namespace clangd {
41 struct Symbol;
42 class URIDistance;
43 
44 // Signals structs are designed to be aggregated from 0 or more sources.
45 // A default instance has neutral signals, and sources are merged into it.
46 // They can be dumped for debugging, and evaluate()d into a score.
47 
48 /// Attributes of a symbol that affect how much we like it.
49 struct SymbolQualitySignals {
50   bool Deprecated = false;
51   bool ReservedName = false; // __foo, _Foo are usually implementation details.
52                              // FIXME: make these findable once user types _.
53   unsigned References = 0;
54 
55   enum SymbolCategory {
56     Unknown = 0,
57     Variable,
58     Macro,
59     Type,
60     Function,
61     Constructor,
62     Namespace,
63     Keyword,
64   } Category = Unknown;
65 
66   void merge(const CodeCompletionResult &SemaCCResult);
67   void merge(const Symbol &IndexResult);
68 
69   // Condense these signals down to a single number, higher is better.
70   float evaluate() const;
71 };
72 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
73                               const SymbolQualitySignals &);
74 
75 /// Attributes of a symbol-query pair that affect how much we like it.
76 struct SymbolRelevanceSignals {
77   /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
78   float NameMatch = 1;
79   bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
80 
81   URIDistance *FileProximityMatch = nullptr;
82   /// This is used to calculate proximity between the index symbol and the
83   /// query.
84   llvm::StringRef SymbolURI;
85   /// Proximity between best declaration and the query. [0-1], 1 is closest.
86   /// FIXME: unify with index proximity score - signals should be
87   /// source-independent.
88   float SemaProximityScore = 0;
89 
90   // An approximate measure of where we expect the symbol to be used.
91   enum AccessibleScope {
92     FunctionScope,
93     ClassScope,
94     FileScope,
95     GlobalScope,
96   } Scope = GlobalScope;
97 
98   enum QueryType {
99     CodeComplete,
100     Generic,
101   } Query = Generic;
102 
103   CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other;
104 
105   // Whether symbol is an instance member of a class.
106   bool IsInstanceMember = false;
107 
108   void merge(const CodeCompletionResult &SemaResult);
109   void merge(const Symbol &IndexResult);
110 
111   // Condense these signals down to a single number, higher is better.
112   float evaluate() const;
113 };
114 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
115                               const SymbolRelevanceSignals &);
116 
117 /// Combine symbol quality and relevance into a single score.
118 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
119 
120 /// TopN<T> is a lossy container that preserves only the "best" N elements.
121 template <typename T, typename Compare = std::greater<T>> class TopN {
122 public:
123   using value_type = T;
124   TopN(size_t N, Compare Greater = Compare())
N(N)125       : N(N), Greater(std::move(Greater)) {}
126 
127   // Adds a candidate to the set.
128   // Returns true if a candidate was dropped to get back under N.
push(value_type && V)129   bool push(value_type &&V) {
130     bool Dropped = false;
131     if (Heap.size() >= N) {
132       Dropped = true;
133       if (N > 0 && Greater(V, Heap.front())) {
134         std::pop_heap(Heap.begin(), Heap.end(), Greater);
135         Heap.back() = std::move(V);
136         std::push_heap(Heap.begin(), Heap.end(), Greater);
137       }
138     } else {
139       Heap.push_back(std::move(V));
140       std::push_heap(Heap.begin(), Heap.end(), Greater);
141     }
142     assert(Heap.size() <= N);
143     assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
144     return Dropped;
145   }
146 
147   // Returns candidates from best to worst.
items()148   std::vector<value_type> items() && {
149     std::sort_heap(Heap.begin(), Heap.end(), Greater);
150     assert(Heap.size() <= N);
151     return std::move(Heap);
152   }
153 
154 private:
155   const size_t N;
156   std::vector<value_type> Heap; // Min-heap, comparator is Greater.
157   Compare Greater;
158 };
159 
160 /// Returns a string that sorts in the same order as (-Score, Tiebreak), for
161 /// LSP. (The highest score compares smallest so it sorts at the top).
162 std::string sortText(float Score, llvm::StringRef Tiebreak = "");
163 
164 } // namespace clangd
165 } // namespace clang
166 
167 #endif
168