1 //===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Iterator.h"
10 #include "llvm/Support/Casting.h"
11 #include <algorithm>
12 #include <cassert>
13 #include <numeric>
14 
15 namespace clang {
16 namespace clangd {
17 namespace dex {
18 namespace {
19 
20 /// Implements Iterator over the intersection of other iterators.
21 ///
22 /// AndIterator iterates through common items among all children. It becomes
23 /// exhausted as soon as any child becomes exhausted. After each mutation, the
24 /// iterator restores the invariant: all children must point to the same item.
25 class AndIterator : public Iterator {
26 public:
AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)27   explicit AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
28       : Iterator(Kind::And), Children(std::move(AllChildren)) {
29     assert(!Children.empty() && "AND iterator should have at least one child.");
30     // Establish invariants.
31     for (const auto &Child : Children)
32       ReachedEnd |= Child->reachedEnd();
33     sync();
34     // When children are sorted by the estimateSize(), sync() calls are more
35     // effective. Each sync() starts with the first child and makes sure all
36     // children point to the same element. If any child is "above" the previous
37     // ones, the algorithm resets and and advances the children to the next
38     // highest element starting from the front. When child iterators in the
39     // beginning have smaller estimated size, the sync() will have less restarts
40     // and become more effective.
41     llvm::sort(Children, [](const std::unique_ptr<Iterator> &LHS,
42                             const std::unique_ptr<Iterator> &RHS) {
43       return LHS->estimateSize() < RHS->estimateSize();
44     });
45   }
46 
reachedEnd() const47   bool reachedEnd() const override { return ReachedEnd; }
48 
49   /// Advances all children to the next common item.
advance()50   void advance() override {
51     assert(!reachedEnd() && "AND iterator can't advance() at the end.");
52     Children.front()->advance();
53     sync();
54   }
55 
56   /// Advances all children to the next common item with DocumentID >= ID.
advanceTo(DocID ID)57   void advanceTo(DocID ID) override {
58     assert(!reachedEnd() && "AND iterator can't advanceTo() at the end.");
59     Children.front()->advanceTo(ID);
60     sync();
61   }
62 
peek() const63   DocID peek() const override { return Children.front()->peek(); }
64 
consume()65   float consume() override {
66     assert(!reachedEnd() && "AND iterator can't consume() at the end.");
67     float Boost = 1;
68     for (const auto &Child : Children)
69       Boost *= Child->consume();
70     return Boost;
71   }
72 
estimateSize() const73   size_t estimateSize() const override {
74     return Children.front()->estimateSize();
75   }
76 
77 private:
dump(llvm::raw_ostream & OS) const78   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
79     OS << "(& ";
80     auto Separator = "";
81     for (const auto &Child : Children) {
82       OS << Separator << *Child;
83       Separator = " ";
84     }
85     OS << ')';
86     return OS;
87   }
88 
89   /// Restores class invariants: each child will point to the same element after
90   /// sync.
sync()91   void sync() {
92     ReachedEnd |= Children.front()->reachedEnd();
93     if (ReachedEnd)
94       return;
95     auto SyncID = Children.front()->peek();
96     // Indicates whether any child needs to be advanced to new SyncID.
97     bool NeedsAdvance = false;
98     do {
99       NeedsAdvance = false;
100       for (auto &Child : Children) {
101         Child->advanceTo(SyncID);
102         ReachedEnd |= Child->reachedEnd();
103         // If any child reaches end And iterator can not match any other items.
104         // In this case, just terminate the process.
105         if (ReachedEnd)
106           return;
107         // Cache the result so that peek() is not called again as it may be
108         // quite expensive in AND with large subtrees.
109         auto Candidate = Child->peek();
110         // If any child goes beyond given ID (i.e. ID is not the common item),
111         // all children should be advanced to the next common item.
112         if (Candidate > SyncID) {
113           SyncID = Candidate;
114           NeedsAdvance = true;
115           // Reset and try to sync again. Sync starts with the first child as
116           // this is the cheapest (smallest size estimate). This way advanceTo
117           // is called on the large posting lists once the sync point is very
118           // likely.
119           break;
120         }
121       }
122     } while (NeedsAdvance);
123   }
124 
125   /// AndIterator owns its children and ensures that all of them point to the
126   /// same element. As soon as one child gets exhausted, AndIterator can no
127   /// longer advance and has reached its end.
128   std::vector<std::unique_ptr<Iterator>> Children;
129   /// Indicates whether any child is exhausted. It is cheaper to maintain and
130   /// update the field, rather than traversing the whole subtree in each
131   /// reachedEnd() call.
132   bool ReachedEnd = false;
133   friend Corpus; // For optimizations.
134 };
135 
136 /// Implements Iterator over the union of other iterators.
137 ///
138 /// OrIterator iterates through all items which can be pointed to by at least
139 /// one child. To preserve the sorted order, this iterator always advances the
140 /// child with smallest Child->peek() value. OrIterator becomes exhausted as
141 /// soon as all of its children are exhausted.
142 class OrIterator : public Iterator {
143 public:
OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)144   explicit OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
145       : Iterator(Kind::Or), Children(std::move(AllChildren)) {
146     assert(!Children.empty() && "OR iterator should have at least one child.");
147   }
148 
149   /// Returns true if all children are exhausted.
reachedEnd() const150   bool reachedEnd() const override {
151     for (const auto &Child : Children)
152       if (!Child->reachedEnd())
153         return false;
154     return true;
155   }
156 
157   /// Moves each child pointing to the smallest DocID to the next item.
advance()158   void advance() override {
159     assert(!reachedEnd() && "OR iterator can't advance() at the end.");
160     const auto SmallestID = peek();
161     for (const auto &Child : Children)
162       if (!Child->reachedEnd() && Child->peek() == SmallestID)
163         Child->advance();
164   }
165 
166   /// Advances each child to the next existing element with DocumentID >= ID.
advanceTo(DocID ID)167   void advanceTo(DocID ID) override {
168     assert(!reachedEnd() && "OR iterator can't advanceTo() at the end.");
169     for (const auto &Child : Children)
170       if (!Child->reachedEnd())
171         Child->advanceTo(ID);
172   }
173 
174   /// Returns the element under cursor of the child with smallest Child->peek()
175   /// value.
peek() const176   DocID peek() const override {
177     assert(!reachedEnd() && "OR iterator can't peek() at the end.");
178     DocID Result = std::numeric_limits<DocID>::max();
179 
180     for (const auto &Child : Children)
181       if (!Child->reachedEnd())
182         Result = std::min(Result, Child->peek());
183 
184     return Result;
185   }
186 
187   // Returns the maximum boosting score among all Children when iterator
188   // points to the current ID.
consume()189   float consume() override {
190     assert(!reachedEnd() && "OR iterator can't consume() at the end.");
191     const DocID ID = peek();
192     float Boost = 1;
193     for (const auto &Child : Children)
194       if (!Child->reachedEnd() && Child->peek() == ID)
195         Boost = std::max(Boost, Child->consume());
196     return Boost;
197   }
198 
estimateSize() const199   size_t estimateSize() const override {
200     size_t Size = 0;
201     for (const auto &Child : Children)
202       Size = std::max(Size, Child->estimateSize());
203     return Size;
204   }
205 
206 private:
dump(llvm::raw_ostream & OS) const207   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
208     OS << "(| ";
209     auto Separator = "";
210     for (const auto &Child : Children) {
211       OS << Separator << *Child;
212       Separator = " ";
213     }
214     OS << ')';
215     return OS;
216   }
217 
218   std::vector<std::unique_ptr<Iterator>> Children;
219   friend Corpus; // For optimizations.
220 };
221 
222 /// TrueIterator handles PostingLists which contain all items of the index. It
223 /// stores size of the virtual posting list, and all operations are performed
224 /// in O(1).
225 class TrueIterator : public Iterator {
226 public:
TrueIterator(DocID Size)227   explicit TrueIterator(DocID Size) : Iterator(Kind::True), Size(Size) {}
228 
reachedEnd() const229   bool reachedEnd() const override { return Index >= Size; }
230 
advance()231   void advance() override {
232     assert(!reachedEnd() && "TRUE iterator can't advance() at the end.");
233     ++Index;
234   }
235 
advanceTo(DocID ID)236   void advanceTo(DocID ID) override {
237     assert(!reachedEnd() && "TRUE iterator can't advanceTo() at the end.");
238     Index = std::min(ID, Size);
239   }
240 
peek() const241   DocID peek() const override {
242     assert(!reachedEnd() && "TRUE iterator can't peek() at the end.");
243     return Index;
244   }
245 
consume()246   float consume() override {
247     assert(!reachedEnd() && "TRUE iterator can't consume() at the end.");
248     return 1;
249   }
250 
estimateSize() const251   size_t estimateSize() const override { return Size; }
252 
253 private:
dump(llvm::raw_ostream & OS) const254   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
255     return OS << "true";
256   }
257 
258   DocID Index = 0;
259   /// Size of the underlying virtual PostingList.
260   DocID Size;
261 };
262 
263 /// FalseIterator yields no results.
264 class FalseIterator : public Iterator {
265 public:
FalseIterator()266   FalseIterator() : Iterator(Kind::False) {}
reachedEnd() const267   bool reachedEnd() const override { return true; }
advance()268   void advance() override { assert(false); }
advanceTo(DocID ID)269   void advanceTo(DocID ID) override { assert(false); }
peek() const270   DocID peek() const override {
271     assert(false);
272     return 0;
273   }
consume()274   float consume() override {
275     assert(false);
276     return 1;
277   }
estimateSize() const278   size_t estimateSize() const override { return 0; }
279 
280 private:
dump(llvm::raw_ostream & OS) const281   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
282     return OS << "false";
283   }
284 };
285 
286 /// Boost iterator is a wrapper around its child which multiplies scores of
287 /// each retrieved item by a given factor.
288 class BoostIterator : public Iterator {
289 public:
BoostIterator(std::unique_ptr<Iterator> Child,float Factor)290   BoostIterator(std::unique_ptr<Iterator> Child, float Factor)
291       : Child(std::move(Child)), Factor(Factor) {}
292 
reachedEnd() const293   bool reachedEnd() const override { return Child->reachedEnd(); }
294 
advance()295   void advance() override { Child->advance(); }
296 
advanceTo(DocID ID)297   void advanceTo(DocID ID) override { Child->advanceTo(ID); }
298 
peek() const299   DocID peek() const override { return Child->peek(); }
300 
consume()301   float consume() override { return Child->consume() * Factor; }
302 
estimateSize() const303   size_t estimateSize() const override { return Child->estimateSize(); }
304 
305 private:
dump(llvm::raw_ostream & OS) const306   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
307     return OS << "(* " << Factor << ' ' << *Child << ')';
308   }
309 
310   std::unique_ptr<Iterator> Child;
311   float Factor;
312 };
313 
314 /// This iterator limits the number of items retrieved from the child iterator
315 /// on top of the query tree. To ensure that query tree with LIMIT iterators
316 /// inside works correctly, users have to call Root->consume(Root->peek()) each
317 /// time item is retrieved at the root of query tree.
318 class LimitIterator : public Iterator {
319 public:
LimitIterator(std::unique_ptr<Iterator> Child,size_t Limit)320   LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit)
321       : Child(std::move(Child)), Limit(Limit), ItemsLeft(Limit) {}
322 
reachedEnd() const323   bool reachedEnd() const override {
324     return ItemsLeft == 0 || Child->reachedEnd();
325   }
326 
advance()327   void advance() override { Child->advance(); }
328 
advanceTo(DocID ID)329   void advanceTo(DocID ID) override { Child->advanceTo(ID); }
330 
peek() const331   DocID peek() const override { return Child->peek(); }
332 
333   /// Decreases the limit in case the element consumed at top of the query tree
334   /// comes from the underlying iterator.
consume()335   float consume() override {
336     assert(!reachedEnd() && "LimitIterator can't consume() at the end.");
337     --ItemsLeft;
338     return Child->consume();
339   }
340 
estimateSize() const341   size_t estimateSize() const override {
342     return std::min(Child->estimateSize(), Limit);
343   }
344 
345 private:
dump(llvm::raw_ostream & OS) const346   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
347     return OS << "(LIMIT " << Limit << " " << *Child << ')';
348   }
349 
350   std::unique_ptr<Iterator> Child;
351   size_t Limit;
352   size_t ItemsLeft;
353 };
354 
355 } // end namespace
356 
consume(Iterator & It)357 std::vector<std::pair<DocID, float>> consume(Iterator &It) {
358   std::vector<std::pair<DocID, float>> Result;
359   for (; !It.reachedEnd(); It.advance())
360     Result.emplace_back(It.peek(), It.consume());
361   return Result;
362 }
363 
364 std::unique_ptr<Iterator>
intersect(std::vector<std::unique_ptr<Iterator>> Children) const365 Corpus::intersect(std::vector<std::unique_ptr<Iterator>> Children) const {
366   std::vector<std::unique_ptr<Iterator>> RealChildren;
367   for (auto &Child : Children) {
368     switch (Child->kind()) {
369     case Iterator::Kind::True:
370       break; // No effect, drop the iterator.
371     case Iterator::Kind::False:
372       return std::move(Child); // Intersection is empty.
373     case Iterator::Kind::And: {
374       // Inline nested AND into parent AND.
375       auto &NewChildren = static_cast<AndIterator *>(Child.get())->Children;
376       std::move(NewChildren.begin(), NewChildren.end(),
377                 std::back_inserter(RealChildren));
378       break;
379     }
380     default:
381       RealChildren.push_back(std::move(Child));
382     }
383   }
384   switch (RealChildren.size()) {
385   case 0:
386     return all();
387   case 1:
388     return std::move(RealChildren.front());
389   default:
390     return std::make_unique<AndIterator>(std::move(RealChildren));
391   }
392 }
393 
394 std::unique_ptr<Iterator>
unionOf(std::vector<std::unique_ptr<Iterator>> Children) const395 Corpus::unionOf(std::vector<std::unique_ptr<Iterator>> Children) const {
396   std::vector<std::unique_ptr<Iterator>> RealChildren;
397   for (auto &Child : Children) {
398     switch (Child->kind()) {
399     case Iterator::Kind::False:
400       break; // No effect, drop the iterator.
401     case Iterator::Kind::Or: {
402       // Inline nested OR into parent OR.
403       auto &NewChildren = static_cast<OrIterator *>(Child.get())->Children;
404       std::move(NewChildren.begin(), NewChildren.end(),
405                 std::back_inserter(RealChildren));
406       break;
407     }
408     case Iterator::Kind::True:
409       // Don't return all(), which would discard sibling boosts.
410     default:
411       RealChildren.push_back(std::move(Child));
412     }
413   }
414   switch (RealChildren.size()) {
415   case 0:
416     return none();
417   case 1:
418     return std::move(RealChildren.front());
419   default:
420     return std::make_unique<OrIterator>(std::move(RealChildren));
421   }
422 }
423 
all() const424 std::unique_ptr<Iterator> Corpus::all() const {
425   return std::make_unique<TrueIterator>(Size);
426 }
427 
none() const428 std::unique_ptr<Iterator> Corpus::none() const {
429   return std::make_unique<FalseIterator>();
430 }
431 
boost(std::unique_ptr<Iterator> Child,float Factor) const432 std::unique_ptr<Iterator> Corpus::boost(std::unique_ptr<Iterator> Child,
433                                         float Factor) const {
434   if (Factor == 1)
435     return Child;
436   if (Child->kind() == Iterator::Kind::False)
437     return Child;
438   return std::make_unique<BoostIterator>(std::move(Child), Factor);
439 }
440 
limit(std::unique_ptr<Iterator> Child,size_t Limit) const441 std::unique_ptr<Iterator> Corpus::limit(std::unique_ptr<Iterator> Child,
442                                         size_t Limit) const {
443   if (Child->kind() == Iterator::Kind::False)
444     return Child;
445   return std::make_unique<LimitIterator>(std::move(Child), Limit);
446 }
447 
448 } // namespace dex
449 } // namespace clangd
450 } // namespace clang
451