1 #include "forward_index.h"
2 #include "index.h"
3 #include "varint.h"
4 #include "spec.h"
5 #include <math.h>
6 #include <limits.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <sys/param.h>
10 #include "rmalloc.h"
11 #include "rmutil/rm_assert.h"
12 
13 static int UI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit);
14 static inline int UI_ReadUnsorted(void *ctx, RSIndexResult **hit);
15 static int UI_ReadSorted(void *ctx, RSIndexResult **hit);
16 static size_t UI_NumEstimated(void *ctx);
17 static IndexCriteriaTester *UI_GetCriteriaTester(void *ctx);
18 static size_t UI_Len(void *ctx);
19 
20 static int II_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit);
21 static int II_ReadUnsorted(void *ctx, RSIndexResult **hit);
22 static IndexCriteriaTester *II_GetCriteriaTester(void *ctx);
23 static int II_ReadSorted(void *ctx, RSIndexResult **hit);
24 static size_t II_NumEstimated(void *ctx);
25 static size_t II_Len(void *ctx);
26 static t_docId II_LastDocId(void *ctx);
27 
28 #define CURRENT_RECORD(ii) (ii)->base.current
29 
30 typedef struct {
31   IndexIterator base;
32   /**
33    * We maintain two iterator arrays. One is the original iterator list, and
34    * the other is the list of currently active iterators. When an iterator
35    * reaches EOF, it is set to NULL in the `its` list, but is still retained in
36    * the `origits` list, for the purpose of supporting things like Rewind() and
37    * Free()
38    */
39   IndexIterator **its;
40   IndexIterator **origits;
41   uint32_t num;
42   uint32_t norig;
43   uint32_t currIt;
44   t_docId minDocId;
45 
46   // If set to 1, we exit skips after the first hit found and not merge further results
47   int quickExit;
48   size_t nexpected;
49   double weight;
50   uint64_t len;
51 } UnionIterator;
52 
UI_LastDocId(void * ctx)53 static inline t_docId UI_LastDocId(void *ctx) {
54   return ((UnionIterator *)ctx)->minDocId;
55 }
56 
UI_SyncIterList(UnionIterator * ui)57 static void UI_SyncIterList(UnionIterator *ui) {
58   ui->num = ui->norig;
59   memcpy(ui->its, ui->origits, sizeof(*ui->its) * ui->norig);
60   for (size_t ii = 0; ii < ui->num; ++ii) {
61     ui->its[ii]->minId = 0;
62   }
63 }
64 
65 /**
66  * Removes the exhausted iterator from the active list, so that future
67  * reads will no longer iterate over it
68  */
UI_RemoveExhausted(UnionIterator * it,size_t badix)69 static size_t UI_RemoveExhausted(UnionIterator *it, size_t badix) {
70   // e.g. assume we have 10 entries, and we want to remove index 8, which means
71   // one more valid entry at the end. This means we use
72   // source: its + 8 + 1
73   // destination: its + 8
74   // number: it->len (10) - (8) - 1 == 1
75   memmove(it->its + badix, it->its + badix + 1, sizeof(*it->its) * (it->num - badix - 1));
76   it->num--;
77   // Repeat the same index again, because we have a new iterator at the same
78   // position
79   return badix - 1;
80 }
81 
UI_Abort(void * ctx)82 static void UI_Abort(void *ctx) {
83   UnionIterator *it = ctx;
84   IITER_SET_EOF(&it->base);
85   for (int i = 0; i < it->num; i++) {
86     if (it->its[i]) {
87       it->its[i]->Abort(it->its[i]->ctx);
88     }
89   }
90 }
91 
UI_Rewind(void * ctx)92 static void UI_Rewind(void *ctx) {
93   UnionIterator *ui = ctx;
94   IITER_CLEAR_EOF(&ui->base);
95   ui->minDocId = 0;
96   CURRENT_RECORD(ui)->docId = 0;
97 
98   UI_SyncIterList(ui);
99 
100   // rewind all child iterators
101   for (size_t i = 0; i < ui->num; i++) {
102     ui->its[i]->minId = 0;
103     ui->its[i]->Rewind(ui->its[i]->ctx);
104   }
105 }
106 
NewUnionIterator(IndexIterator ** its,int num,DocTable * dt,int quickExit,double weight)107 IndexIterator *NewUnionIterator(IndexIterator **its, int num, DocTable *dt, int quickExit,
108                                 double weight) {
109   // create union context
110   UnionIterator *ctx = rm_calloc(1, sizeof(UnionIterator));
111   ctx->origits = its;
112   ctx->weight = weight;
113   ctx->num = num;
114   ctx->norig = num;
115   IITER_CLEAR_EOF(&ctx->base);
116   CURRENT_RECORD(ctx) = NewUnionResult(num, weight);
117   ctx->len = 0;
118   ctx->quickExit = quickExit;
119   ctx->its = rm_calloc(ctx->num, sizeof(*ctx->its));
120   ctx->nexpected = 0;
121   ctx->currIt = 0;
122 
123   // bind the union iterator calls
124   IndexIterator *it = &ctx->base;
125   it->mode = MODE_SORTED;
126   it->ctx = ctx;
127   it->GetCriteriaTester = UI_GetCriteriaTester;
128   it->NumEstimated = UI_NumEstimated;
129   it->LastDocId = UI_LastDocId;
130   it->Read = UI_ReadSorted;
131   it->SkipTo = UI_SkipTo;
132   it->HasNext = NULL;
133   it->Free = UnionIterator_Free;
134   it->Len = UI_Len;
135   it->Abort = UI_Abort;
136   it->Rewind = UI_Rewind;
137   UI_SyncIterList(ctx);
138 
139   for (size_t i = 0; i < num; ++i) {
140     ctx->nexpected += IITER_NUM_ESTIMATED(its[i]);
141     if (its[i]->mode == MODE_UNSORTED) {
142       it->mode = MODE_UNSORTED;
143       it->Read = UI_ReadUnsorted;
144     }
145   }
146 
147   const size_t maxresultsSorted = RSGlobalConfig.maxResultsToUnsortedMode;
148   // this code is normally (and should be) dead.
149   // i.e. the deepest-most IndexIterator does not have a CT
150   //      so it will always eventually return NULL CT
151   if (it->mode == MODE_SORTED && ctx->nexpected >= maxresultsSorted) {
152     // make sure all the children support CriteriaTester
153     int ctSupported = 1;
154     for (int i = 0; i < ctx->num; ++i) {
155       IndexCriteriaTester *tester = IITER_GET_CRITERIA_TESTER(ctx->origits[i]);
156       if (!tester) {
157         ctSupported = 0;
158         break;
159       }
160       tester->Free(tester);
161     }
162     if (ctSupported) {
163       it->mode = MODE_UNSORTED;
164       it->Read = UI_ReadUnsorted;
165     }
166   }
167 
168   return it;
169 }
170 
171 typedef struct {
172   IndexCriteriaTester base;
173   IndexCriteriaTester **children;
174   int nchildren;
175 } UnionCriteriaTester;
176 
UI_Test(struct IndexCriteriaTester * ct,t_docId id)177 static int UI_Test(struct IndexCriteriaTester *ct, t_docId id) {
178   UnionCriteriaTester *uct = (UnionCriteriaTester *)ct;
179   for (int i = 0; i < uct->nchildren; ++i) {
180     if (uct->children[i]->Test(uct->children[i], id)) {
181       return 1;
182     }
183   }
184   return 0;
185 }
186 
UI_TesterFree(struct IndexCriteriaTester * ct)187 static void UI_TesterFree(struct IndexCriteriaTester *ct) {
188   UnionCriteriaTester *uct = (UnionCriteriaTester *)ct;
189   for (int i = 0; i < uct->nchildren; ++i) {
190     if (uct->children[i]) {
191       uct->children[i]->Free(uct->children[i]);
192     }
193   }
194   rm_free(uct->children);
195   rm_free(uct);
196 }
197 
UI_GetCriteriaTester(void * ctx)198 static IndexCriteriaTester *UI_GetCriteriaTester(void *ctx) {
199   UnionIterator *ui = ctx;
200   IndexCriteriaTester **children = rm_malloc(ui->num * sizeof(IndexCriteriaTester *));
201   for (size_t i = 0; i < ui->num; ++i) {
202     children[i] = IITER_GET_CRITERIA_TESTER(ui->origits[i]);
203     if (!children[i]) {
204       for (size_t j = 0; j < i; j++) {
205         children[j]->Free(children[j]);
206       }
207       rm_free(children);
208       return NULL;
209     }
210   }
211   UnionCriteriaTester *ct = rm_malloc(sizeof(*ct));
212   ct->children = children;
213   ct->nchildren = ui->num;
214   ct->base.Test = UI_Test;
215   ct->base.Free = UI_TesterFree;
216   return &ct->base;
217 }
218 
UI_NumEstimated(void * ctx)219 static size_t UI_NumEstimated(void *ctx) {
220   UnionIterator *ui = ctx;
221   return ui->nexpected;
222 }
223 
UI_ReadUnsorted(void * ctx,RSIndexResult ** hit)224 static inline int UI_ReadUnsorted(void *ctx, RSIndexResult **hit) {
225   UnionIterator *ui = ctx;
226   int rc = INDEXREAD_OK;
227   RSIndexResult *res = NULL;
228   while (ui->currIt < ui->num) {
229     rc = ui->origits[ui->currIt]->Read(ui->origits[ui->currIt]->ctx, &res);
230     if (rc == INDEXREAD_OK) {
231       *hit = res;
232       return rc;
233     }
234     ++ui->currIt;
235   }
236   return INDEXREAD_EOF;
237 }
238 
UI_ReadSorted(void * ctx,RSIndexResult ** hit)239 static inline int UI_ReadSorted(void *ctx, RSIndexResult **hit) {
240   UnionIterator *ui = ctx;
241   // nothing to do
242   if (ui->num == 0 || !IITER_HAS_NEXT(&ui->base)) {
243     IITER_SET_EOF(&ui->base);
244     return INDEXREAD_EOF;
245   }
246 
247   int numActive = 0;
248   AggregateResult_Reset(CURRENT_RECORD(ui));
249 
250   do {
251 
252     // find the minimal iterator
253     t_docId minDocId = UINT64_MAX;
254     IndexIterator *minIt = NULL;
255     numActive = 0;
256     int rc = INDEXREAD_EOF;
257     unsigned nits = ui->num;
258 
259     for (unsigned i = 0; i < nits; i++) {
260       IndexIterator *it = ui->its[i];
261       RSIndexResult *res = IITER_CURRENT_RECORD(it);
262       rc = INDEXREAD_OK;
263       // if this hit is behind the min id - read the next entry
264       // printf("ui->docIds[%d]: %d, ui->minDocId: %d\n", i, ui->docIds[i], ui->minDocId);
265       while (it->minId <= ui->minDocId && rc != INDEXREAD_EOF) {
266         rc = INDEXREAD_NOTFOUND;
267         // read while we're not at the end and perhaps the flags do not match
268         while (rc == INDEXREAD_NOTFOUND) {
269           rc = it->Read(it->ctx, &res);
270           if (res) {
271             it->minId = res->docId;
272           }
273         }
274       }
275 
276       if (rc != INDEXREAD_EOF) {
277         numActive++;
278       } else {
279         // Remove this from the active list
280         i = UI_RemoveExhausted(ui, i);
281         nits = ui->num;
282         continue;
283       }
284 
285       if (rc == INDEXREAD_OK && res->docId <= minDocId) {
286         minDocId = res->docId;
287         minIt = it;
288       }
289     }
290 
291     // take the minimum entry and collect all results matching to it
292     if (minIt) {
293       UI_SkipTo(ui, minIt->minId, hit);
294       // return INDEXREAD_OK;
295       ui->minDocId = minIt->minId;
296       ui->len++;
297       return INDEXREAD_OK;
298     }
299 
300   } while (numActive > 0);
301   IITER_SET_EOF(&ui->base);
302 
303   return INDEXREAD_EOF;
304 }
305 
306 /**
307 Skip to the given docId, or one place after it
308 @param ctx IndexReader context
309 @param docId docId to seek to
310 @param hit an index hit we put our reads into
311 @return INDEXREAD_OK if found, INDEXREAD_NOTFOUND if not found, INDEXREAD_EOF
312 if
313 at EOF
314 */
UI_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)315 static int UI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
316   UnionIterator *ui = ctx;
317   RS_LOG_ASSERT(ui->base.mode == MODE_SORTED, "union iterator mode is not MODE_SORTED");
318 
319   // printf("UI %p skipto %d\n", ui, docId);
320 
321   if (docId == 0) {
322     return UI_ReadSorted(ctx, hit);
323   }
324 
325   if (!IITER_HAS_NEXT(&ui->base)) {
326     return INDEXREAD_EOF;
327   }
328 
329   // reset the current hitf
330   AggregateResult_Reset(CURRENT_RECORD(ui));
331   CURRENT_RECORD(ui)->weight = ui->weight;
332   int numActive = 0;
333   int found = 0;
334   int rc = INDEXREAD_EOF;
335   unsigned num = ui->num;
336   const int quickExit = ui->quickExit;
337   t_docId minDocId = UINT32_MAX;
338   IndexIterator *it;
339   RSIndexResult *res;
340   RSIndexResult *minResult = NULL;
341   // skip all iterators to docId
342   for (unsigned i = 0; i < num; i++) {
343     it = ui->its[i];
344     // this happens for non existent words
345     res = NULL;
346     // If the requested docId is larger than the last read id from the iterator,
347     // we need to read an entry from the iterator, seeking to this docId
348     if (it->minId < docId) {
349       if ((rc = it->SkipTo(it->ctx, docId, &res)) == INDEXREAD_EOF) {
350         i = UI_RemoveExhausted(ui, i);
351         num = ui->num;
352         continue;
353       }
354       if (res) {
355         it->minId = res->docId;
356       }
357 
358     } else {
359       // if the iterator is ahead of docId - we avoid reading the entry
360       // in this case, we are either past or at the requested docId, no need to actually read
361       rc = (it->minId == docId) ? INDEXREAD_OK : INDEXREAD_NOTFOUND;
362       res = IITER_CURRENT_RECORD(it);
363     }
364 
365     // if we've read successfully, update the minimal docId we've found
366     if (it->minId && rc != INDEXREAD_EOF) {
367       if (it->minId < minDocId || !minResult) {
368         minResult = res;
369         minDocId = it->minId;
370       }
371       // sminDocId = MIN(ui->docIds[i], minDocId);
372     }
373 
374     // we found a hit - continue to all results matching the same docId
375     if (rc == INDEXREAD_OK) {
376 
377       // add the result to the aggregate result we are holding
378       if (hit) {
379         AggregateResult_AddChild(CURRENT_RECORD(ui), res ? res : IITER_CURRENT_RECORD(it));
380       }
381       ui->minDocId = it->minId;
382       ++found;
383     }
384     ++numActive;
385     // If we've found a single entry and we are iterating in quick exit mode - exit now
386     if (found && quickExit) break;
387   }
388 
389   // all iterators are at the end
390   if (numActive == 0) {
391     IITER_SET_EOF(&ui->base);
392     return INDEXREAD_EOF;
393   }
394 
395   // copy our aggregate to the upstream hit
396   *hit = CURRENT_RECORD(ui);
397   if (found > 0) {
398     return INDEXREAD_OK;
399   }
400   if (minResult) {
401     *hit = minResult;
402     AggregateResult_AddChild(CURRENT_RECORD(ui), minResult);
403   }
404   // not found...
405   ui->minDocId = minDocId;
406   return INDEXREAD_NOTFOUND;
407 }
408 
UnionIterator_Free(IndexIterator * itbase)409 void UnionIterator_Free(IndexIterator *itbase) {
410   if (itbase == NULL) return;
411 
412   UnionIterator *ui = itbase->ctx;
413   for (int i = 0; i < ui->norig; i++) {
414     IndexIterator *it = ui->origits[i];
415     if (it) {
416       it->Free(it);
417     }
418   }
419 
420   IndexResult_Free(CURRENT_RECORD(ui));
421   rm_free(ui->its);
422   rm_free(ui->origits);
423   rm_free(ui);
424 }
425 
UI_Len(void * ctx)426 static size_t UI_Len(void *ctx) {
427   return ((UnionIterator *)ctx)->len;
428 }
429 
430 /* The context used by the intersection methods during iterating an intersect
431  * iterator */
432 typedef struct {
433   IndexIterator base;
434   IndexIterator **its;
435   IndexIterator *bestIt;
436   IndexCriteriaTester **testers;
437   t_docId *docIds;
438   int *rcs;
439   unsigned num;
440   size_t len;
441   int maxSlop;
442   int inOrder;
443   // the last read docId from any child
444   t_docId lastDocId;
445   // the last id that was found on all children
446   t_docId lastFoundId;
447 
448   // RSIndexResult *result;
449   DocTable *docTable;
450   t_fieldMask fieldMask;
451   double weight;
452   size_t nexpected;
453 } IntersectIterator;
454 
IntersectIterator_Free(IndexIterator * it)455 void IntersectIterator_Free(IndexIterator *it) {
456   if (it == NULL) return;
457   IntersectIterator *ui = it->ctx;
458   for (int i = 0; i < ui->num; i++) {
459     if (ui->its[i] != NULL) {
460       ui->its[i]->Free(ui->its[i]);
461     }
462     // IndexResult_Free(&ui->currentHits[i]);
463   }
464 
465   for (int i = 0; i < array_len(ui->testers); i++) {
466     if (ui->testers[i]) {
467       ui->testers[i]->Free(ui->testers[i]);
468     }
469   }
470   if (ui->bestIt) {
471     ui->bestIt->Free(ui->bestIt);
472   }
473 
474   rm_free(ui->docIds);
475   rm_free(ui->its);
476   IndexResult_Free(it->current);
477   array_free(ui->testers);
478   rm_free(it);
479 }
480 
II_Abort(void * ctx)481 static void II_Abort(void *ctx) {
482   IntersectIterator *it = ctx;
483   it->base.isValid = 0;
484   for (int i = 0; i < it->num; i++) {
485     if (it->its[i]) {
486       it->its[i]->Abort(it->its[i]->ctx);
487     }
488   }
489 }
490 
II_Rewind(void * ctx)491 static void II_Rewind(void *ctx) {
492   IntersectIterator *ii = ctx;
493   ii->base.isValid = 1;
494   ii->lastDocId = 0;
495 
496   // rewind all child iterators
497   for (int i = 0; i < ii->num; i++) {
498     ii->docIds[i] = 0;
499     if (ii->its[i]) {
500       ii->its[i]->Rewind(ii->its[i]->ctx);
501     }
502   }
503 }
504 
505 typedef int (*CompareFunc)(const void *a, const void *b);
cmpIter(IndexIterator ** it1,IndexIterator ** it2)506 static int cmpIter(IndexIterator **it1, IndexIterator **it2) {
507   if (!*it1 && !*it2) return 0;
508   if (!*it1) return -1;
509   if (!*it2) return 1;
510 
511   return (int)((*it1)->NumEstimated((*it1)->ctx) - (*it2)->NumEstimated((*it2)->ctx));
512 }
513 
II_SortChildren(IntersectIterator * ctx)514 static void II_SortChildren(IntersectIterator *ctx) {
515   /**
516    * 1. Go through all the iterators, ensuring none of them is NULL
517    *    (replace with empty if indeed NULL)
518    * 2. If all the iterators are unsorted then set the mode to UNSORTED
519    * 3. If all or any of the iterators are sorted, then remove the
520    *    unsorted iterators from the equation, simply adding them to the
521    *    tester list
522    */
523   IndexIterator **unsortedIts = NULL;
524   IndexIterator **sortedIts = rm_malloc(sizeof(IndexIterator *) * ctx->num);
525   size_t sortedItsSize = 0;
526   for (size_t i = 0; i < ctx->num; ++i) {
527     IndexIterator *curit = ctx->its[i];
528 
529     if (!curit) {
530       // If the current iterator is empty, then the entire
531       // query will fail; just free all the iterators and call it good
532       if (sortedIts) {
533         rm_free(sortedIts);
534       }
535       if (unsortedIts) {
536         array_free(unsortedIts);
537       }
538       ctx->bestIt = NULL;
539       return;
540     }
541 
542     size_t amount = IITER_NUM_ESTIMATED(curit);
543     if (amount < ctx->nexpected) {
544       ctx->nexpected = amount;
545       ctx->bestIt = curit;
546     }
547 
548     if (curit->mode == MODE_UNSORTED) {
549       unsortedIts = array_ensure_append(unsortedIts, &curit, 1, IndexIterator *);
550     } else {
551       sortedIts[sortedItsSize++] = curit;
552     }
553   }
554 
555   if (unsortedIts) {
556     if (array_len(unsortedIts) == ctx->num) {
557       ctx->base.mode = MODE_UNSORTED;
558       ctx->base.Read = II_ReadUnsorted;
559       ctx->num = 1;
560       ctx->its[0] = ctx->bestIt;
561       // The other iterators are also stored in unsortedIts
562       // and because we know that there are no sorted iterators
563     }
564 
565     for (size_t ii = 0; ii < array_len(unsortedIts); ++ii) {
566       IndexIterator *cur = unsortedIts[ii];
567       if (ctx->base.mode == MODE_UNSORTED && ctx->bestIt == cur) {
568         continue;
569       }
570       IndexCriteriaTester *tester = IITER_GET_CRITERIA_TESTER(cur);
571       ctx->testers = array_ensure_append(ctx->testers, &tester, 1, IndexCriteriaTester *);
572       cur->Free(cur);
573     }
574   } else {
575     ctx->bestIt = NULL;
576   }
577 
578   rm_free(ctx->its);
579   ctx->its = sortedIts;
580   ctx->num = sortedItsSize;
581   array_free(unsortedIts);
582 }
583 
NewIntersecIterator(IndexIterator ** its_,size_t num,DocTable * dt,t_fieldMask fieldMask,int maxSlop,int inOrder,double weight)584 IndexIterator *NewIntersecIterator(IndexIterator **its_, size_t num, DocTable *dt,
585                                    t_fieldMask fieldMask, int maxSlop, int inOrder, double weight) {
586   // printf("Creating new intersection iterator with fieldMask=%llx\n", fieldMask);
587   IntersectIterator *ctx = rm_calloc(1, sizeof(*ctx));
588   ctx->lastDocId = 0;
589   ctx->lastFoundId = 0;
590   ctx->len = 0;
591   ctx->maxSlop = maxSlop;
592   ctx->inOrder = inOrder;
593   ctx->fieldMask = fieldMask;
594   ctx->weight = weight;
595   ctx->docIds = rm_calloc(num, sizeof(t_docId));
596   ctx->docTable = dt;
597   ctx->nexpected = UINT32_MAX;
598 
599   ctx->base.isValid = 1;
600   ctx->base.current = NewIntersectResult(num, weight);
601   ctx->its = its_;
602   ctx->num = num;
603 
604   // Sort children iterators from low count to high count which reduces the number of iterations.
605   if (!ctx->inOrder) {
606     qsort(ctx->its, ctx->num, sizeof(*ctx->its), (CompareFunc)cmpIter);
607   }
608 
609   // bind the iterator calls
610   IndexIterator *it = &ctx->base;
611   it->ctx = ctx;
612   it->LastDocId = II_LastDocId;
613   it->NumEstimated = II_NumEstimated;
614   it->GetCriteriaTester = II_GetCriteriaTester;
615   it->Read = II_ReadSorted;
616   it->SkipTo = II_SkipTo;
617   it->Len = II_Len;
618   it->Free = IntersectIterator_Free;
619   it->Abort = II_Abort;
620   it->Rewind = II_Rewind;
621   it->HasNext = NULL;
622   it->mode = MODE_SORTED;
623   II_SortChildren(ctx);
624   return it;
625 }
626 
II_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)627 static int II_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
628   /* A seek with docId 0 is equivalent to a read */
629   if (docId == 0) {
630     return II_ReadSorted(ctx, hit);
631   }
632   IntersectIterator *ic = ctx;
633   AggregateResult_Reset(ic->base.current);
634   int nfound = 0;
635 
636   int rc = INDEXREAD_EOF;
637   // skip all iterators to docId
638   for (int i = 0; i < ic->num; i++) {
639     IndexIterator *it = ic->its[i];
640 
641     if (!it || !IITER_HAS_NEXT(it)) return INDEXREAD_EOF;
642 
643     RSIndexResult *res = IITER_CURRENT_RECORD(it);
644     rc = INDEXREAD_OK;
645 
646     // only read if we are not already at the seek to position
647     if (ic->docIds[i] != docId) {
648       rc = it->SkipTo(it->ctx, docId, &res);
649       if (rc != INDEXREAD_EOF) {
650         if (res) ic->docIds[i] = res->docId;
651       }
652     }
653 
654     if (rc == INDEXREAD_EOF) {
655       // we are at the end!
656       ic->base.isValid = 0;
657       return rc;
658     } else if (rc == INDEXREAD_OK) {
659 
660       // YAY! found!
661       if (res) {
662         AggregateResult_AddChild(ic->base.current, res);
663       }
664       ic->lastDocId = docId;
665 
666       ++nfound;
667     } else if (ic->docIds[i] > ic->lastDocId) {
668       ic->lastDocId = ic->docIds[i];
669       break;
670     }
671   }
672 
673   // unless we got an EOF - we put the current record into hit
674 
675   // if the requested id was found on all children - we return OK
676   if (nfound == ic->num) {
677     // printf("Skipto %d hit @%d\n", docId, ic->current->docId);
678 
679     // Update the last found id
680     // if maxSlop == -1 there is no need to verify maxSlop and inorder, otherwise lets verify
681     if (ic->maxSlop == -1 ||
682         IndexResult_IsWithinRange(ic->base.current, ic->maxSlop, ic->inOrder)) {
683       ic->lastFoundId = ic->base.current->docId;
684       if (hit) *hit = ic->base.current;
685       return INDEXREAD_OK;
686     }
687   }
688 
689   // Not found - but we need to read the next valid result into hit
690   rc = II_ReadSorted(ic, hit);
691   // this might have brought us to our end, in which case we just terminate
692   if (rc == INDEXREAD_EOF) return INDEXREAD_EOF;
693 
694   // otherwise - not found
695   return INDEXREAD_NOTFOUND;
696 }
697 
II_ReadUnsorted(void * ctx,RSIndexResult ** hit)698 static int II_ReadUnsorted(void *ctx, RSIndexResult **hit) {
699   IntersectIterator *ic = ctx;
700   int rc = INDEXREAD_OK;
701   RSIndexResult *res = NULL;
702   while (1) {
703     rc = ic->bestIt->Read(ic->bestIt->ctx, &res);
704     if (rc == INDEXREAD_EOF) {
705       return INDEXREAD_EOF;
706       *hit = res;
707       return rc;
708     }
709     int isMatch = 1;
710     for (size_t i = 0; i < array_len(ic->testers); ++i) {
711       if (!ic->testers[i]->Test(ic->testers[i], res->docId)) {
712         isMatch = 0;
713         break;
714       }
715     }
716     if (!isMatch) {
717       continue;
718     }
719     *hit = res;
720     return rc;
721   }
722 }
723 
724 typedef struct {
725   IndexCriteriaTester base;
726   IndexCriteriaTester **children;
727 } IICriteriaTester;
728 
II_Test(struct IndexCriteriaTester * ct,t_docId id)729 static int II_Test(struct IndexCriteriaTester *ct, t_docId id) {
730   IICriteriaTester *ict = (IICriteriaTester *)ct;
731   for (size_t i = 0; i < array_len(ict->children); ++i) {
732     if (!ict->children[i]->Test(ict->children[i], id)) {
733       return 0;
734     }
735   }
736   return 1;
737 }
738 
II_TesterFree(struct IndexCriteriaTester * ct)739 static void II_TesterFree(struct IndexCriteriaTester *ct) {
740   IICriteriaTester *ict = (IICriteriaTester *)ct;
741   for (size_t i = 0; i < array_len(ict->children); ++i) {
742     ict->children[i]->Free(ict->children[i]);
743   }
744   array_free(ict->children);
745   rm_free(ict);
746 }
747 
II_GetCriteriaTester(void * ctx)748 static IndexCriteriaTester *II_GetCriteriaTester(void *ctx) {
749   IntersectIterator *ic = ctx;
750   for (size_t i = 0; i < ic->num; ++i) {
751     IndexCriteriaTester *tester = NULL;
752     if (ic->its[i]) {
753       tester = IITER_GET_CRITERIA_TESTER(ic->its[i]);
754     }
755     if (!tester) {
756       for (int j = 0; j < i; j++) {
757         ic->testers[j]->Free(ic->testers[j]);
758       }
759       array_free(ic->testers);
760       return NULL;
761     }
762     ic->testers = array_ensure_append(ic->testers, tester, 1, IndexCriteriaTester *);
763   }
764   IICriteriaTester *ict = rm_malloc(sizeof(*ict));
765   ict->children = ic->testers;
766   ic->testers = NULL;
767   ict->base.Test = II_Test;
768   ict->base.Free = II_TesterFree;
769   return &ict->base;
770 }
771 
II_NumEstimated(void * ctx)772 static size_t II_NumEstimated(void *ctx) {
773   IntersectIterator *ic = ctx;
774   return ic->nexpected;
775 }
776 
II_ReadSorted(void * ctx,RSIndexResult ** hit)777 static int II_ReadSorted(void *ctx, RSIndexResult **hit) {
778   IntersectIterator *ic = ctx;
779   if (ic->num == 0) return INDEXREAD_EOF;
780 
781   int nh = 0;
782   int i = 0;
783 
784   do {
785     nh = 0;
786     AggregateResult_Reset(ic->base.current);
787 
788     for (i = 0; i < ic->num; i++) {
789       IndexIterator *it = ic->its[i];
790 
791       if (!it) goto eof;
792 
793       RSIndexResult *h = IITER_CURRENT_RECORD(it);
794       // skip to the next
795       int rc = INDEXREAD_OK;
796       if (ic->docIds[i] != ic->lastDocId || ic->lastDocId == 0) {
797 
798         if (i == 0 && ic->docIds[i] >= ic->lastDocId) {
799           rc = it->Read(it->ctx, &h);
800         } else {
801           rc = it->SkipTo(it->ctx, ic->lastDocId, &h);
802         }
803         // printf("II %p last docId %d, it %d read docId %d(%d), rc %d\n", ic, ic->lastDocId, i,
804         //        h->docId, it->LastDocId(it->ctx), rc);
805 
806         if (rc == INDEXREAD_EOF) goto eof;
807         ic->docIds[i] = h->docId;
808       }
809 
810       if (ic->docIds[i] > ic->lastDocId) {
811         ic->lastDocId = ic->docIds[i];
812         break;
813       }
814       if (rc == INDEXREAD_OK) {
815         ++nh;
816         AggregateResult_AddChild(ic->base.current, h);
817       } else {
818         ic->lastDocId++;
819       }
820     }
821 
822     if (nh == ic->num) {
823       // printf("II %p HIT @ %d\n", ic, ic->current->docId);
824       // sum up all hits
825       if (hit != NULL) {
826         *hit = ic->base.current;
827       }
828       // Update the last valid found id
829       ic->lastFoundId = ic->base.current->docId;
830 
831       // advance the doc id so next time we'll read a new record
832       ic->lastDocId++;
833 
834       // // make sure the flags are matching.
835       if ((ic->base.current->fieldMask & ic->fieldMask) == 0) {
836         // printf("Field masks don't match!\n");
837         continue;
838       }
839 
840       // If we need to match slop and order, we do it now, and possibly skip the result
841       if (ic->maxSlop >= 0) {
842         // printf("Checking SLOP... (%d)\n", ic->maxSlop);
843         if (!IndexResult_IsWithinRange(ic->base.current, ic->maxSlop, ic->inOrder)) {
844           // printf("Not within range!\n");
845           continue;
846         }
847       }
848 
849       //      for(size_t i = 0 ; i < array_len(ic->testers) ; ++i){
850       //        if(!ic->testers[i]->TextCriteria(ic->testers[i]->ctx, ic->lastFoundId)){
851       //          continue;
852       //        }
853       //      }
854 
855       ic->len++;
856       // printf("Returning OK\n");
857       return INDEXREAD_OK;
858     }
859   } while (1);
860 eof:
861   ic->base.isValid = 0;
862   return INDEXREAD_EOF;
863 }
864 
II_LastDocId(void * ctx)865 static t_docId II_LastDocId(void *ctx) {
866   // return last FOUND id, not last read id form any child
867   return ((IntersectIterator *)ctx)->lastFoundId;
868 }
869 
II_Len(void * ctx)870 static size_t II_Len(void *ctx) {
871   return ((IntersectIterator *)ctx)->len;
872 }
873 
874 /* A Not iterator works by wrapping another iterator, and returning OK for misses, and NOTFOUND
875  * for hits */
876 typedef struct {
877   IndexIterator base;
878   IndexIterator *child;
879   IndexCriteriaTester *childCT;
880   t_docId lastDocId;
881   t_docId maxDocId;
882   size_t len;
883   double weight;
884 } NotIterator, NotContext;
885 
NI_Abort(void * ctx)886 static void NI_Abort(void *ctx) {
887   NotContext *nc = ctx;
888   if (nc->child) {
889     nc->child->Abort(nc->child->ctx);
890   }
891 }
892 
NI_Rewind(void * ctx)893 static void NI_Rewind(void *ctx) {
894   NotContext *nc = ctx;
895   nc->lastDocId = 0;
896   nc->base.current->docId = 0;
897   if (nc->child) {
898     nc->child->Rewind(nc->child->ctx);
899   }
900 }
NI_Free(IndexIterator * it)901 static void NI_Free(IndexIterator *it) {
902 
903   NotContext *nc = it->ctx;
904   if (nc->child) {
905     nc->child->Free(nc->child);
906   }
907   if (nc->childCT) {
908     nc->childCT->Free(nc->childCT);
909   }
910   IndexResult_Free(nc->base.current);
911   rm_free(it);
912 }
913 
914 /* SkipTo for NOT iterator. If we have a match - return NOTFOUND. If we don't or we're at the end
915  * - return OK */
NI_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)916 static int NI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
917   NotContext *nc = ctx;
918 
919   // do not skip beyond max doc id
920   if (docId > nc->maxDocId) {
921     return INDEXREAD_EOF;
922   }
923   // If we don't have a child it means the sub iterator is of a meaningless expression.
924   // So negating it means we will always return OK!
925   if (!nc->child) {
926     goto ok;
927   }
928 
929   // Get the child's last read docId
930   t_docId childId = nc->child->LastDocId(nc->child->ctx);
931 
932   // If the child is ahead of the skipto id, it means the child doesn't have this id.
933   // So we are okay!
934   if (childId > docId) {
935     goto ok;
936   }
937 
938   // If the child docId is the one we are looking for, it's an anti match!
939   if (childId == docId) {
940     nc->base.current->docId = docId;
941     nc->lastDocId = docId;
942     *hit = nc->base.current;
943     return INDEXREAD_NOTFOUND;
944   }
945 
946   // read the next entry from the child
947   int rc = nc->child->SkipTo(nc->child->ctx, docId, hit);
948 
949   // OK means not found
950   if (rc == INDEXREAD_OK) {
951     return INDEXREAD_NOTFOUND;
952   }
953 
954 ok:
955   // NOT FOUND or end means OK. We need to set the docId on the hit we will bubble up
956   nc->base.current->docId = docId;
957   nc->lastDocId = docId;
958   *hit = nc->base.current;
959   return INDEXREAD_OK;
960 }
961 
962 typedef struct {
963   IndexCriteriaTester base;
964   IndexCriteriaTester *child;
965 } NI_CriteriaTester;
966 
NI_Test(struct IndexCriteriaTester * ct,t_docId id)967 static int NI_Test(struct IndexCriteriaTester *ct, t_docId id) {
968   NI_CriteriaTester *nct = (NI_CriteriaTester *)ct;
969   return !nct->child->Test(nct->child, id);
970 }
NI_TesterFree(struct IndexCriteriaTester * ct)971 static void NI_TesterFree(struct IndexCriteriaTester *ct) {
972   NI_CriteriaTester *nct = (NI_CriteriaTester *)ct;
973   nct->child->Free(nct->child);
974   rm_free(nct);
975 }
976 
NI_GetCriteriaTester(void * ctx)977 static IndexCriteriaTester *NI_GetCriteriaTester(void *ctx) {
978   NotContext *nc = ctx;
979   if (!nc->child) {
980     return NULL;
981   }
982   IndexCriteriaTester *ct = IITER_GET_CRITERIA_TESTER(nc->child);
983   if (!ct) {
984     return NULL;
985   }
986   NI_CriteriaTester *nct = rm_malloc(sizeof(*nct));
987   nct->child = ct;
988   nct->base.Test = NI_Test;
989   nct->base.Free = NI_TesterFree;
990   return &nct->base;
991 }
992 
NI_NumEstimated(void * ctx)993 static size_t NI_NumEstimated(void *ctx) {
994   NotContext *nc = ctx;
995   return nc->maxDocId;
996 }
997 
NI_ReadUnsorted(void * ctx,RSIndexResult ** hit)998 static int NI_ReadUnsorted(void *ctx, RSIndexResult **hit) {
999   NotContext *nc = ctx;
1000   while (nc->lastDocId > nc->maxDocId) {
1001     if (!nc->childCT->Test(nc->childCT, nc->lastDocId)) {
1002       nc->base.current->docId = nc->lastDocId;
1003       *hit = nc->base.current;
1004       ++nc->lastDocId;
1005       return INDEXREAD_OK;
1006     }
1007     ++nc->lastDocId;
1008   }
1009   return INDEXREAD_EOF;
1010 }
1011 
1012 /* Read from a NOT iterator. This is applicable only if the only or leftmost node of a query is a
1013  * NOT node. We simply read until max docId, skipping docIds that exist in the child*/
NI_ReadSorted(void * ctx,RSIndexResult ** hit)1014 static int NI_ReadSorted(void *ctx, RSIndexResult **hit) {
1015   NotContext *nc = ctx;
1016   if (nc->lastDocId > nc->maxDocId) return INDEXREAD_EOF;
1017 
1018   RSIndexResult *cr = NULL;
1019   // if we have a child, get the latest result from the child
1020   if (nc->child) {
1021     cr = IITER_CURRENT_RECORD(nc->child);
1022 
1023     if (cr == NULL || cr->docId == 0) {
1024       nc->child->Read(nc->child->ctx, &cr);
1025     }
1026   }
1027 
1028   // advance our reader by one, and let's test if it's a valid value or not
1029   nc->base.current->docId++;
1030 
1031   // If we don't have a child result, or the child result is ahead of the current counter,
1032   // we just increment our virtual result's id until we hit the child result's
1033   // in which case we'll read from the child and bypass it by one.
1034   if (cr == NULL || cr->docId > nc->base.current->docId) {
1035     goto ok;
1036   }
1037 
1038   while (cr->docId == nc->base.current->docId) {
1039     // advance our docId to the next possible id
1040     nc->base.current->docId++;
1041 
1042     // read the next entry from the child
1043     if (nc->child->Read(nc->child->ctx, &cr) == INDEXREAD_EOF) {
1044       break;
1045     }
1046   }
1047 
1048   // make sure we did not overflow
1049   if (nc->base.current->docId > nc->maxDocId) {
1050     return INDEXREAD_EOF;
1051   }
1052 
1053 ok:
1054   // Set the next entry and return ok
1055   nc->lastDocId = nc->base.current->docId;
1056   if (hit) *hit = nc->base.current;
1057   ++nc->len;
1058 
1059   return INDEXREAD_OK;
1060 }
1061 
1062 /* We always have next, in case anyone asks... ;) */
NI_HasNext(void * ctx)1063 static int NI_HasNext(void *ctx) {
1064   NotContext *nc = ctx;
1065 
1066   return nc->lastDocId <= nc->maxDocId;
1067 }
1068 
1069 /* Our len is the child's len? TBD it might be better to just return 0 */
NI_Len(void * ctx)1070 static size_t NI_Len(void *ctx) {
1071   NotContext *nc = ctx;
1072   return nc->len;
1073 }
1074 
1075 /* Last docId */
NI_LastDocId(void * ctx)1076 static t_docId NI_LastDocId(void *ctx) {
1077   NotContext *nc = ctx;
1078 
1079   return nc->lastDocId;
1080 }
1081 
NewNotIterator(IndexIterator * it,t_docId maxDocId,double weight)1082 IndexIterator *NewNotIterator(IndexIterator *it, t_docId maxDocId, double weight) {
1083 
1084   NotContext *nc = rm_malloc(sizeof(*nc));
1085   nc->base.current = NewVirtualResult(weight);
1086   nc->base.current->fieldMask = RS_FIELDMASK_ALL;
1087   nc->base.current->docId = 0;
1088   nc->child = it;
1089   nc->childCT = NULL;
1090   nc->lastDocId = 0;
1091   nc->maxDocId = maxDocId;
1092   nc->len = 0;
1093   nc->weight = weight;
1094 
1095   IndexIterator *ret = &nc->base;
1096   ret->ctx = nc;
1097   ret->GetCriteriaTester = NI_GetCriteriaTester;
1098   ret->NumEstimated = NI_NumEstimated;
1099   ret->Free = NI_Free;
1100   ret->HasNext = NI_HasNext;
1101   ret->LastDocId = NI_LastDocId;
1102   ret->Len = NI_Len;
1103   ret->Read = NI_ReadSorted;
1104   ret->SkipTo = NI_SkipTo;
1105   ret->Abort = NI_Abort;
1106   ret->Rewind = NI_Rewind;
1107   ret->mode = MODE_SORTED;
1108 
1109   if (nc->child && nc->child->mode == MODE_UNSORTED) {
1110     nc->childCT = IITER_GET_CRITERIA_TESTER(nc->child);
1111     RS_LOG_ASSERT(nc->childCT, "childCT should not be NULL");
1112     ret->Read = NI_ReadUnsorted;
1113   }
1114 
1115   return ret;
1116 }
1117 
1118 /**********************************************************
1119  * Optional clause iterator
1120  **********************************************************/
1121 
1122 typedef struct {
1123   IndexIterator base;
1124   IndexIterator *child;
1125   IndexCriteriaTester *childCT;
1126   RSIndexResult *virt;
1127   t_fieldMask fieldMask;
1128   t_docId lastDocId;
1129   t_docId maxDocId;
1130   t_docId nextRealId;
1131   double weight;
1132 } OptionalMatchContext, OptionalIterator;
1133 
OI_Free(IndexIterator * it)1134 static void OI_Free(IndexIterator *it) {
1135   OptionalMatchContext *nc = it->ctx;
1136   if (nc->child) {
1137     nc->child->Free(nc->child);
1138   }
1139   if (nc->childCT) {
1140     nc->childCT->Free(nc->childCT);
1141   }
1142   IndexResult_Free(nc->virt);
1143   rm_free(it);
1144 }
1145 
OI_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)1146 static int OI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
1147   OptionalMatchContext *nc = ctx;
1148   //  printf("OI_SkipTo => %llu!. NextReal: %llu. Max: %llu. Last: %llu\n", docId, nc->nextRealId,
1149   //  nc->maxDocId, nc->lastDocId);
1150 
1151   int found = 0;
1152 
1153   // Set the current ID
1154   nc->lastDocId = docId;
1155 
1156   if (nc->lastDocId > nc->maxDocId) {
1157     return INDEXREAD_EOF;
1158   }
1159 
1160   if (!nc->child) {
1161     nc->virt->docId = docId;
1162     nc->base.current = nc->virt;
1163     return INDEXREAD_OK;
1164   }
1165 
1166   if (docId == 0) {
1167     return nc->base.Read(ctx, hit);
1168   }
1169 
1170   if (docId == nc->nextRealId) {
1171     // Edge case -- match on the docid we just looked for
1172     found = 1;
1173     // reset current pointer since this might have been a prior
1174     // virt return
1175     nc->base.current = nc->child->current;
1176 
1177   } else if (docId > nc->nextRealId) {
1178     int rc = nc->child->SkipTo(nc->child->ctx, docId, &nc->base.current);
1179     if (rc == INDEXREAD_OK) {
1180       found = 1;
1181     }
1182     if (nc->base.current) {
1183       nc->nextRealId = nc->base.current->docId;
1184     }
1185   }
1186 
1187   if (found) {
1188     // Has a real hit
1189     RSIndexResult *r = nc->base.current;
1190   } else {
1191     nc->virt->docId = docId;
1192     nc->base.current = nc->virt;
1193   }
1194 
1195   *hit = nc->base.current;
1196   return INDEXREAD_OK;
1197 }
1198 
OI_Test(struct IndexCriteriaTester * ct,t_docId id)1199 static int OI_Test(struct IndexCriteriaTester *ct, t_docId id) {
1200   return 1;
1201 }
1202 
OI_TesterFree(struct IndexCriteriaTester * ct)1203 static void OI_TesterFree(struct IndexCriteriaTester *ct) {
1204   rm_free(ct);
1205 }
1206 
OI_GetCriteriaTester(void * ctx)1207 static IndexCriteriaTester *OI_GetCriteriaTester(void *ctx) {
1208   IndexCriteriaTester *tester = rm_malloc(sizeof(*tester));
1209   tester->Test = OI_Test;
1210   tester->Free = OI_TesterFree;
1211   return tester;
1212 }
1213 
OI_NumEstimated(void * ctx)1214 static size_t OI_NumEstimated(void *ctx) {
1215   OptionalMatchContext *nc = ctx;
1216   return nc->maxDocId;
1217 }
1218 
OI_ReadUnsorted(void * ctx,RSIndexResult ** hit)1219 static int OI_ReadUnsorted(void *ctx, RSIndexResult **hit) {
1220   OptionalMatchContext *nc = ctx;
1221   if (nc->lastDocId >= nc->maxDocId) return INDEXREAD_EOF;
1222   nc->lastDocId++;
1223   nc->base.current = nc->virt;
1224   nc->base.current->docId = nc->lastDocId;
1225   *hit = nc->base.current;
1226   if (nc->childCT->Test(nc->childCT, nc->lastDocId)) {
1227     nc->base.current->weight = nc->weight * 2;  // we increase the weight cause we found the id
1228   } else {
1229     nc->base.current->weight = nc->weight * 2;  // we do increase the weight cause id was not found
1230   }
1231   return INDEXREAD_OK;
1232 }
1233 
1234 /* Read has no meaning in the sense of an OPTIONAL iterator, so we just read the next record from
1235  * our child */
OI_ReadSorted(void * ctx,RSIndexResult ** hit)1236 static int OI_ReadSorted(void *ctx, RSIndexResult **hit) {
1237   OptionalMatchContext *nc = ctx;
1238   if (nc->lastDocId >= nc->maxDocId) {
1239     return INDEXREAD_EOF;
1240   }
1241 
1242   // Increase the size by one
1243   nc->lastDocId++;
1244 
1245   if (nc->lastDocId > nc->nextRealId) {
1246     int rc = nc->child->Read(nc->child->ctx, &nc->base.current);
1247     if (rc == INDEXREAD_EOF) {
1248       nc->nextRealId = nc->maxDocId + 1;
1249     } else {
1250       nc->nextRealId = nc->base.current->docId;
1251     }
1252   }
1253 
1254   if (nc->lastDocId != nc->nextRealId) {
1255     nc->base.current = nc->virt;
1256     nc->base.current->weight = 0;
1257   } else {
1258     nc->base.current->weight = nc->weight;
1259   }
1260 
1261   nc->base.current->docId = nc->lastDocId;
1262   *hit = nc->base.current;
1263   return INDEXREAD_OK;
1264 }
1265 
1266 /* We always have next, in case anyone asks... ;) */
OI_HasNext(void * ctx)1267 static int OI_HasNext(void *ctx) {
1268   OptionalMatchContext *nc = ctx;
1269   return (nc->lastDocId <= nc->maxDocId);
1270 }
1271 
OI_Abort(void * ctx)1272 static void OI_Abort(void *ctx) {
1273   OptionalMatchContext *nc = ctx;
1274   if (nc->child) {
1275     nc->child->Abort(nc->child->ctx);
1276   }
1277 }
1278 
1279 /* Our len is the child's len? TBD it might be better to just return 0 */
OI_Len(void * ctx)1280 static size_t OI_Len(void *ctx) {
1281   OptionalMatchContext *nc = ctx;
1282   return nc->child ? nc->child->Len(nc->child->ctx) : 0;
1283 }
1284 
1285 /* Last docId */
OI_LastDocId(void * ctx)1286 static t_docId OI_LastDocId(void *ctx) {
1287   OptionalMatchContext *nc = ctx;
1288 
1289   return nc->lastDocId;
1290 }
1291 
OI_Rewind(void * ctx)1292 static void OI_Rewind(void *ctx) {
1293   OptionalMatchContext *nc = ctx;
1294   nc->lastDocId = 0;
1295   nc->virt->docId = 0;
1296   nc->nextRealId = 0;
1297   if (nc->child) {
1298     nc->child->Rewind(nc->child->ctx);
1299   }
1300 }
1301 
NewOptionalIterator(IndexIterator * it,t_docId maxDocId,double weight)1302 IndexIterator *NewOptionalIterator(IndexIterator *it, t_docId maxDocId, double weight) {
1303   OptionalMatchContext *nc = rm_calloc(1, sizeof(*nc));
1304   nc->virt = NewVirtualResult(weight);
1305   nc->virt->fieldMask = RS_FIELDMASK_ALL;
1306   nc->virt->freq = 1;
1307   nc->base.current = nc->virt;
1308   nc->child = it;
1309   nc->childCT = NULL;
1310   nc->lastDocId = 0;
1311   nc->maxDocId = maxDocId;
1312   nc->weight = weight;
1313   nc->nextRealId = 0;
1314 
1315   IndexIterator *ret = &nc->base;
1316   ret->ctx = nc;
1317   ret->GetCriteriaTester = OI_GetCriteriaTester;
1318   ret->NumEstimated = OI_NumEstimated;
1319   ret->Free = OI_Free;
1320   ret->HasNext = OI_HasNext;
1321   ret->LastDocId = OI_LastDocId;
1322   ret->Len = OI_Len;
1323   ret->Read = OI_ReadSorted;
1324   ret->SkipTo = OI_SkipTo;
1325   ret->Abort = OI_Abort;
1326   ret->Rewind = OI_Rewind;
1327   ret->mode = MODE_SORTED;
1328 
1329   if (nc->child && nc->child->mode == MODE_UNSORTED) {
1330     nc->childCT = IITER_GET_CRITERIA_TESTER(nc->child);
1331     RS_LOG_ASSERT(nc->childCT, "childCT should not be NULL");
1332     ret->Read = OI_ReadUnsorted;
1333   }
1334   if (!nc->child) {
1335     nc->child = NewEmptyIterator();
1336   }
1337 
1338   return ret;
1339 }
1340 
1341 /* Wildcard iterator, matchin ALL documents in the index. This is used for one thing only -
1342  * purely negative queries. If the root of the query is a negative expression, we cannot process
1343  * it
1344  * without a positive expression. So we create a wildcard iterator that basically just iterates
1345  * all
1346  * the incremental document ids, and matches every skip within its range. */
1347 typedef struct {
1348   IndexIterator base;
1349   t_docId topId;
1350   t_docId current;
1351 } WildcardIterator, WildcardIteratorCtx;
1352 
1353 /* Free a wildcard iterator */
WI_Free(IndexIterator * it)1354 static void WI_Free(IndexIterator *it) {
1355 
1356   WildcardIteratorCtx *nc = it->ctx;
1357   IndexResult_Free(CURRENT_RECORD(nc));
1358   rm_free(it);
1359 }
1360 
1361 /* Read reads the next consecutive id, unless we're at the end */
WI_Read(void * ctx,RSIndexResult ** hit)1362 static int WI_Read(void *ctx, RSIndexResult **hit) {
1363   WildcardIteratorCtx *nc = ctx;
1364   if (nc->current > nc->topId) {
1365     return INDEXREAD_EOF;
1366   }
1367   CURRENT_RECORD(nc)->docId = nc->current++;
1368   if (hit) {
1369     *hit = CURRENT_RECORD(nc);
1370   }
1371   return INDEXREAD_OK;
1372 }
1373 
1374 /* Skipto for wildcard iterator - always succeeds, but this should normally not happen as it has
1375  * no
1376  * meaning */
WI_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)1377 static int WI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
1378   // printf("WI_Skipto %d\n", docId);
1379   WildcardIteratorCtx *nc = ctx;
1380 
1381   if (nc->current > nc->topId) return INDEXREAD_EOF;
1382 
1383   if (docId == 0) return WI_Read(ctx, hit);
1384 
1385   nc->current = docId;
1386   CURRENT_RECORD(nc)->docId = docId;
1387   if (hit) {
1388     *hit = CURRENT_RECORD(nc);
1389   }
1390   return INDEXREAD_OK;
1391 }
1392 
WI_Abort(void * ctx)1393 static void WI_Abort(void *ctx) {
1394   WildcardIteratorCtx *nc = ctx;
1395   nc->current = nc->topId + 1;
1396 }
1397 
1398 /* We always have next, in case anyone asks... ;) */
WI_HasNext(void * ctx)1399 static int WI_HasNext(void *ctx) {
1400   WildcardIteratorCtx *nc = ctx;
1401 
1402   return nc->current <= nc->topId;
1403 }
1404 
1405 /* Our len is the len of the index... */
WI_Len(void * ctx)1406 static size_t WI_Len(void *ctx) {
1407   WildcardIteratorCtx *nc = ctx;
1408   return nc->topId;
1409 }
1410 
1411 /* Last docId */
WI_LastDocId(void * ctx)1412 static t_docId WI_LastDocId(void *ctx) {
1413   WildcardIteratorCtx *nc = ctx;
1414 
1415   return nc->current;
1416 }
1417 
WI_Rewind(void * p)1418 static void WI_Rewind(void *p) {
1419   WildcardIteratorCtx *ctx = p;
1420   ctx->current = 1;
1421 }
1422 
WI_NumEstimated(void * p)1423 static size_t WI_NumEstimated(void *p) {
1424   return SIZE_MAX;
1425 }
1426 
1427 /* Create a new wildcard iterator */
NewWildcardIterator(t_docId maxId)1428 IndexIterator *NewWildcardIterator(t_docId maxId) {
1429   WildcardIteratorCtx *c = rm_calloc(1, sizeof(*c));
1430   c->current = 1;
1431   c->topId = maxId;
1432 
1433   CURRENT_RECORD(c) = NewVirtualResult(1);
1434   CURRENT_RECORD(c)->freq = 1;
1435   CURRENT_RECORD(c)->fieldMask = RS_FIELDMASK_ALL;
1436 
1437   IndexIterator *ret = &c->base;
1438   ret->ctx = c;
1439   ret->Free = WI_Free;
1440   ret->HasNext = WI_HasNext;
1441   ret->LastDocId = WI_LastDocId;
1442   ret->Len = WI_Len;
1443   ret->Read = WI_Read;
1444   ret->SkipTo = WI_SkipTo;
1445   ret->Abort = WI_Abort;
1446   ret->Rewind = WI_Rewind;
1447   ret->NumEstimated = WI_NumEstimated;
1448   return ret;
1449 }
1450 
EOI_Read(void * p,RSIndexResult ** e)1451 static int EOI_Read(void *p, RSIndexResult **e) {
1452   return INDEXREAD_EOF;
1453 }
EOI_Free(struct indexIterator * self)1454 static void EOI_Free(struct indexIterator *self) {
1455   // Nothing
1456 }
EOI_NumEstimated(void * ctx)1457 static size_t EOI_NumEstimated(void *ctx) {
1458   return 0;
1459 }
EOI_Len(void * ctx)1460 static size_t EOI_Len(void *ctx) {
1461   return 0;
1462 }
EOI_LastDocId(void * ctx)1463 static t_docId EOI_LastDocId(void *ctx) {
1464   return 0;
1465 }
1466 
EOI_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)1467 static int EOI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
1468   return INDEXREAD_EOF;
1469 }
EOI_Abort(void * ctx)1470 static void EOI_Abort(void *ctx) {
1471 }
EOI_Rewind(void * ctx)1472 static void EOI_Rewind(void *ctx) {
1473 }
1474 
1475 static IndexIterator eofIterator = {.Read = EOI_Read,
1476                                     .Free = EOI_Free,
1477                                     .SkipTo = EOI_SkipTo,
1478                                     .Len = EOI_Len,
1479                                     .LastDocId = EOI_LastDocId,
1480                                     .NumEstimated = EOI_NumEstimated,
1481                                     .Abort = EOI_Abort,
1482                                     .Rewind = EOI_Rewind};
1483 
NewEmptyIterator(void)1484 IndexIterator *NewEmptyIterator(void) {
1485   return &eofIterator;
1486 }
1487 
1488 // LCOV_EXCL_START unused
IndexIterator_GetTypeString(const IndexIterator * it)1489 const char *IndexIterator_GetTypeString(const IndexIterator *it) {
1490   if (it->Free == UnionIterator_Free) {
1491     return "UNION";
1492   } else if (it->Free == IntersectIterator_Free) {
1493     return "INTERSECTION";
1494   } else if (it->Free == OI_Free) {
1495     return "OPTIONAL";
1496   } else if (it->Free == WI_Free) {
1497     return "WILDCARD";
1498   } else if (it->Free == NI_Free) {
1499     return "NOT";
1500   } else if (it->Free == ReadIterator_Free) {
1501     return "IIDX";
1502   } else if (it == &eofIterator) {
1503     return "EMPTY";
1504   } else {
1505     return "Unknown";
1506   }
1507 }
1508 // LCOV_EXCL_STOP
1509