1 #include "forward_index.h"
2 #include "index.h"
3 #include "varint.h"
4 #include "spec.h"
5 #include <math.h>
6 #include <limits.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <sys/param.h>
10 #include "rmalloc.h"
11 #include "rmutil/rm_assert.h"
12
13 static int UI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit);
14 static inline int UI_ReadUnsorted(void *ctx, RSIndexResult **hit);
15 static int UI_ReadSorted(void *ctx, RSIndexResult **hit);
16 static size_t UI_NumEstimated(void *ctx);
17 static IndexCriteriaTester *UI_GetCriteriaTester(void *ctx);
18 static size_t UI_Len(void *ctx);
19
20 static int II_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit);
21 static int II_ReadUnsorted(void *ctx, RSIndexResult **hit);
22 static IndexCriteriaTester *II_GetCriteriaTester(void *ctx);
23 static int II_ReadSorted(void *ctx, RSIndexResult **hit);
24 static size_t II_NumEstimated(void *ctx);
25 static size_t II_Len(void *ctx);
26 static t_docId II_LastDocId(void *ctx);
27
28 #define CURRENT_RECORD(ii) (ii)->base.current
29
30 typedef struct {
31 IndexIterator base;
32 /**
33 * We maintain two iterator arrays. One is the original iterator list, and
34 * the other is the list of currently active iterators. When an iterator
35 * reaches EOF, it is set to NULL in the `its` list, but is still retained in
36 * the `origits` list, for the purpose of supporting things like Rewind() and
37 * Free()
38 */
39 IndexIterator **its;
40 IndexIterator **origits;
41 uint32_t num;
42 uint32_t norig;
43 uint32_t currIt;
44 t_docId minDocId;
45
46 // If set to 1, we exit skips after the first hit found and not merge further results
47 int quickExit;
48 size_t nexpected;
49 double weight;
50 uint64_t len;
51 } UnionIterator;
52
UI_LastDocId(void * ctx)53 static inline t_docId UI_LastDocId(void *ctx) {
54 return ((UnionIterator *)ctx)->minDocId;
55 }
56
UI_SyncIterList(UnionIterator * ui)57 static void UI_SyncIterList(UnionIterator *ui) {
58 ui->num = ui->norig;
59 memcpy(ui->its, ui->origits, sizeof(*ui->its) * ui->norig);
60 for (size_t ii = 0; ii < ui->num; ++ii) {
61 ui->its[ii]->minId = 0;
62 }
63 }
64
65 /**
66 * Removes the exhausted iterator from the active list, so that future
67 * reads will no longer iterate over it
68 */
UI_RemoveExhausted(UnionIterator * it,size_t badix)69 static size_t UI_RemoveExhausted(UnionIterator *it, size_t badix) {
70 // e.g. assume we have 10 entries, and we want to remove index 8, which means
71 // one more valid entry at the end. This means we use
72 // source: its + 8 + 1
73 // destination: its + 8
74 // number: it->len (10) - (8) - 1 == 1
75 memmove(it->its + badix, it->its + badix + 1, sizeof(*it->its) * (it->num - badix - 1));
76 it->num--;
77 // Repeat the same index again, because we have a new iterator at the same
78 // position
79 return badix - 1;
80 }
81
UI_Abort(void * ctx)82 static void UI_Abort(void *ctx) {
83 UnionIterator *it = ctx;
84 IITER_SET_EOF(&it->base);
85 for (int i = 0; i < it->num; i++) {
86 if (it->its[i]) {
87 it->its[i]->Abort(it->its[i]->ctx);
88 }
89 }
90 }
91
UI_Rewind(void * ctx)92 static void UI_Rewind(void *ctx) {
93 UnionIterator *ui = ctx;
94 IITER_CLEAR_EOF(&ui->base);
95 ui->minDocId = 0;
96 CURRENT_RECORD(ui)->docId = 0;
97
98 UI_SyncIterList(ui);
99
100 // rewind all child iterators
101 for (size_t i = 0; i < ui->num; i++) {
102 ui->its[i]->minId = 0;
103 ui->its[i]->Rewind(ui->its[i]->ctx);
104 }
105 }
106
NewUnionIterator(IndexIterator ** its,int num,DocTable * dt,int quickExit,double weight)107 IndexIterator *NewUnionIterator(IndexIterator **its, int num, DocTable *dt, int quickExit,
108 double weight) {
109 // create union context
110 UnionIterator *ctx = rm_calloc(1, sizeof(UnionIterator));
111 ctx->origits = its;
112 ctx->weight = weight;
113 ctx->num = num;
114 ctx->norig = num;
115 IITER_CLEAR_EOF(&ctx->base);
116 CURRENT_RECORD(ctx) = NewUnionResult(num, weight);
117 ctx->len = 0;
118 ctx->quickExit = quickExit;
119 ctx->its = rm_calloc(ctx->num, sizeof(*ctx->its));
120 ctx->nexpected = 0;
121 ctx->currIt = 0;
122
123 // bind the union iterator calls
124 IndexIterator *it = &ctx->base;
125 it->mode = MODE_SORTED;
126 it->ctx = ctx;
127 it->GetCriteriaTester = UI_GetCriteriaTester;
128 it->NumEstimated = UI_NumEstimated;
129 it->LastDocId = UI_LastDocId;
130 it->Read = UI_ReadSorted;
131 it->SkipTo = UI_SkipTo;
132 it->HasNext = NULL;
133 it->Free = UnionIterator_Free;
134 it->Len = UI_Len;
135 it->Abort = UI_Abort;
136 it->Rewind = UI_Rewind;
137 UI_SyncIterList(ctx);
138
139 for (size_t i = 0; i < num; ++i) {
140 ctx->nexpected += IITER_NUM_ESTIMATED(its[i]);
141 if (its[i]->mode == MODE_UNSORTED) {
142 it->mode = MODE_UNSORTED;
143 it->Read = UI_ReadUnsorted;
144 }
145 }
146
147 const size_t maxresultsSorted = RSGlobalConfig.maxResultsToUnsortedMode;
148 // this code is normally (and should be) dead.
149 // i.e. the deepest-most IndexIterator does not have a CT
150 // so it will always eventually return NULL CT
151 if (it->mode == MODE_SORTED && ctx->nexpected >= maxresultsSorted) {
152 // make sure all the children support CriteriaTester
153 int ctSupported = 1;
154 for (int i = 0; i < ctx->num; ++i) {
155 IndexCriteriaTester *tester = IITER_GET_CRITERIA_TESTER(ctx->origits[i]);
156 if (!tester) {
157 ctSupported = 0;
158 break;
159 }
160 tester->Free(tester);
161 }
162 if (ctSupported) {
163 it->mode = MODE_UNSORTED;
164 it->Read = UI_ReadUnsorted;
165 }
166 }
167
168 return it;
169 }
170
171 typedef struct {
172 IndexCriteriaTester base;
173 IndexCriteriaTester **children;
174 int nchildren;
175 } UnionCriteriaTester;
176
UI_Test(struct IndexCriteriaTester * ct,t_docId id)177 static int UI_Test(struct IndexCriteriaTester *ct, t_docId id) {
178 UnionCriteriaTester *uct = (UnionCriteriaTester *)ct;
179 for (int i = 0; i < uct->nchildren; ++i) {
180 if (uct->children[i]->Test(uct->children[i], id)) {
181 return 1;
182 }
183 }
184 return 0;
185 }
186
UI_TesterFree(struct IndexCriteriaTester * ct)187 static void UI_TesterFree(struct IndexCriteriaTester *ct) {
188 UnionCriteriaTester *uct = (UnionCriteriaTester *)ct;
189 for (int i = 0; i < uct->nchildren; ++i) {
190 if (uct->children[i]) {
191 uct->children[i]->Free(uct->children[i]);
192 }
193 }
194 rm_free(uct->children);
195 rm_free(uct);
196 }
197
UI_GetCriteriaTester(void * ctx)198 static IndexCriteriaTester *UI_GetCriteriaTester(void *ctx) {
199 UnionIterator *ui = ctx;
200 IndexCriteriaTester **children = rm_malloc(ui->num * sizeof(IndexCriteriaTester *));
201 for (size_t i = 0; i < ui->num; ++i) {
202 children[i] = IITER_GET_CRITERIA_TESTER(ui->origits[i]);
203 if (!children[i]) {
204 for (size_t j = 0; j < i; j++) {
205 children[j]->Free(children[j]);
206 }
207 rm_free(children);
208 return NULL;
209 }
210 }
211 UnionCriteriaTester *ct = rm_malloc(sizeof(*ct));
212 ct->children = children;
213 ct->nchildren = ui->num;
214 ct->base.Test = UI_Test;
215 ct->base.Free = UI_TesterFree;
216 return &ct->base;
217 }
218
UI_NumEstimated(void * ctx)219 static size_t UI_NumEstimated(void *ctx) {
220 UnionIterator *ui = ctx;
221 return ui->nexpected;
222 }
223
UI_ReadUnsorted(void * ctx,RSIndexResult ** hit)224 static inline int UI_ReadUnsorted(void *ctx, RSIndexResult **hit) {
225 UnionIterator *ui = ctx;
226 int rc = INDEXREAD_OK;
227 RSIndexResult *res = NULL;
228 while (ui->currIt < ui->num) {
229 rc = ui->origits[ui->currIt]->Read(ui->origits[ui->currIt]->ctx, &res);
230 if (rc == INDEXREAD_OK) {
231 *hit = res;
232 return rc;
233 }
234 ++ui->currIt;
235 }
236 return INDEXREAD_EOF;
237 }
238
UI_ReadSorted(void * ctx,RSIndexResult ** hit)239 static inline int UI_ReadSorted(void *ctx, RSIndexResult **hit) {
240 UnionIterator *ui = ctx;
241 // nothing to do
242 if (ui->num == 0 || !IITER_HAS_NEXT(&ui->base)) {
243 IITER_SET_EOF(&ui->base);
244 return INDEXREAD_EOF;
245 }
246
247 int numActive = 0;
248 AggregateResult_Reset(CURRENT_RECORD(ui));
249
250 do {
251
252 // find the minimal iterator
253 t_docId minDocId = UINT64_MAX;
254 IndexIterator *minIt = NULL;
255 numActive = 0;
256 int rc = INDEXREAD_EOF;
257 unsigned nits = ui->num;
258
259 for (unsigned i = 0; i < nits; i++) {
260 IndexIterator *it = ui->its[i];
261 RSIndexResult *res = IITER_CURRENT_RECORD(it);
262 rc = INDEXREAD_OK;
263 // if this hit is behind the min id - read the next entry
264 // printf("ui->docIds[%d]: %d, ui->minDocId: %d\n", i, ui->docIds[i], ui->minDocId);
265 while (it->minId <= ui->minDocId && rc != INDEXREAD_EOF) {
266 rc = INDEXREAD_NOTFOUND;
267 // read while we're not at the end and perhaps the flags do not match
268 while (rc == INDEXREAD_NOTFOUND) {
269 rc = it->Read(it->ctx, &res);
270 if (res) {
271 it->minId = res->docId;
272 }
273 }
274 }
275
276 if (rc != INDEXREAD_EOF) {
277 numActive++;
278 } else {
279 // Remove this from the active list
280 i = UI_RemoveExhausted(ui, i);
281 nits = ui->num;
282 continue;
283 }
284
285 if (rc == INDEXREAD_OK && res->docId <= minDocId) {
286 minDocId = res->docId;
287 minIt = it;
288 }
289 }
290
291 // take the minimum entry and collect all results matching to it
292 if (minIt) {
293 UI_SkipTo(ui, minIt->minId, hit);
294 // return INDEXREAD_OK;
295 ui->minDocId = minIt->minId;
296 ui->len++;
297 return INDEXREAD_OK;
298 }
299
300 } while (numActive > 0);
301 IITER_SET_EOF(&ui->base);
302
303 return INDEXREAD_EOF;
304 }
305
306 /**
307 Skip to the given docId, or one place after it
308 @param ctx IndexReader context
309 @param docId docId to seek to
310 @param hit an index hit we put our reads into
311 @return INDEXREAD_OK if found, INDEXREAD_NOTFOUND if not found, INDEXREAD_EOF
312 if
313 at EOF
314 */
UI_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)315 static int UI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
316 UnionIterator *ui = ctx;
317 RS_LOG_ASSERT(ui->base.mode == MODE_SORTED, "union iterator mode is not MODE_SORTED");
318
319 // printf("UI %p skipto %d\n", ui, docId);
320
321 if (docId == 0) {
322 return UI_ReadSorted(ctx, hit);
323 }
324
325 if (!IITER_HAS_NEXT(&ui->base)) {
326 return INDEXREAD_EOF;
327 }
328
329 // reset the current hitf
330 AggregateResult_Reset(CURRENT_RECORD(ui));
331 CURRENT_RECORD(ui)->weight = ui->weight;
332 int numActive = 0;
333 int found = 0;
334 int rc = INDEXREAD_EOF;
335 unsigned num = ui->num;
336 const int quickExit = ui->quickExit;
337 t_docId minDocId = UINT32_MAX;
338 IndexIterator *it;
339 RSIndexResult *res;
340 RSIndexResult *minResult = NULL;
341 // skip all iterators to docId
342 for (unsigned i = 0; i < num; i++) {
343 it = ui->its[i];
344 // this happens for non existent words
345 res = NULL;
346 // If the requested docId is larger than the last read id from the iterator,
347 // we need to read an entry from the iterator, seeking to this docId
348 if (it->minId < docId) {
349 if ((rc = it->SkipTo(it->ctx, docId, &res)) == INDEXREAD_EOF) {
350 i = UI_RemoveExhausted(ui, i);
351 num = ui->num;
352 continue;
353 }
354 if (res) {
355 it->minId = res->docId;
356 }
357
358 } else {
359 // if the iterator is ahead of docId - we avoid reading the entry
360 // in this case, we are either past or at the requested docId, no need to actually read
361 rc = (it->minId == docId) ? INDEXREAD_OK : INDEXREAD_NOTFOUND;
362 res = IITER_CURRENT_RECORD(it);
363 }
364
365 // if we've read successfully, update the minimal docId we've found
366 if (it->minId && rc != INDEXREAD_EOF) {
367 if (it->minId < minDocId || !minResult) {
368 minResult = res;
369 minDocId = it->minId;
370 }
371 // sminDocId = MIN(ui->docIds[i], minDocId);
372 }
373
374 // we found a hit - continue to all results matching the same docId
375 if (rc == INDEXREAD_OK) {
376
377 // add the result to the aggregate result we are holding
378 if (hit) {
379 AggregateResult_AddChild(CURRENT_RECORD(ui), res ? res : IITER_CURRENT_RECORD(it));
380 }
381 ui->minDocId = it->minId;
382 ++found;
383 }
384 ++numActive;
385 // If we've found a single entry and we are iterating in quick exit mode - exit now
386 if (found && quickExit) break;
387 }
388
389 // all iterators are at the end
390 if (numActive == 0) {
391 IITER_SET_EOF(&ui->base);
392 return INDEXREAD_EOF;
393 }
394
395 // copy our aggregate to the upstream hit
396 *hit = CURRENT_RECORD(ui);
397 if (found > 0) {
398 return INDEXREAD_OK;
399 }
400 if (minResult) {
401 *hit = minResult;
402 AggregateResult_AddChild(CURRENT_RECORD(ui), minResult);
403 }
404 // not found...
405 ui->minDocId = minDocId;
406 return INDEXREAD_NOTFOUND;
407 }
408
UnionIterator_Free(IndexIterator * itbase)409 void UnionIterator_Free(IndexIterator *itbase) {
410 if (itbase == NULL) return;
411
412 UnionIterator *ui = itbase->ctx;
413 for (int i = 0; i < ui->norig; i++) {
414 IndexIterator *it = ui->origits[i];
415 if (it) {
416 it->Free(it);
417 }
418 }
419
420 IndexResult_Free(CURRENT_RECORD(ui));
421 rm_free(ui->its);
422 rm_free(ui->origits);
423 rm_free(ui);
424 }
425
UI_Len(void * ctx)426 static size_t UI_Len(void *ctx) {
427 return ((UnionIterator *)ctx)->len;
428 }
429
430 /* The context used by the intersection methods during iterating an intersect
431 * iterator */
432 typedef struct {
433 IndexIterator base;
434 IndexIterator **its;
435 IndexIterator *bestIt;
436 IndexCriteriaTester **testers;
437 t_docId *docIds;
438 int *rcs;
439 unsigned num;
440 size_t len;
441 int maxSlop;
442 int inOrder;
443 // the last read docId from any child
444 t_docId lastDocId;
445 // the last id that was found on all children
446 t_docId lastFoundId;
447
448 // RSIndexResult *result;
449 DocTable *docTable;
450 t_fieldMask fieldMask;
451 double weight;
452 size_t nexpected;
453 } IntersectIterator;
454
IntersectIterator_Free(IndexIterator * it)455 void IntersectIterator_Free(IndexIterator *it) {
456 if (it == NULL) return;
457 IntersectIterator *ui = it->ctx;
458 for (int i = 0; i < ui->num; i++) {
459 if (ui->its[i] != NULL) {
460 ui->its[i]->Free(ui->its[i]);
461 }
462 // IndexResult_Free(&ui->currentHits[i]);
463 }
464
465 for (int i = 0; i < array_len(ui->testers); i++) {
466 if (ui->testers[i]) {
467 ui->testers[i]->Free(ui->testers[i]);
468 }
469 }
470 if (ui->bestIt) {
471 ui->bestIt->Free(ui->bestIt);
472 }
473
474 rm_free(ui->docIds);
475 rm_free(ui->its);
476 IndexResult_Free(it->current);
477 array_free(ui->testers);
478 rm_free(it);
479 }
480
II_Abort(void * ctx)481 static void II_Abort(void *ctx) {
482 IntersectIterator *it = ctx;
483 it->base.isValid = 0;
484 for (int i = 0; i < it->num; i++) {
485 if (it->its[i]) {
486 it->its[i]->Abort(it->its[i]->ctx);
487 }
488 }
489 }
490
II_Rewind(void * ctx)491 static void II_Rewind(void *ctx) {
492 IntersectIterator *ii = ctx;
493 ii->base.isValid = 1;
494 ii->lastDocId = 0;
495
496 // rewind all child iterators
497 for (int i = 0; i < ii->num; i++) {
498 ii->docIds[i] = 0;
499 if (ii->its[i]) {
500 ii->its[i]->Rewind(ii->its[i]->ctx);
501 }
502 }
503 }
504
505 typedef int (*CompareFunc)(const void *a, const void *b);
cmpIter(IndexIterator ** it1,IndexIterator ** it2)506 static int cmpIter(IndexIterator **it1, IndexIterator **it2) {
507 if (!*it1 && !*it2) return 0;
508 if (!*it1) return -1;
509 if (!*it2) return 1;
510
511 return (int)((*it1)->NumEstimated((*it1)->ctx) - (*it2)->NumEstimated((*it2)->ctx));
512 }
513
II_SortChildren(IntersectIterator * ctx)514 static void II_SortChildren(IntersectIterator *ctx) {
515 /**
516 * 1. Go through all the iterators, ensuring none of them is NULL
517 * (replace with empty if indeed NULL)
518 * 2. If all the iterators are unsorted then set the mode to UNSORTED
519 * 3. If all or any of the iterators are sorted, then remove the
520 * unsorted iterators from the equation, simply adding them to the
521 * tester list
522 */
523 IndexIterator **unsortedIts = NULL;
524 IndexIterator **sortedIts = rm_malloc(sizeof(IndexIterator *) * ctx->num);
525 size_t sortedItsSize = 0;
526 for (size_t i = 0; i < ctx->num; ++i) {
527 IndexIterator *curit = ctx->its[i];
528
529 if (!curit) {
530 // If the current iterator is empty, then the entire
531 // query will fail; just free all the iterators and call it good
532 if (sortedIts) {
533 rm_free(sortedIts);
534 }
535 if (unsortedIts) {
536 array_free(unsortedIts);
537 }
538 ctx->bestIt = NULL;
539 return;
540 }
541
542 size_t amount = IITER_NUM_ESTIMATED(curit);
543 if (amount < ctx->nexpected) {
544 ctx->nexpected = amount;
545 ctx->bestIt = curit;
546 }
547
548 if (curit->mode == MODE_UNSORTED) {
549 unsortedIts = array_ensure_append(unsortedIts, &curit, 1, IndexIterator *);
550 } else {
551 sortedIts[sortedItsSize++] = curit;
552 }
553 }
554
555 if (unsortedIts) {
556 if (array_len(unsortedIts) == ctx->num) {
557 ctx->base.mode = MODE_UNSORTED;
558 ctx->base.Read = II_ReadUnsorted;
559 ctx->num = 1;
560 ctx->its[0] = ctx->bestIt;
561 // The other iterators are also stored in unsortedIts
562 // and because we know that there are no sorted iterators
563 }
564
565 for (size_t ii = 0; ii < array_len(unsortedIts); ++ii) {
566 IndexIterator *cur = unsortedIts[ii];
567 if (ctx->base.mode == MODE_UNSORTED && ctx->bestIt == cur) {
568 continue;
569 }
570 IndexCriteriaTester *tester = IITER_GET_CRITERIA_TESTER(cur);
571 ctx->testers = array_ensure_append(ctx->testers, &tester, 1, IndexCriteriaTester *);
572 cur->Free(cur);
573 }
574 } else {
575 ctx->bestIt = NULL;
576 }
577
578 rm_free(ctx->its);
579 ctx->its = sortedIts;
580 ctx->num = sortedItsSize;
581 array_free(unsortedIts);
582 }
583
NewIntersecIterator(IndexIterator ** its_,size_t num,DocTable * dt,t_fieldMask fieldMask,int maxSlop,int inOrder,double weight)584 IndexIterator *NewIntersecIterator(IndexIterator **its_, size_t num, DocTable *dt,
585 t_fieldMask fieldMask, int maxSlop, int inOrder, double weight) {
586 // printf("Creating new intersection iterator with fieldMask=%llx\n", fieldMask);
587 IntersectIterator *ctx = rm_calloc(1, sizeof(*ctx));
588 ctx->lastDocId = 0;
589 ctx->lastFoundId = 0;
590 ctx->len = 0;
591 ctx->maxSlop = maxSlop;
592 ctx->inOrder = inOrder;
593 ctx->fieldMask = fieldMask;
594 ctx->weight = weight;
595 ctx->docIds = rm_calloc(num, sizeof(t_docId));
596 ctx->docTable = dt;
597 ctx->nexpected = UINT32_MAX;
598
599 ctx->base.isValid = 1;
600 ctx->base.current = NewIntersectResult(num, weight);
601 ctx->its = its_;
602 ctx->num = num;
603
604 // Sort children iterators from low count to high count which reduces the number of iterations.
605 if (!ctx->inOrder) {
606 qsort(ctx->its, ctx->num, sizeof(*ctx->its), (CompareFunc)cmpIter);
607 }
608
609 // bind the iterator calls
610 IndexIterator *it = &ctx->base;
611 it->ctx = ctx;
612 it->LastDocId = II_LastDocId;
613 it->NumEstimated = II_NumEstimated;
614 it->GetCriteriaTester = II_GetCriteriaTester;
615 it->Read = II_ReadSorted;
616 it->SkipTo = II_SkipTo;
617 it->Len = II_Len;
618 it->Free = IntersectIterator_Free;
619 it->Abort = II_Abort;
620 it->Rewind = II_Rewind;
621 it->HasNext = NULL;
622 it->mode = MODE_SORTED;
623 II_SortChildren(ctx);
624 return it;
625 }
626
II_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)627 static int II_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
628 /* A seek with docId 0 is equivalent to a read */
629 if (docId == 0) {
630 return II_ReadSorted(ctx, hit);
631 }
632 IntersectIterator *ic = ctx;
633 AggregateResult_Reset(ic->base.current);
634 int nfound = 0;
635
636 int rc = INDEXREAD_EOF;
637 // skip all iterators to docId
638 for (int i = 0; i < ic->num; i++) {
639 IndexIterator *it = ic->its[i];
640
641 if (!it || !IITER_HAS_NEXT(it)) return INDEXREAD_EOF;
642
643 RSIndexResult *res = IITER_CURRENT_RECORD(it);
644 rc = INDEXREAD_OK;
645
646 // only read if we are not already at the seek to position
647 if (ic->docIds[i] != docId) {
648 rc = it->SkipTo(it->ctx, docId, &res);
649 if (rc != INDEXREAD_EOF) {
650 if (res) ic->docIds[i] = res->docId;
651 }
652 }
653
654 if (rc == INDEXREAD_EOF) {
655 // we are at the end!
656 ic->base.isValid = 0;
657 return rc;
658 } else if (rc == INDEXREAD_OK) {
659
660 // YAY! found!
661 if (res) {
662 AggregateResult_AddChild(ic->base.current, res);
663 }
664 ic->lastDocId = docId;
665
666 ++nfound;
667 } else if (ic->docIds[i] > ic->lastDocId) {
668 ic->lastDocId = ic->docIds[i];
669 break;
670 }
671 }
672
673 // unless we got an EOF - we put the current record into hit
674
675 // if the requested id was found on all children - we return OK
676 if (nfound == ic->num) {
677 // printf("Skipto %d hit @%d\n", docId, ic->current->docId);
678
679 // Update the last found id
680 // if maxSlop == -1 there is no need to verify maxSlop and inorder, otherwise lets verify
681 if (ic->maxSlop == -1 ||
682 IndexResult_IsWithinRange(ic->base.current, ic->maxSlop, ic->inOrder)) {
683 ic->lastFoundId = ic->base.current->docId;
684 if (hit) *hit = ic->base.current;
685 return INDEXREAD_OK;
686 }
687 }
688
689 // Not found - but we need to read the next valid result into hit
690 rc = II_ReadSorted(ic, hit);
691 // this might have brought us to our end, in which case we just terminate
692 if (rc == INDEXREAD_EOF) return INDEXREAD_EOF;
693
694 // otherwise - not found
695 return INDEXREAD_NOTFOUND;
696 }
697
II_ReadUnsorted(void * ctx,RSIndexResult ** hit)698 static int II_ReadUnsorted(void *ctx, RSIndexResult **hit) {
699 IntersectIterator *ic = ctx;
700 int rc = INDEXREAD_OK;
701 RSIndexResult *res = NULL;
702 while (1) {
703 rc = ic->bestIt->Read(ic->bestIt->ctx, &res);
704 if (rc == INDEXREAD_EOF) {
705 return INDEXREAD_EOF;
706 *hit = res;
707 return rc;
708 }
709 int isMatch = 1;
710 for (size_t i = 0; i < array_len(ic->testers); ++i) {
711 if (!ic->testers[i]->Test(ic->testers[i], res->docId)) {
712 isMatch = 0;
713 break;
714 }
715 }
716 if (!isMatch) {
717 continue;
718 }
719 *hit = res;
720 return rc;
721 }
722 }
723
724 typedef struct {
725 IndexCriteriaTester base;
726 IndexCriteriaTester **children;
727 } IICriteriaTester;
728
II_Test(struct IndexCriteriaTester * ct,t_docId id)729 static int II_Test(struct IndexCriteriaTester *ct, t_docId id) {
730 IICriteriaTester *ict = (IICriteriaTester *)ct;
731 for (size_t i = 0; i < array_len(ict->children); ++i) {
732 if (!ict->children[i]->Test(ict->children[i], id)) {
733 return 0;
734 }
735 }
736 return 1;
737 }
738
II_TesterFree(struct IndexCriteriaTester * ct)739 static void II_TesterFree(struct IndexCriteriaTester *ct) {
740 IICriteriaTester *ict = (IICriteriaTester *)ct;
741 for (size_t i = 0; i < array_len(ict->children); ++i) {
742 ict->children[i]->Free(ict->children[i]);
743 }
744 array_free(ict->children);
745 rm_free(ict);
746 }
747
II_GetCriteriaTester(void * ctx)748 static IndexCriteriaTester *II_GetCriteriaTester(void *ctx) {
749 IntersectIterator *ic = ctx;
750 for (size_t i = 0; i < ic->num; ++i) {
751 IndexCriteriaTester *tester = NULL;
752 if (ic->its[i]) {
753 tester = IITER_GET_CRITERIA_TESTER(ic->its[i]);
754 }
755 if (!tester) {
756 for (int j = 0; j < i; j++) {
757 ic->testers[j]->Free(ic->testers[j]);
758 }
759 array_free(ic->testers);
760 return NULL;
761 }
762 ic->testers = array_ensure_append(ic->testers, tester, 1, IndexCriteriaTester *);
763 }
764 IICriteriaTester *ict = rm_malloc(sizeof(*ict));
765 ict->children = ic->testers;
766 ic->testers = NULL;
767 ict->base.Test = II_Test;
768 ict->base.Free = II_TesterFree;
769 return &ict->base;
770 }
771
II_NumEstimated(void * ctx)772 static size_t II_NumEstimated(void *ctx) {
773 IntersectIterator *ic = ctx;
774 return ic->nexpected;
775 }
776
II_ReadSorted(void * ctx,RSIndexResult ** hit)777 static int II_ReadSorted(void *ctx, RSIndexResult **hit) {
778 IntersectIterator *ic = ctx;
779 if (ic->num == 0) return INDEXREAD_EOF;
780
781 int nh = 0;
782 int i = 0;
783
784 do {
785 nh = 0;
786 AggregateResult_Reset(ic->base.current);
787
788 for (i = 0; i < ic->num; i++) {
789 IndexIterator *it = ic->its[i];
790
791 if (!it) goto eof;
792
793 RSIndexResult *h = IITER_CURRENT_RECORD(it);
794 // skip to the next
795 int rc = INDEXREAD_OK;
796 if (ic->docIds[i] != ic->lastDocId || ic->lastDocId == 0) {
797
798 if (i == 0 && ic->docIds[i] >= ic->lastDocId) {
799 rc = it->Read(it->ctx, &h);
800 } else {
801 rc = it->SkipTo(it->ctx, ic->lastDocId, &h);
802 }
803 // printf("II %p last docId %d, it %d read docId %d(%d), rc %d\n", ic, ic->lastDocId, i,
804 // h->docId, it->LastDocId(it->ctx), rc);
805
806 if (rc == INDEXREAD_EOF) goto eof;
807 ic->docIds[i] = h->docId;
808 }
809
810 if (ic->docIds[i] > ic->lastDocId) {
811 ic->lastDocId = ic->docIds[i];
812 break;
813 }
814 if (rc == INDEXREAD_OK) {
815 ++nh;
816 AggregateResult_AddChild(ic->base.current, h);
817 } else {
818 ic->lastDocId++;
819 }
820 }
821
822 if (nh == ic->num) {
823 // printf("II %p HIT @ %d\n", ic, ic->current->docId);
824 // sum up all hits
825 if (hit != NULL) {
826 *hit = ic->base.current;
827 }
828 // Update the last valid found id
829 ic->lastFoundId = ic->base.current->docId;
830
831 // advance the doc id so next time we'll read a new record
832 ic->lastDocId++;
833
834 // // make sure the flags are matching.
835 if ((ic->base.current->fieldMask & ic->fieldMask) == 0) {
836 // printf("Field masks don't match!\n");
837 continue;
838 }
839
840 // If we need to match slop and order, we do it now, and possibly skip the result
841 if (ic->maxSlop >= 0) {
842 // printf("Checking SLOP... (%d)\n", ic->maxSlop);
843 if (!IndexResult_IsWithinRange(ic->base.current, ic->maxSlop, ic->inOrder)) {
844 // printf("Not within range!\n");
845 continue;
846 }
847 }
848
849 // for(size_t i = 0 ; i < array_len(ic->testers) ; ++i){
850 // if(!ic->testers[i]->TextCriteria(ic->testers[i]->ctx, ic->lastFoundId)){
851 // continue;
852 // }
853 // }
854
855 ic->len++;
856 // printf("Returning OK\n");
857 return INDEXREAD_OK;
858 }
859 } while (1);
860 eof:
861 ic->base.isValid = 0;
862 return INDEXREAD_EOF;
863 }
864
II_LastDocId(void * ctx)865 static t_docId II_LastDocId(void *ctx) {
866 // return last FOUND id, not last read id form any child
867 return ((IntersectIterator *)ctx)->lastFoundId;
868 }
869
II_Len(void * ctx)870 static size_t II_Len(void *ctx) {
871 return ((IntersectIterator *)ctx)->len;
872 }
873
874 /* A Not iterator works by wrapping another iterator, and returning OK for misses, and NOTFOUND
875 * for hits */
876 typedef struct {
877 IndexIterator base;
878 IndexIterator *child;
879 IndexCriteriaTester *childCT;
880 t_docId lastDocId;
881 t_docId maxDocId;
882 size_t len;
883 double weight;
884 } NotIterator, NotContext;
885
NI_Abort(void * ctx)886 static void NI_Abort(void *ctx) {
887 NotContext *nc = ctx;
888 if (nc->child) {
889 nc->child->Abort(nc->child->ctx);
890 }
891 }
892
NI_Rewind(void * ctx)893 static void NI_Rewind(void *ctx) {
894 NotContext *nc = ctx;
895 nc->lastDocId = 0;
896 nc->base.current->docId = 0;
897 if (nc->child) {
898 nc->child->Rewind(nc->child->ctx);
899 }
900 }
NI_Free(IndexIterator * it)901 static void NI_Free(IndexIterator *it) {
902
903 NotContext *nc = it->ctx;
904 if (nc->child) {
905 nc->child->Free(nc->child);
906 }
907 if (nc->childCT) {
908 nc->childCT->Free(nc->childCT);
909 }
910 IndexResult_Free(nc->base.current);
911 rm_free(it);
912 }
913
914 /* SkipTo for NOT iterator. If we have a match - return NOTFOUND. If we don't or we're at the end
915 * - return OK */
NI_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)916 static int NI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
917 NotContext *nc = ctx;
918
919 // do not skip beyond max doc id
920 if (docId > nc->maxDocId) {
921 return INDEXREAD_EOF;
922 }
923 // If we don't have a child it means the sub iterator is of a meaningless expression.
924 // So negating it means we will always return OK!
925 if (!nc->child) {
926 goto ok;
927 }
928
929 // Get the child's last read docId
930 t_docId childId = nc->child->LastDocId(nc->child->ctx);
931
932 // If the child is ahead of the skipto id, it means the child doesn't have this id.
933 // So we are okay!
934 if (childId > docId) {
935 goto ok;
936 }
937
938 // If the child docId is the one we are looking for, it's an anti match!
939 if (childId == docId) {
940 nc->base.current->docId = docId;
941 nc->lastDocId = docId;
942 *hit = nc->base.current;
943 return INDEXREAD_NOTFOUND;
944 }
945
946 // read the next entry from the child
947 int rc = nc->child->SkipTo(nc->child->ctx, docId, hit);
948
949 // OK means not found
950 if (rc == INDEXREAD_OK) {
951 return INDEXREAD_NOTFOUND;
952 }
953
954 ok:
955 // NOT FOUND or end means OK. We need to set the docId on the hit we will bubble up
956 nc->base.current->docId = docId;
957 nc->lastDocId = docId;
958 *hit = nc->base.current;
959 return INDEXREAD_OK;
960 }
961
962 typedef struct {
963 IndexCriteriaTester base;
964 IndexCriteriaTester *child;
965 } NI_CriteriaTester;
966
NI_Test(struct IndexCriteriaTester * ct,t_docId id)967 static int NI_Test(struct IndexCriteriaTester *ct, t_docId id) {
968 NI_CriteriaTester *nct = (NI_CriteriaTester *)ct;
969 return !nct->child->Test(nct->child, id);
970 }
NI_TesterFree(struct IndexCriteriaTester * ct)971 static void NI_TesterFree(struct IndexCriteriaTester *ct) {
972 NI_CriteriaTester *nct = (NI_CriteriaTester *)ct;
973 nct->child->Free(nct->child);
974 rm_free(nct);
975 }
976
NI_GetCriteriaTester(void * ctx)977 static IndexCriteriaTester *NI_GetCriteriaTester(void *ctx) {
978 NotContext *nc = ctx;
979 if (!nc->child) {
980 return NULL;
981 }
982 IndexCriteriaTester *ct = IITER_GET_CRITERIA_TESTER(nc->child);
983 if (!ct) {
984 return NULL;
985 }
986 NI_CriteriaTester *nct = rm_malloc(sizeof(*nct));
987 nct->child = ct;
988 nct->base.Test = NI_Test;
989 nct->base.Free = NI_TesterFree;
990 return &nct->base;
991 }
992
NI_NumEstimated(void * ctx)993 static size_t NI_NumEstimated(void *ctx) {
994 NotContext *nc = ctx;
995 return nc->maxDocId;
996 }
997
NI_ReadUnsorted(void * ctx,RSIndexResult ** hit)998 static int NI_ReadUnsorted(void *ctx, RSIndexResult **hit) {
999 NotContext *nc = ctx;
1000 while (nc->lastDocId > nc->maxDocId) {
1001 if (!nc->childCT->Test(nc->childCT, nc->lastDocId)) {
1002 nc->base.current->docId = nc->lastDocId;
1003 *hit = nc->base.current;
1004 ++nc->lastDocId;
1005 return INDEXREAD_OK;
1006 }
1007 ++nc->lastDocId;
1008 }
1009 return INDEXREAD_EOF;
1010 }
1011
1012 /* Read from a NOT iterator. This is applicable only if the only or leftmost node of a query is a
1013 * NOT node. We simply read until max docId, skipping docIds that exist in the child*/
NI_ReadSorted(void * ctx,RSIndexResult ** hit)1014 static int NI_ReadSorted(void *ctx, RSIndexResult **hit) {
1015 NotContext *nc = ctx;
1016 if (nc->lastDocId > nc->maxDocId) return INDEXREAD_EOF;
1017
1018 RSIndexResult *cr = NULL;
1019 // if we have a child, get the latest result from the child
1020 if (nc->child) {
1021 cr = IITER_CURRENT_RECORD(nc->child);
1022
1023 if (cr == NULL || cr->docId == 0) {
1024 nc->child->Read(nc->child->ctx, &cr);
1025 }
1026 }
1027
1028 // advance our reader by one, and let's test if it's a valid value or not
1029 nc->base.current->docId++;
1030
1031 // If we don't have a child result, or the child result is ahead of the current counter,
1032 // we just increment our virtual result's id until we hit the child result's
1033 // in which case we'll read from the child and bypass it by one.
1034 if (cr == NULL || cr->docId > nc->base.current->docId) {
1035 goto ok;
1036 }
1037
1038 while (cr->docId == nc->base.current->docId) {
1039 // advance our docId to the next possible id
1040 nc->base.current->docId++;
1041
1042 // read the next entry from the child
1043 if (nc->child->Read(nc->child->ctx, &cr) == INDEXREAD_EOF) {
1044 break;
1045 }
1046 }
1047
1048 // make sure we did not overflow
1049 if (nc->base.current->docId > nc->maxDocId) {
1050 return INDEXREAD_EOF;
1051 }
1052
1053 ok:
1054 // Set the next entry and return ok
1055 nc->lastDocId = nc->base.current->docId;
1056 if (hit) *hit = nc->base.current;
1057 ++nc->len;
1058
1059 return INDEXREAD_OK;
1060 }
1061
1062 /* We always have next, in case anyone asks... ;) */
NI_HasNext(void * ctx)1063 static int NI_HasNext(void *ctx) {
1064 NotContext *nc = ctx;
1065
1066 return nc->lastDocId <= nc->maxDocId;
1067 }
1068
1069 /* Our len is the child's len? TBD it might be better to just return 0 */
NI_Len(void * ctx)1070 static size_t NI_Len(void *ctx) {
1071 NotContext *nc = ctx;
1072 return nc->len;
1073 }
1074
1075 /* Last docId */
NI_LastDocId(void * ctx)1076 static t_docId NI_LastDocId(void *ctx) {
1077 NotContext *nc = ctx;
1078
1079 return nc->lastDocId;
1080 }
1081
NewNotIterator(IndexIterator * it,t_docId maxDocId,double weight)1082 IndexIterator *NewNotIterator(IndexIterator *it, t_docId maxDocId, double weight) {
1083
1084 NotContext *nc = rm_malloc(sizeof(*nc));
1085 nc->base.current = NewVirtualResult(weight);
1086 nc->base.current->fieldMask = RS_FIELDMASK_ALL;
1087 nc->base.current->docId = 0;
1088 nc->child = it;
1089 nc->childCT = NULL;
1090 nc->lastDocId = 0;
1091 nc->maxDocId = maxDocId;
1092 nc->len = 0;
1093 nc->weight = weight;
1094
1095 IndexIterator *ret = &nc->base;
1096 ret->ctx = nc;
1097 ret->GetCriteriaTester = NI_GetCriteriaTester;
1098 ret->NumEstimated = NI_NumEstimated;
1099 ret->Free = NI_Free;
1100 ret->HasNext = NI_HasNext;
1101 ret->LastDocId = NI_LastDocId;
1102 ret->Len = NI_Len;
1103 ret->Read = NI_ReadSorted;
1104 ret->SkipTo = NI_SkipTo;
1105 ret->Abort = NI_Abort;
1106 ret->Rewind = NI_Rewind;
1107 ret->mode = MODE_SORTED;
1108
1109 if (nc->child && nc->child->mode == MODE_UNSORTED) {
1110 nc->childCT = IITER_GET_CRITERIA_TESTER(nc->child);
1111 RS_LOG_ASSERT(nc->childCT, "childCT should not be NULL");
1112 ret->Read = NI_ReadUnsorted;
1113 }
1114
1115 return ret;
1116 }
1117
1118 /**********************************************************
1119 * Optional clause iterator
1120 **********************************************************/
1121
1122 typedef struct {
1123 IndexIterator base;
1124 IndexIterator *child;
1125 IndexCriteriaTester *childCT;
1126 RSIndexResult *virt;
1127 t_fieldMask fieldMask;
1128 t_docId lastDocId;
1129 t_docId maxDocId;
1130 t_docId nextRealId;
1131 double weight;
1132 } OptionalMatchContext, OptionalIterator;
1133
OI_Free(IndexIterator * it)1134 static void OI_Free(IndexIterator *it) {
1135 OptionalMatchContext *nc = it->ctx;
1136 if (nc->child) {
1137 nc->child->Free(nc->child);
1138 }
1139 if (nc->childCT) {
1140 nc->childCT->Free(nc->childCT);
1141 }
1142 IndexResult_Free(nc->virt);
1143 rm_free(it);
1144 }
1145
OI_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)1146 static int OI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
1147 OptionalMatchContext *nc = ctx;
1148 // printf("OI_SkipTo => %llu!. NextReal: %llu. Max: %llu. Last: %llu\n", docId, nc->nextRealId,
1149 // nc->maxDocId, nc->lastDocId);
1150
1151 int found = 0;
1152
1153 // Set the current ID
1154 nc->lastDocId = docId;
1155
1156 if (nc->lastDocId > nc->maxDocId) {
1157 return INDEXREAD_EOF;
1158 }
1159
1160 if (!nc->child) {
1161 nc->virt->docId = docId;
1162 nc->base.current = nc->virt;
1163 return INDEXREAD_OK;
1164 }
1165
1166 if (docId == 0) {
1167 return nc->base.Read(ctx, hit);
1168 }
1169
1170 if (docId == nc->nextRealId) {
1171 // Edge case -- match on the docid we just looked for
1172 found = 1;
1173 // reset current pointer since this might have been a prior
1174 // virt return
1175 nc->base.current = nc->child->current;
1176
1177 } else if (docId > nc->nextRealId) {
1178 int rc = nc->child->SkipTo(nc->child->ctx, docId, &nc->base.current);
1179 if (rc == INDEXREAD_OK) {
1180 found = 1;
1181 }
1182 if (nc->base.current) {
1183 nc->nextRealId = nc->base.current->docId;
1184 }
1185 }
1186
1187 if (found) {
1188 // Has a real hit
1189 RSIndexResult *r = nc->base.current;
1190 } else {
1191 nc->virt->docId = docId;
1192 nc->base.current = nc->virt;
1193 }
1194
1195 *hit = nc->base.current;
1196 return INDEXREAD_OK;
1197 }
1198
OI_Test(struct IndexCriteriaTester * ct,t_docId id)1199 static int OI_Test(struct IndexCriteriaTester *ct, t_docId id) {
1200 return 1;
1201 }
1202
OI_TesterFree(struct IndexCriteriaTester * ct)1203 static void OI_TesterFree(struct IndexCriteriaTester *ct) {
1204 rm_free(ct);
1205 }
1206
OI_GetCriteriaTester(void * ctx)1207 static IndexCriteriaTester *OI_GetCriteriaTester(void *ctx) {
1208 IndexCriteriaTester *tester = rm_malloc(sizeof(*tester));
1209 tester->Test = OI_Test;
1210 tester->Free = OI_TesterFree;
1211 return tester;
1212 }
1213
OI_NumEstimated(void * ctx)1214 static size_t OI_NumEstimated(void *ctx) {
1215 OptionalMatchContext *nc = ctx;
1216 return nc->maxDocId;
1217 }
1218
OI_ReadUnsorted(void * ctx,RSIndexResult ** hit)1219 static int OI_ReadUnsorted(void *ctx, RSIndexResult **hit) {
1220 OptionalMatchContext *nc = ctx;
1221 if (nc->lastDocId >= nc->maxDocId) return INDEXREAD_EOF;
1222 nc->lastDocId++;
1223 nc->base.current = nc->virt;
1224 nc->base.current->docId = nc->lastDocId;
1225 *hit = nc->base.current;
1226 if (nc->childCT->Test(nc->childCT, nc->lastDocId)) {
1227 nc->base.current->weight = nc->weight * 2; // we increase the weight cause we found the id
1228 } else {
1229 nc->base.current->weight = nc->weight * 2; // we do increase the weight cause id was not found
1230 }
1231 return INDEXREAD_OK;
1232 }
1233
1234 /* Read has no meaning in the sense of an OPTIONAL iterator, so we just read the next record from
1235 * our child */
OI_ReadSorted(void * ctx,RSIndexResult ** hit)1236 static int OI_ReadSorted(void *ctx, RSIndexResult **hit) {
1237 OptionalMatchContext *nc = ctx;
1238 if (nc->lastDocId >= nc->maxDocId) {
1239 return INDEXREAD_EOF;
1240 }
1241
1242 // Increase the size by one
1243 nc->lastDocId++;
1244
1245 if (nc->lastDocId > nc->nextRealId) {
1246 int rc = nc->child->Read(nc->child->ctx, &nc->base.current);
1247 if (rc == INDEXREAD_EOF) {
1248 nc->nextRealId = nc->maxDocId + 1;
1249 } else {
1250 nc->nextRealId = nc->base.current->docId;
1251 }
1252 }
1253
1254 if (nc->lastDocId != nc->nextRealId) {
1255 nc->base.current = nc->virt;
1256 nc->base.current->weight = 0;
1257 } else {
1258 nc->base.current->weight = nc->weight;
1259 }
1260
1261 nc->base.current->docId = nc->lastDocId;
1262 *hit = nc->base.current;
1263 return INDEXREAD_OK;
1264 }
1265
1266 /* We always have next, in case anyone asks... ;) */
OI_HasNext(void * ctx)1267 static int OI_HasNext(void *ctx) {
1268 OptionalMatchContext *nc = ctx;
1269 return (nc->lastDocId <= nc->maxDocId);
1270 }
1271
OI_Abort(void * ctx)1272 static void OI_Abort(void *ctx) {
1273 OptionalMatchContext *nc = ctx;
1274 if (nc->child) {
1275 nc->child->Abort(nc->child->ctx);
1276 }
1277 }
1278
1279 /* Our len is the child's len? TBD it might be better to just return 0 */
OI_Len(void * ctx)1280 static size_t OI_Len(void *ctx) {
1281 OptionalMatchContext *nc = ctx;
1282 return nc->child ? nc->child->Len(nc->child->ctx) : 0;
1283 }
1284
1285 /* Last docId */
OI_LastDocId(void * ctx)1286 static t_docId OI_LastDocId(void *ctx) {
1287 OptionalMatchContext *nc = ctx;
1288
1289 return nc->lastDocId;
1290 }
1291
OI_Rewind(void * ctx)1292 static void OI_Rewind(void *ctx) {
1293 OptionalMatchContext *nc = ctx;
1294 nc->lastDocId = 0;
1295 nc->virt->docId = 0;
1296 nc->nextRealId = 0;
1297 if (nc->child) {
1298 nc->child->Rewind(nc->child->ctx);
1299 }
1300 }
1301
NewOptionalIterator(IndexIterator * it,t_docId maxDocId,double weight)1302 IndexIterator *NewOptionalIterator(IndexIterator *it, t_docId maxDocId, double weight) {
1303 OptionalMatchContext *nc = rm_calloc(1, sizeof(*nc));
1304 nc->virt = NewVirtualResult(weight);
1305 nc->virt->fieldMask = RS_FIELDMASK_ALL;
1306 nc->virt->freq = 1;
1307 nc->base.current = nc->virt;
1308 nc->child = it;
1309 nc->childCT = NULL;
1310 nc->lastDocId = 0;
1311 nc->maxDocId = maxDocId;
1312 nc->weight = weight;
1313 nc->nextRealId = 0;
1314
1315 IndexIterator *ret = &nc->base;
1316 ret->ctx = nc;
1317 ret->GetCriteriaTester = OI_GetCriteriaTester;
1318 ret->NumEstimated = OI_NumEstimated;
1319 ret->Free = OI_Free;
1320 ret->HasNext = OI_HasNext;
1321 ret->LastDocId = OI_LastDocId;
1322 ret->Len = OI_Len;
1323 ret->Read = OI_ReadSorted;
1324 ret->SkipTo = OI_SkipTo;
1325 ret->Abort = OI_Abort;
1326 ret->Rewind = OI_Rewind;
1327 ret->mode = MODE_SORTED;
1328
1329 if (nc->child && nc->child->mode == MODE_UNSORTED) {
1330 nc->childCT = IITER_GET_CRITERIA_TESTER(nc->child);
1331 RS_LOG_ASSERT(nc->childCT, "childCT should not be NULL");
1332 ret->Read = OI_ReadUnsorted;
1333 }
1334 if (!nc->child) {
1335 nc->child = NewEmptyIterator();
1336 }
1337
1338 return ret;
1339 }
1340
1341 /* Wildcard iterator, matchin ALL documents in the index. This is used for one thing only -
1342 * purely negative queries. If the root of the query is a negative expression, we cannot process
1343 * it
1344 * without a positive expression. So we create a wildcard iterator that basically just iterates
1345 * all
1346 * the incremental document ids, and matches every skip within its range. */
1347 typedef struct {
1348 IndexIterator base;
1349 t_docId topId;
1350 t_docId current;
1351 } WildcardIterator, WildcardIteratorCtx;
1352
1353 /* Free a wildcard iterator */
WI_Free(IndexIterator * it)1354 static void WI_Free(IndexIterator *it) {
1355
1356 WildcardIteratorCtx *nc = it->ctx;
1357 IndexResult_Free(CURRENT_RECORD(nc));
1358 rm_free(it);
1359 }
1360
1361 /* Read reads the next consecutive id, unless we're at the end */
WI_Read(void * ctx,RSIndexResult ** hit)1362 static int WI_Read(void *ctx, RSIndexResult **hit) {
1363 WildcardIteratorCtx *nc = ctx;
1364 if (nc->current > nc->topId) {
1365 return INDEXREAD_EOF;
1366 }
1367 CURRENT_RECORD(nc)->docId = nc->current++;
1368 if (hit) {
1369 *hit = CURRENT_RECORD(nc);
1370 }
1371 return INDEXREAD_OK;
1372 }
1373
1374 /* Skipto for wildcard iterator - always succeeds, but this should normally not happen as it has
1375 * no
1376 * meaning */
WI_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)1377 static int WI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
1378 // printf("WI_Skipto %d\n", docId);
1379 WildcardIteratorCtx *nc = ctx;
1380
1381 if (nc->current > nc->topId) return INDEXREAD_EOF;
1382
1383 if (docId == 0) return WI_Read(ctx, hit);
1384
1385 nc->current = docId;
1386 CURRENT_RECORD(nc)->docId = docId;
1387 if (hit) {
1388 *hit = CURRENT_RECORD(nc);
1389 }
1390 return INDEXREAD_OK;
1391 }
1392
WI_Abort(void * ctx)1393 static void WI_Abort(void *ctx) {
1394 WildcardIteratorCtx *nc = ctx;
1395 nc->current = nc->topId + 1;
1396 }
1397
1398 /* We always have next, in case anyone asks... ;) */
WI_HasNext(void * ctx)1399 static int WI_HasNext(void *ctx) {
1400 WildcardIteratorCtx *nc = ctx;
1401
1402 return nc->current <= nc->topId;
1403 }
1404
1405 /* Our len is the len of the index... */
WI_Len(void * ctx)1406 static size_t WI_Len(void *ctx) {
1407 WildcardIteratorCtx *nc = ctx;
1408 return nc->topId;
1409 }
1410
1411 /* Last docId */
WI_LastDocId(void * ctx)1412 static t_docId WI_LastDocId(void *ctx) {
1413 WildcardIteratorCtx *nc = ctx;
1414
1415 return nc->current;
1416 }
1417
WI_Rewind(void * p)1418 static void WI_Rewind(void *p) {
1419 WildcardIteratorCtx *ctx = p;
1420 ctx->current = 1;
1421 }
1422
WI_NumEstimated(void * p)1423 static size_t WI_NumEstimated(void *p) {
1424 return SIZE_MAX;
1425 }
1426
1427 /* Create a new wildcard iterator */
NewWildcardIterator(t_docId maxId)1428 IndexIterator *NewWildcardIterator(t_docId maxId) {
1429 WildcardIteratorCtx *c = rm_calloc(1, sizeof(*c));
1430 c->current = 1;
1431 c->topId = maxId;
1432
1433 CURRENT_RECORD(c) = NewVirtualResult(1);
1434 CURRENT_RECORD(c)->freq = 1;
1435 CURRENT_RECORD(c)->fieldMask = RS_FIELDMASK_ALL;
1436
1437 IndexIterator *ret = &c->base;
1438 ret->ctx = c;
1439 ret->Free = WI_Free;
1440 ret->HasNext = WI_HasNext;
1441 ret->LastDocId = WI_LastDocId;
1442 ret->Len = WI_Len;
1443 ret->Read = WI_Read;
1444 ret->SkipTo = WI_SkipTo;
1445 ret->Abort = WI_Abort;
1446 ret->Rewind = WI_Rewind;
1447 ret->NumEstimated = WI_NumEstimated;
1448 return ret;
1449 }
1450
EOI_Read(void * p,RSIndexResult ** e)1451 static int EOI_Read(void *p, RSIndexResult **e) {
1452 return INDEXREAD_EOF;
1453 }
EOI_Free(struct indexIterator * self)1454 static void EOI_Free(struct indexIterator *self) {
1455 // Nothing
1456 }
EOI_NumEstimated(void * ctx)1457 static size_t EOI_NumEstimated(void *ctx) {
1458 return 0;
1459 }
EOI_Len(void * ctx)1460 static size_t EOI_Len(void *ctx) {
1461 return 0;
1462 }
EOI_LastDocId(void * ctx)1463 static t_docId EOI_LastDocId(void *ctx) {
1464 return 0;
1465 }
1466
EOI_SkipTo(void * ctx,t_docId docId,RSIndexResult ** hit)1467 static int EOI_SkipTo(void *ctx, t_docId docId, RSIndexResult **hit) {
1468 return INDEXREAD_EOF;
1469 }
EOI_Abort(void * ctx)1470 static void EOI_Abort(void *ctx) {
1471 }
EOI_Rewind(void * ctx)1472 static void EOI_Rewind(void *ctx) {
1473 }
1474
1475 static IndexIterator eofIterator = {.Read = EOI_Read,
1476 .Free = EOI_Free,
1477 .SkipTo = EOI_SkipTo,
1478 .Len = EOI_Len,
1479 .LastDocId = EOI_LastDocId,
1480 .NumEstimated = EOI_NumEstimated,
1481 .Abort = EOI_Abort,
1482 .Rewind = EOI_Rewind};
1483
NewEmptyIterator(void)1484 IndexIterator *NewEmptyIterator(void) {
1485 return &eofIterator;
1486 }
1487
1488 // LCOV_EXCL_START unused
IndexIterator_GetTypeString(const IndexIterator * it)1489 const char *IndexIterator_GetTypeString(const IndexIterator *it) {
1490 if (it->Free == UnionIterator_Free) {
1491 return "UNION";
1492 } else if (it->Free == IntersectIterator_Free) {
1493 return "INTERSECTION";
1494 } else if (it->Free == OI_Free) {
1495 return "OPTIONAL";
1496 } else if (it->Free == WI_Free) {
1497 return "WILDCARD";
1498 } else if (it->Free == NI_Free) {
1499 return "NOT";
1500 } else if (it->Free == ReadIterator_Free) {
1501 return "IIDX";
1502 } else if (it == &eofIterator) {
1503 return "EMPTY";
1504 } else {
1505 return "Unknown";
1506 }
1507 }
1508 // LCOV_EXCL_STOP
1509