1 #include "../buffer.h"
2 #include "../index.h"
3 #include "../inverted_index.h"
4 #include "../index_result.h"
5 #include "../query_parser/tokenizer.h"
6 #include "../rmutil/alloc.h"
7 #include "../spec.h"
8 #include "../tokenize.h"
9 #include "../varint.h"
10 #include "../rmutil/alloc.h"
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14 #include <time.h>
15 #include <float.h>
16 #include <gtest/gtest.h>
17 #include <vector>
18 #include <cstdint>
19 
// Plain fixture shared by the TEST_F cases in this file; it adds no setup,
// teardown or members of its own.
class IndexTest : public ::testing::Test {};
21 
offsetsFromVVW(const VarintVectorWriter * vvw)22 static RSOffsetVector offsetsFromVVW(const VarintVectorWriter *vvw) {
23   RSOffsetVector ret = {0};
24   ret.data = VVW_GetByteData(vvw);
25   ret.len = VVW_GetByteLength(vvw);
26   return ret;
27 }
28 
// Writes a short ascending sequence through a VarintVectorWriter and checks
// that iterating the resulting offset vector yields exactly the same values.
TEST_F(IndexTest, testVarint) {
  VarintVectorWriter *vw = NewVarintVectorWriter(8);
  const uint32_t expected[] = {10, 1000, 1020, 10000, 10020};
  const size_t numExpected = sizeof(expected) / sizeof(expected[0]);
  for (size_t i = 0; i < numExpected; i++) {
    VVW_Write(vw, expected[i]);
  }

  VVW_Truncate(vw);

  RSOffsetVector vec = offsetsFromVVW(vw);
  RSOffsetIterator it = RSOffsetVector_Iterate(&vec, NULL);
  size_t numDecoded = 0;
  uint32_t n = 0;
  while (RS_OFFSETVECTOR_EOF != (n = it.Next(it.ctx, NULL))) {
    // Guard the table lookup: a decoder that produced extra values must fail
    // the test instead of reading past the end of `expected`.
    ASSERT_LT(numDecoded, numExpected) << "Decoded more values than were written";
    ASSERT_EQ(expected[numDecoded], n) << "Wrong number decoded";
    numDecoded++;
  }
  // Without this check a decoder that stopped early would pass unnoticed.
  ASSERT_EQ(numExpected, numDecoded) << "Decoded fewer values than were written";

  it.Free(it.ctx);
  VVW_Free(vw);
}
53 
TEST_F(IndexTest,testDistance)54 TEST_F(IndexTest, testDistance) {
55   VarintVectorWriter *vw = NewVarintVectorWriter(8);
56   VarintVectorWriter *vw2 = NewVarintVectorWriter(8);
57   VarintVectorWriter *vw3 = NewVarintVectorWriter(8);
58   VVW_Write(vw, 1);
59   VVW_Write(vw, 9);
60   VVW_Write(vw, 13);
61   VVW_Write(vw, 16);
62   VVW_Write(vw, 22);
63 
64   VVW_Write(vw2, 4);
65   VVW_Write(vw2, 7);
66   VVW_Write(vw2, 32);
67 
68   VVW_Write(vw3, 20);
69   VVW_Write(vw3, 25);
70 
71   VVW_Truncate(vw);
72   VVW_Truncate(vw2);
73 
74   RSIndexResult *tr1 = NewTokenRecord(NULL, 1);
75   tr1->docId = 1;
76   tr1->term.offsets = offsetsFromVVW(vw);
77 
78   RSIndexResult *tr2 = NewTokenRecord(NULL, 1);
79   tr2->docId = 1;
80   tr2->term.offsets = offsetsFromVVW(vw2);
81 
82   RSIndexResult *res = NewIntersectResult(2, 1);
83   AggregateResult_AddChild(res, tr1);
84   AggregateResult_AddChild(res, tr2);
85 
86   int delta = IndexResult_MinOffsetDelta(res);
87   ASSERT_EQ(2, delta);
88 
89   ASSERT_EQ(0, IndexResult_IsWithinRange(res, 0, 0));
90   ASSERT_EQ(0, IndexResult_IsWithinRange(res, 0, 1));
91   ASSERT_EQ(0, IndexResult_IsWithinRange(res, 1, 1));
92   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 1, 0));
93   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 2, 1));
94   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 2, 0));
95   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 3, 1));
96   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 4, 0));
97   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 4, 1));
98   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 5, 1));
99 
100   RSIndexResult *tr3 = NewTokenRecord(NULL, 1);
101   tr3->docId = 1;
102   tr3->term.offsets = offsetsFromVVW(vw3);
103   AggregateResult_AddChild(res, tr3);
104 
105   delta = IndexResult_MinOffsetDelta(res);
106   ASSERT_EQ(7, delta);
107 
108   // test merge iteration
109   RSOffsetIterator it = RSIndexResult_IterateOffsets(res);
110   uint32_t expected[] = {1, 4, 7, 9, 13, 16, 20, 22, 25, 32, RS_OFFSETVECTOR_EOF};
111 
112   uint32_t rc;
113   int i = 0;
114   do {
115     rc = it.Next(it.ctx, NULL);
116     ASSERT_EQ(rc, (expected[i++]));
117   } while (rc != RS_OFFSETVECTOR_EOF);
118   it.Free(it.ctx);
119 
120   IndexResult_Free(tr1);
121   IndexResult_Free(tr2);
122   IndexResult_Free(tr3);
123   IndexResult_Free(res);
124   VVW_Free(vw);
125   VVW_Free(vw2);
126   VVW_Free(vw3);
127 }
128 
// Value-parameterised fixture: each instance receives one int, which
// testRWFlags casts to IndexFlags.
class IndexFlagsTest : public testing::TestWithParam<int> {};
130 
TEST_P(IndexFlagsTest,testRWFlags)131 TEST_P(IndexFlagsTest, testRWFlags) {
132   IndexFlags indexFlags = (IndexFlags)GetParam();
133   InvertedIndex *idx = NewInvertedIndex(indexFlags, 1);
134 
135   IndexEncoder enc = InvertedIndex_GetEncoder(indexFlags);
136   ASSERT_TRUE(enc != NULL);
137 
138   for (size_t i = 0; i < 200; i++) {
139     // if (i % 10000 == 1) {
140     //     printf("iw cap: %ld, iw size: %d, numdocs: %d\n", w->cap, IW_Len(w),
141     //     w->ndocs);
142     // }
143 
144     ForwardIndexEntry h;
145     h.docId = i;
146     h.fieldMask = 1;
147     h.freq = (1 + i % 100) / (float)101;
148 
149     h.vw = NewVarintVectorWriter(8);
150     for (int n = 0; n < i % 4; n++) {
151       VVW_Write(h.vw, n);
152     }
153     VVW_Truncate(h.vw);
154 
155     InvertedIndex_WriteForwardIndexEntry(idx, enc, &h);
156 
157     // printf("doc %d, score %f offset %zd\n", h.docId, h.docScore, w->bw.buf->offset);
158     VVW_Free(h.vw);
159   }
160 
161   ASSERT_EQ(200, idx->numDocs);
162   ASSERT_EQ(2, idx->size);
163   ASSERT_EQ(199, idx->lastId);
164 
165   // IW_MakeSkipIndex(w, NewMemoryBuffer(8, BUFFER_WRITE));
166 
167   //   for (int x = 0; x < w->skipIdx.len; x++) {
168   //     printf("Skip entry %d: %d, %d\n", x, w->skipIdx.entries[x].docId,
169   //     w->skipIdx.entries[x].offset);
170   //   }
171   // printf("iw cap: %ld, iw size: %ld, numdocs: %d\n", w->bw.buf->cap, IW_Len(w), w->ndocs);
172 
173   for (int xx = 0; xx < 1; xx++) {
174     // printf("si: %d\n", si->len);
175     IndexReader *ir = NewTermIndexReader(idx, NULL, RS_FIELDMASK_ALL, NULL, 1);  //
176     RSIndexResult *h = NULL;
177 
178     int n = 0;
179     int rc;
180     while (!ir->atEnd_) {
181       if ((rc = IR_Read(ir, &h)) == INDEXREAD_EOF) {
182         break;
183       }
184       ASSERT_EQ(INDEXREAD_OK, rc);
185       ASSERT_EQ(h->docId, n);
186       n++;
187     }
188     // for (int z= 0; z < 10; z++) {
189     // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_time);
190 
191     // IR_SkipTo(ir, 900001, &h);
192 
193     // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_time);
194     // long diffInNanos = end_time.tv_nsec - start_time.tv_nsec;
195 
196     // printf("Time elapsed: %ldnano\n", diffInNanos);
197     // //IR_Free(ir);
198     // }
199     // IndexResult_Free(&h);
200     IR_Free(ir);
201   }
202 
203   // IW_Free(w);
204   // // overriding the regular IW_Free because we already deleted the buffer
205   InvertedIndex_Free(idx);
206 }
207 
// Instantiate the IndexFlagsTest cases over the values produced by
// ::testing::Range(1, 32); each value is used as the IndexFlags bit pattern.
INSTANTIATE_TEST_CASE_P(IndexFlagsP, IndexFlagsTest, ::testing::Range(1, 32));
209 
createIndex(int size,int idStep)210 InvertedIndex *createIndex(int size, int idStep) {
211   InvertedIndex *idx = NewInvertedIndex((IndexFlags)(INDEX_DEFAULT_FLAGS), 1);
212 
213   IndexEncoder enc = InvertedIndex_GetEncoder(idx->flags);
214   t_docId id = idStep;
215   for (int i = 0; i < size; i++) {
216     // if (i % 10000 == 1) {
217     //     printf("iw cap: %ld, iw size: %d, numdocs: %d\n", w->cap, IW_Len(w),
218     //     w->ndocs);
219     // }
220     ForwardIndexEntry h;
221     h.docId = id;
222     h.fieldMask = 1;
223     h.freq = 1;
224     h.term = "hello";
225     h.len = 5;
226 
227     h.vw = NewVarintVectorWriter(8);
228     for (int n = idStep; n < idStep + i % 4; n++) {
229       VVW_Write(h.vw, n);
230     }
231 
232     InvertedIndex_WriteForwardIndexEntry(idx, enc, &h);
233     VVW_Free(h.vw);
234 
235     id += idStep;
236   }
237 
238   // printf("BEFORE: iw cap: %ld, iw size: %zd, numdocs: %d\n", w->bw.buf->cap,
239   //        IW_Len(w), w->ndocs);
240 
241   return idx;
242 }
243 
printIntersect(void * ctx,RSIndexResult * hits,int argc)244 int printIntersect(void *ctx, RSIndexResult *hits, int argc) {
245   printf("intersect: %llu\n", (unsigned long long)hits[0].docId);
246   return 0;
247 }
248 
// Reads a ten-entry index (doc ids 1..10) through a read iterator and expects
// the ids to come back in ascending order with nothing missing.
TEST_F(IndexTest, testReadIterator) {
  InvertedIndex *idx = createIndex(10, 1);
  IndexReader *reader = NewTermIndexReader(idx, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexIterator *it = NewReadIterator(reader);

  RSIndexResult *hit = NULL;
  int nextId = 1;
  for (; IITER_HAS_NEXT(it); ++nextId) {
    if (it->Read(it->ctx, &hit) == INDEXREAD_EOF) {
      break;
    }
    ASSERT_EQ(hit->docId, nextId);
  }
  // Ten successful reads leave the counter one past the last id.
  ASSERT_EQ(11, nextId);

  it->Free(it);
  InvertedIndex_Free(idx);
}
274 
// Unions an index of even ids (2..20) with an index of multiples of three
// (3..30) and checks both the merged id sequence and IndexResult_DeepCopy on
// every hit.
TEST_F(IndexTest, testUnion) {
  InvertedIndex *w = createIndex(10, 2);
  InvertedIndex *w2 = createIndex(10, 3);
  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexReader *r2 = NewTermIndexReader(w2, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **irs = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  irs[0] = NewReadIterator(r1);
  irs[1] = NewReadIterator(r2);

  IndexIterator *ui = NewUnionIterator(irs, 2, NULL, 0, 1);
  RSIndexResult *h = NULL;
  const int expected[] = {2, 3, 4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 24, 27, 30};
  const size_t numExpected = sizeof(expected) / sizeof(expected[0]);
  size_t i = 0;
  while (ui->Read(ui->ctx, &h) != INDEXREAD_EOF) {
    // Never index past the table, even if the iterator yields too much.
    ASSERT_LT(i, numExpected) << "Union produced more results than expected";
    ASSERT_EQ(expected[i], h->docId);
    i++;

    RSIndexResult *copy = IndexResult_DeepCopy(h);
    ASSERT_TRUE(copy != NULL);
    ASSERT_TRUE(copy != h);
    ASSERT_TRUE(copy->isCopy);

    ASSERT_EQ(copy->docId, h->docId);
    ASSERT_EQ(copy->type, h->type);

    IndexResult_Free(copy);
  }
  // A union that stopped early used to pass silently.
  ASSERT_EQ(numExpected, i) << "Union produced fewer results than expected";

  ui->Free(ui);
  InvertedIndex_Free(w);
  InvertedIndex_Free(w2);
}
313 
// Unions two readers created with weights 0.5 and 1 under a union weight of
// 0.8, and checks that the weights show up on the aggregate and its children.
TEST_F(IndexTest, testWeight) {
  InvertedIndex *w = createIndex(10, 1);
  InvertedIndex *w2 = createIndex(10, 2);
  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 0.5);
  IndexReader *r2 = NewTermIndexReader(w2, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **irs = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  irs[0] = NewReadIterator(r1);
  irs[1] = NewReadIterator(r2);

  IndexIterator *ui = NewUnionIterator(irs, 2, NULL, 0, 0.8);
  RSIndexResult *h = NULL;
  const int expected[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20};
  const size_t numExpected = sizeof(expected) / sizeof(expected[0]);
  size_t i = 0;
  while (ui->Read(ui->ctx, &h) != INDEXREAD_EOF) {
    // Never index past the table, even if the iterator yields too much.
    ASSERT_LT(i, numExpected) << "Union produced more results than expected";
    ASSERT_EQ(h->docId, expected[i]);
    i++;
    ASSERT_EQ(h->weight, 0.8);
    if (h->agg.numChildren == 2) {
      ASSERT_EQ(h->agg.children[0]->weight, 0.5);
      ASSERT_EQ(h->agg.children[1]->weight, 1);
    } else {
      // `i` has already been advanced: the first ten results (ids 1..10) all
      // include the 0.5-weighted reader, the later ones only the other.
      if (i <= 10) {
        ASSERT_EQ(h->agg.children[0]->weight, 0.5);
      } else {
        ASSERT_EQ(h->agg.children[0]->weight, 1);
      }
    }
  }
  // A union that stopped early used to pass silently.
  ASSERT_EQ(numExpected, i) << "Union produced fewer results than expected";

  ui->Free(ui);
  InvertedIndex_Free(w);
  InvertedIndex_Free(w2);
}
350 
// Intersects ids 1..16 with the negation of "multiples of three" and expects
// exactly the ids in 1..16 that are not divisible by three.
TEST_F(IndexTest, testNot) {
  InvertedIndex *w = createIndex(16, 1);
  // w2 holds the multiples of 3; it is the operand of the NOT iterator
  InvertedIndex *w2 = createIndex(10, 3);
  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexReader *r2 = NewTermIndexReader(w2, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **irs = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  irs[0] = NewReadIterator(r1);
  irs[1] = NewNotIterator(NewReadIterator(r2), w2->lastId, 1);

  IndexIterator *ui = NewIntersecIterator(irs, 2, NULL, RS_FIELDMASK_ALL, -1, 0, 1);
  RSIndexResult *h = NULL;
  const int expected[] = {1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16};
  const size_t numExpected = sizeof(expected) / sizeof(expected[0]);
  size_t i = 0;
  while (ui->Read(ui->ctx, &h) != INDEXREAD_EOF) {
    // Never index past the table, even if the iterator yields too much.
    ASSERT_LT(i, numExpected) << "Intersection produced more results than expected";
    ASSERT_EQ(expected[i], h->docId);
    i++;
  }
  // An iterator that stopped early used to pass silently.
  ASSERT_EQ(numExpected, i) << "Intersection produced fewer results than expected";

  ui->Free(ui);
  InvertedIndex_Free(w);
  InvertedIndex_Free(w2);
}
378 
// Runs a NOT iterator on its own over an index of multiples of three (3..30),
// with the upper bound set five ids past the last indexed id, and expects
// every id up to that bound that is not in the index.
TEST_F(IndexTest, testPureNot) {
  InvertedIndex *w = createIndex(10, 3);

  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 1);
  printf("last id: %llu\n", (unsigned long long)w->lastId);

  IndexIterator *ir = NewNotIterator(NewReadIterator(r1), w->lastId + 5, 1);

  RSIndexResult *h = NULL;
  const int expected[] = {1,  2,  4,  5,  7,  8,  10, 11, 13, 14, 16, 17, 19,
                          20, 22, 23, 25, 26, 28, 29, 31, 32, 33, 34, 35};
  const size_t numExpected = sizeof(expected) / sizeof(expected[0]);
  size_t i = 0;
  while (ir->Read(ir->ctx, &h) != INDEXREAD_EOF) {
    // Never index past the table, even if the iterator yields too much.
    ASSERT_LT(i, numExpected) << "NOT iterator produced more results than expected";
    ASSERT_EQ(expected[i], h->docId);
    i++;
  }
  // An iterator that stopped early used to pass silently.
  ASSERT_EQ(numExpected, i) << "NOT iterator produced fewer results than expected";

  ir->Free(ir);
  InvertedIndex_Free(w);
}
399 
400 // Note -- in test_index.c, this test was never actually run!
// Intersects ids 1..16 with an OPTIONAL iterator over the multiples of three.
// Every id is expected to come back; the optional child is expected to report
// freq 1 on multiples of three and freq 0 elsewhere.
// The test remains disabled (DISABLED_ prefix).
TEST_F(IndexTest, DISABLED_testOptional) {
  InvertedIndex *w = createIndex(16, 1);
  // w2 holds the multiples of 3; it is the operand of the OPTIONAL iterator
  InvertedIndex *w2 = createIndex(10, 3);
  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexReader *r2 = NewTermIndexReader(w2, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **irs = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  irs[0] = NewReadIterator(r1);
  irs[1] = NewOptionalIterator(NewReadIterator(r2), w2->lastId, 1);

  IndexIterator *ui = NewIntersecIterator(irs, 2, NULL, RS_FIELDMASK_ALL, -1, 0, 1);
  RSIndexResult *h = NULL;

  int i = 1;
  while (ui->Read(ui->ctx, &h) != INDEXREAD_EOF) {
    ASSERT_EQ(i, h->docId);
    if (i > 0 && i % 3 == 0) {
      ASSERT_EQ(1, h->agg.children[1]->freq);
    } else {
      ASSERT_EQ(0, h->agg.children[1]->freq);
    }
    // Advance the expected id. The counter was previously never incremented,
    // so the comparison above failed on the second result.
    i++;
  }

  ui->Free(ui);
  InvertedIndex_Free(w);
  InvertedIndex_Free(w2);
}
433 
// Writes 75 numeric entries (doc id k stores the value k) and reads them back
// through a numeric reader, checking ids, values and the number of results.
TEST_F(IndexTest, testNumericInverted) {
  InvertedIndex *idx = NewInvertedIndex(Index_StoreNumeric, 1);

  for (int i = 0; i < 75; i++) {
    size_t sz = InvertedIndex_WriteNumericEntry(idx, i + 1, (double)(i + 1));
    ASSERT_TRUE(sz > 1);
  }
  ASSERT_EQ(75, idx->lastId);

  IndexReader *ir = NewNumericReader(NULL, idx, NULL);
  IndexIterator *it = NewReadIterator(ir);
  RSIndexResult *res = NULL;
  t_docId i = 1;
  while (INDEXREAD_EOF != it->Read(it->ctx, &res)) {
    ASSERT_EQ(i++, res->docId);
    ASSERT_EQ(res->num.value, (float)res->docId);
  }
  // All 75 entries must have been read; the loop previously accepted any
  // prefix of them.
  ASSERT_EQ((t_docId)76, i);

  // Release the iterator before the index it reads from, matching the other
  // tests in this file.
  it->Free(it);
  InvertedIndex_Free(idx);
}
461 
// Round-trips a mixed bag of numeric values (zero, fractions, negatives,
// large powers of two) through a numeric inverted index and checks each value
// read back to within 0.01.
TEST_F(IndexTest, testNumericVaried) {
  InvertedIndex *idx = NewInvertedIndex(Index_StoreNumeric, 1);

  // The negative power of two is negated after conversion to double: negating
  // the unsigned expression `1LLU << 32` wraps around to a huge positive value
  // instead of producing -2^32.
  static const double nums[] = {0,
                                0.13,
                                0.001,
                                -0.1,
                                1.0,
                                5.0,
                                4.323,
                                65535,
                                65535.53,
                                32768.432,
                                (double)(1LLU << 32),
                                -(double)(1LLU << 32),
                                (double)(1LLU << 40)};
  static const size_t numCount = sizeof(nums) / sizeof(double);

  for (size_t i = 0; i < numCount; i++) {
    size_t sz = InvertedIndex_WriteNumericEntry(idx, i + 1, nums[i]);
    ASSERT_GT(sz, 1);
  }

  IndexReader *ir = NewNumericReader(NULL, idx, NULL);
  IndexIterator *it = NewReadIterator(ir);
  RSIndexResult *res = NULL;

  for (size_t i = 0; i < numCount; i++) {
    int rv = it->Read(it->ctx, &res);
    ASSERT_NE(INDEXREAD_EOF, rv);
    ASSERT_LT(fabs(nums[i] - res->num.value), 0.01);
  }

  // Nothing may follow the last written entry.
  ASSERT_EQ(INDEXREAD_EOF, it->Read(it->ctx, &res));

  // Release the iterator before the index it reads from, matching the other
  // tests in this file.
  it->Free(it);
  InvertedIndex_Free(idx);
}
491 
// One numeric-encoding expectation used by testNumericEncoding.
struct encodingInfo {
  double value;  // number passed to InvertedIndex_WriteNumericEntry
  size_t size;   // size that write is expected to return
};
// Table driving testNumericEncoding: each row is {value to store, size the
// write is expected to report}. The trailing comment on each row is its index
// in the table.
static const encodingInfo infos[] = {
    {0, 2},                    // 0
    {1, 2},                    // 1
    {63, 3},                   // 2
    {-1, 3},                   // 3
    {-63, 3},                  // 4
    {64, 3},                   // 5
    {-64, 3},                  // 6
    {255, 3},                  // 7
    {-255, 3},                 // 8
    {65535, 4},                // 9
    {-65535, 4},               // 10
    {16777215, 5},             // 11
    {-16777215, 5},            // 12
    {4294967295, 6},           // 13
    {-4294967295, 6},          // 14
    {4294967295 + 1, 7},       // 15
    {4294967295 + 2, 7},       // 16
    {549755813888.0, 7},       // 17
    {549755813888.0 + 2, 7},   // 18
    {549755813888.0 - 23, 7},  // 19
    {-549755813888.0, 7},      // 20
    {1503342028.957225, 10},   // 21
    {42.4345, 10},             // 22
    {(float)0.5, 6},           // 23
    {DBL_MAX, 10},             // 24
    {UINT64_MAX >> 12, 9},     // 25
    {INFINITY, 2},             // 26
    {-INFINITY, 2}             // 27
};
526 
// Writes every value in `infos` with numeric compression switched off, checks
// the size reported for each write, then reads the values back and compares
// them (exactly for infinities, within 0.01 otherwise).
TEST_F(IndexTest, testNumericEncoding) {
  // Note: this changes the global configuration and does not restore it.
  RSGlobalConfig.numericCompress = 0;

  static const size_t numInfos = sizeof(infos) / sizeof(infos[0]);
  InvertedIndex *idx = NewInvertedIndex(Index_StoreNumeric, 1);

  for (size_t ii = 0; ii < numInfos; ii++) {
    size_t sz = InvertedIndex_WriteNumericEntry(idx, ii + 1, infos[ii].value);
    ASSERT_EQ(infos[ii].size, sz);
  }

  IndexReader *ir = NewNumericReader(NULL, idx, NULL);
  IndexIterator *it = NewReadIterator(ir);
  RSIndexResult *res = NULL;

  for (size_t ii = 0; ii < numInfos; ii++) {
    int rc = it->Read(it->ctx, &res);
    ASSERT_NE(rc, INDEXREAD_EOF);
    if (fabs(infos[ii].value) == INFINITY) {
      // A difference of infinities is NaN, so infinities are compared directly.
      ASSERT_EQ(infos[ii].value, res->num.value);
    } else {
      ASSERT_LT(fabs(infos[ii].value - res->num.value), 0.01);
    }
  }

  // Nothing may follow the last written entry (same check as testNumericVaried).
  ASSERT_EQ(INDEXREAD_EOF, it->Read(it->ctx, &res));

  // Release the iterator before the index it reads from, matching the other
  // tests in this file.
  it->Free(it);
  InvertedIndex_Free(idx);
}
560 
// Aborts a read iterator after its 51st successful read and expects the very
// next Read call to report EOF.
TEST_F(IndexTest, testAbort) {
  InvertedIndex *idx = createIndex(1000, 1);
  IndexReader *reader = NewTermIndexReader(idx, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexIterator *it = NewReadIterator(reader);

  RSIndexResult *res;
  int reads = 0;
  for (; it->Read(it->ctx, &res) != INDEXREAD_EOF; ++reads) {
    // `reads` counts the reads completed before this one.
    if (reads == 50) {
      it->Abort(it->ctx);
    }
  }
  ASSERT_EQ(51, reads);

  it->Free(it);
  InvertedIndex_Free(idx);
}
579 
// Intersects 100000 multiples of four with 100000 multiples of two and checks
// every hit (type, offsets, deep copy, doc id, freq) plus the totals.
TEST_F(IndexTest, testIntersection) {
  InvertedIndex *w = createIndex(100000, 4);
  InvertedIndex *w2 = createIndex(100000, 2);

  IndexIterator **children = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexReader *r2 = NewTermIndexReader(w2, NULL, RS_FIELDMASK_ALL, NULL, 1);
  children[0] = NewReadIterator(r1);
  children[1] = NewReadIterator(r2);

  IndexIterator *ii = NewIntersecIterator(children, 2, NULL, RS_FIELDMASK_ALL, -1, 0, 1);

  RSIndexResult *hit = NULL;
  uint32_t topFreq = 0;
  int count = 0;
  for (; ii->Read(ii->ctx, &hit) != INDEXREAD_EOF; ++count) {
    ASSERT_EQ(hit->type, RSResultType_Intersection);
    ASSERT_TRUE(RSIndexResult_IsAggregate(hit));
    ASSERT_TRUE(RSIndexResult_HasOffsets(hit));
    // Track the largest freq seen so far.
    if (hit->freq > topFreq) {
      topFreq = hit->freq;
    }

    RSIndexResult *copy = IndexResult_DeepCopy(hit);
    ASSERT_TRUE(copy != NULL);
    ASSERT_TRUE(copy != hit);
    ASSERT_TRUE(copy->isCopy == 1);

    ASSERT_TRUE(copy->docId == hit->docId);
    ASSERT_TRUE(copy->type == RSResultType_Intersection);
    // Hit number `count` (from 0) is expected at doc id 4 * (count + 1).
    ASSERT_EQ((count * 2 + 2) * 2, hit->docId);
    ASSERT_EQ(count * 2 + 2, hit->freq);
    IndexResult_Free(copy);
  }

  ASSERT_EQ(count, 50000);
  ASSERT_EQ(topFreq, 100000.0);

  ii->Free(ii);
  InvertedIndex_Free(w);
  InvertedIndex_Free(w2);
}
630 
// Exercises Buffer through a writer and a reader: writes a C string followed
// by a varint, checks offsets and capacities along the way, truncates, then
// reads both values back.
TEST_F(IndexTest, testBuffer) {
  Buffer storage = {0};
  Buffer_Init(&storage, 2);
  BufferWriter writer = NewBufferWriter(&storage);
  ASSERT_TRUE(writer.buf->cap == 2) << "Wrong capacity";
  ASSERT_TRUE(writer.buf->data != NULL);
  ASSERT_TRUE(Buffer_Offset(writer.buf) == 0);
  ASSERT_TRUE(writer.buf->data == writer.pos);

  // The payload includes the terminating NUL.
  const char *text = "helololoolo";
  const size_t textBytes = strlen(text) + 1;
  size_t len = Buffer_Write(&writer, (void *)text, textBytes);

  ASSERT_TRUE(len == textBytes);
  ASSERT_TRUE(Buffer_Offset(writer.buf) == len);
  ASSERT_EQ(Buffer_Capacity(writer.buf), 14);

  len = WriteVarint(1337654, &writer);
  ASSERT_TRUE(len == 3);
  ASSERT_EQ(Buffer_Offset(writer.buf), 15);
  ASSERT_EQ(Buffer_Capacity(writer.buf), 17);

  Buffer_Truncate(writer.buf, 0);

  ASSERT_TRUE(Buffer_Capacity(writer.buf) == 15);

  BufferReader reader = NewBufferReader(writer.buf);
  ASSERT_TRUE(reader.pos == 0);

  char *readBack = (char *)malloc(textBytes);
  len = Buffer_Read(&reader, readBack, textBytes);
  ASSERT_TRUE(len == textBytes);

  ASSERT_TRUE(strcmp(readBack, text) == 0);
  ASSERT_TRUE(BufferReader_Offset(&reader) == len);

  free(readBack);

  int decoded = ReadVarint(&reader);
  ASSERT_TRUE(decoded == 1337654);

  Buffer_Free(writer.buf);
}
674 
// State handed to tokenFunc through its `ctx` pointer.
struct tokenContext {
  int num;          // index of the next entry of `expected` to compare against
  char **expected;  // token strings, in the order they should be reported
};
680 
tokenFunc(void * ctx,const Token * t)681 int tokenFunc(void *ctx, const Token *t) {
682   tokenContext *tx = (tokenContext *)ctx;
683   int ret = strncmp(t->tok, tx->expected[tx->num++], t->tokLen);
684   EXPECT_TRUE(ret == 0);
685   EXPECT_TRUE(t->pos > 0);
686   return 0;
687 }
688 
689 // int testTokenize() {
690 //   char *txt = strdup("Hello? world...   ? -WAZZ@UP? שלום");
691 //   tokenContext ctx = {0};
692 //   const char *expected[] = {"hello", "world", "wazz", "up", "שלום"};
693 //   ctx.expected = (char **)expected;
694 
695 //   tokenize(txt, &ctx, tokenFunc, NULL, 0, DefaultStopWordList(), 0);
696 //   ASSERT_TRUE(ctx.num == 5);
697 
698 //   free(txt);
699 
700 //   return 0;
701 // }
702 
703 // int testForwardIndex() {
704 
705 //   Document doc = NewDocument(NULL, 1, 1, "english");
706 //   doc.docId = 1;
707 //   doc.fields[0] = N
708 //   ForwardIndex *idx = NewForwardIndex(doc);
709 //   char *txt = strdup("Hello? world...  hello hello ? __WAZZ@UP? שלום");
710 //   tokenize(txt, 1, 1, idx, forwardIndexTokenFunc);
711 
712 //   return 0;
713 // }
714 
// Parses three index definitions with IndexSpec_Parse and checks the
// resulting spec: custom stopwords and per-field attributes for a five-field
// schema, the effect of NOOFFSETS/NOFIELDS, and a regression case in which
// NOSTEM follows WEIGHT.
TEST_F(IndexTest, testIndexSpec) {
  const char *title = "title", *body = "body", *foo = "foo", *bar = "bar", *name = "name";
  // Two custom stopwords followed by a five-field schema.
  const char *args[] = {"STOPWORDS", "2",      "hello", "world",    "SCHEMA", title,
                        "text",      "weight", "0.1",   body,       "text",   "weight",
                        "2.0",       foo,      "text",  "sortable", bar,      "numeric",
                        "sortable",  name,     "text",  "nostem"};
  QueryError err = {QUERY_OK};
  IndexSpec *s = IndexSpec_Parse("idx", args, sizeof(args) / sizeof(const char *), &err);
  ASSERT_FALSE(QueryError_HasError(&err)) << QueryError_GetError(&err);
  ASSERT_TRUE(s);
  ASSERT_TRUE(s->numFields == 5);
  ASSERT_TRUE(s->stopwords != NULL);
  ASSERT_TRUE(s->stopwords != DefaultStopWordList());
  ASSERT_TRUE(s->flags & Index_StoreFieldFlags);
  ASSERT_TRUE(s->flags & Index_StoreTermOffsets);
  ASSERT_TRUE(s->flags & Index_HasCustomStopwords);

  // Only the two words listed after STOPWORDS count as stopwords.
  ASSERT_TRUE(IndexSpec_IsStopWord(s, "hello", 5));
  ASSERT_TRUE(IndexSpec_IsStopWord(s, "world", 5));
  ASSERT_TRUE(!IndexSpec_IsStopWord(s, "werld", 5));

  // "body": second field in the schema, text, weight 2.0, not sortable.
  const FieldSpec *f = IndexSpec_GetField(s, body, strlen(body));
  ASSERT_TRUE(f != NULL);
  ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
  ASSERT_STREQ(f->name, body);
  ASSERT_EQ(f->ftWeight, 2.0);
  ASSERT_EQ(FIELD_BIT(f), 2);
  ASSERT_EQ(f->options, 0);
  ASSERT_EQ(f->sortIdx, -1);

  // "title": first field in the schema, text, weight 0.1, not sortable.
  f = IndexSpec_GetField(s, title, strlen(title));
  ASSERT_TRUE(f != NULL);
  ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
  ASSERT_TRUE(strcmp(f->name, title) == 0);
  ASSERT_TRUE(f->ftWeight == 0.1);
  ASSERT_TRUE(FIELD_BIT(f) == 1);
  ASSERT_TRUE(f->options == 0);
  ASSERT_TRUE(f->sortIdx == -1);

  // "foo": text with no explicit weight; first sortable field.
  f = IndexSpec_GetField(s, foo, strlen(foo));
  ASSERT_TRUE(f != NULL);
  ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
  ASSERT_TRUE(strcmp(f->name, foo) == 0);
  ASSERT_TRUE(f->ftWeight == 1);
  ASSERT_TRUE(FIELD_BIT(f) == 4);
  ASSERT_TRUE(f->options == FieldSpec_Sortable);
  ASSERT_TRUE(f->sortIdx == 0);

  // "bar": numeric; second sortable field.
  f = IndexSpec_GetField(s, bar, strlen(bar));
  ASSERT_TRUE(f != NULL);
  ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_NUMERIC));

  ASSERT_TRUE(strcmp(f->name, bar) == 0);
  ASSERT_TRUE(f->options == FieldSpec_Sortable);
  ASSERT_TRUE(f->sortIdx == 1);
  // A name that is not in the schema yields no field.
  ASSERT_TRUE(IndexSpec_GetField(s, "fooz", 4) == NULL);

  // "name": text with NOSTEM, not sortable.
  f = IndexSpec_GetField(s, name, strlen(name));
  ASSERT_TRUE(f != NULL);
  ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
  ASSERT_TRUE(strcmp(f->name, name) == 0);
  ASSERT_TRUE(f->ftWeight == 1);
  ASSERT_TRUE(FIELD_BIT(f) == 8);
  ASSERT_TRUE(f->options == FieldSpec_NoStemming);
  ASSERT_TRUE(f->sortIdx == -1);

  // The sorting table covers the two sortable fields, in schema order;
  // a non-sortable field maps to -1.
  ASSERT_TRUE(s->sortables != NULL);
  ASSERT_TRUE(s->sortables->len == 2);
  int rc = IndexSpec_GetFieldSortingIndex(s, foo, strlen(foo));
  ASSERT_EQ(0, rc);
  rc = IndexSpec_GetFieldSortingIndex(s, bar, strlen(bar));
  ASSERT_EQ(1, rc);
  rc = IndexSpec_GetFieldSortingIndex(s, title, strlen(title));
  ASSERT_EQ(-1, rc);

  IndexSpec_Free(s);

  // Second spec: NOOFFSETS and NOFIELDS must clear the matching storage flags.
  QueryError_ClearError(&err);
  const char *args2[] = {
      "NOOFFSETS", "NOFIELDS", "SCHEMA", title, "text",
  };
  s = IndexSpec_Parse("idx", args2, sizeof(args2) / sizeof(const char *), &err);
  ASSERT_FALSE(QueryError_HasError(&err)) << QueryError_GetError(&err);
  ASSERT_TRUE(s);
  ASSERT_TRUE(s->numFields == 1);

  ASSERT_TRUE(!(s->flags & Index_StoreFieldFlags));
  ASSERT_TRUE(!(s->flags & Index_StoreTermOffsets));
  IndexSpec_Free(s);

  // User-reported bug: NOSTEM given after WEIGHT on the second field must
  // still be applied to that field.
  const char *args3[] = {"SCHEMA", "ha", "NUMERIC", "hb", "TEXT", "WEIGHT", "1", "NOSTEM"};
  QueryError_ClearError(&err);
  s = IndexSpec_Parse("idx", args3, sizeof(args3) / sizeof(args3[0]), &err);
  ASSERT_FALSE(QueryError_HasError(&err)) << QueryError_GetError(&err);
  ASSERT_TRUE(s);
  ASSERT_TRUE(FieldSpec_IsNoStem(s->fields + 1));
  IndexSpec_Free(s);
}
814 
/**
 * Fills `args` with a SCHEMA argument list describing `nfields` fields named
 * field0, field1, ...
 *
 *  - even-numbered fields are TEXT
 *  - odd-numbered fields below 40 are TEXT NOINDEX
 *  - the remaining odd-numbered fields are NUMERIC
 *
 * Every element is heap-allocated with strdup() and must be released with
 * free() (see freeSchemaArgs). Any pointers already stored in `args` are
 * dropped without being freed, so pass an empty vector.
 *
 * @param args    receives "SCHEMA" followed by the field definitions
 * @param nfields number of fields to describe
 */
static void fillSchema(std::vector<char *> &args, size_t nfields) {
  args.clear();
  // Upper bound: the keyword plus at most three arguments per field.
  args.reserve(1 + nfields * 3);
  args.push_back(strdup("SCHEMA"));
  for (unsigned i = 0; i < nfields; i++) {
    // "field" plus at most ten digits always fits. snprintf replaces the
    // non-standard asprintf, whose return value was never checked.
    char name[32];
    snprintf(name, sizeof(name), "field%u", i);
    args.push_back(strdup(name));
    if (i % 2 == 0) {
      args.push_back(strdup("TEXT"));
    } else if (i < 40) {
      // odd fields under 40 are TEXT NOINDEX
      args.push_back(strdup("TEXT"));
      args.push_back(strdup("NOINDEX"));
    } else {
      // the rest are numeric
      args.push_back(strdup("NUMERIC"));
    }
  }
}
841 
/**
 * Releases every string stored in `args` with free() and leaves the vector
 * empty.
 *
 * @param args argument list whose elements were allocated with malloc/strdup
 */
static void freeSchemaArgs(std::vector<char *> &args) {
  for (size_t idx = 0; idx < args.size(); ++idx) {
    free(args[idx]);
  }
  args.clear();
}
848 
// Parses a generated 64-field schema successfully, then checks that a
// 300-field schema is rejected with the "too many TEXT fields" error.
TEST_F(IndexTest, testHugeSpec) {
  std::vector<char *> args;
  QueryError err = {QUERY_OK};

  // A schema of 64 fields must parse.
  int N = 64;
  fillSchema(args, N);
  IndexSpec *spec = IndexSpec_Parse("idx", (const char **)args.data(), args.size(), &err);
  ASSERT_FALSE(QueryError_HasError(&err)) << QueryError_GetError(&err);
  ASSERT_TRUE(spec);
  ASSERT_TRUE(spec->numFields == N);
  IndexSpec_Free(spec);
  freeSchemaArgs(args);

  // test too big a schema
  N = 300;
  fillSchema(args, N);
  QueryError_ClearError(&err);
  spec = IndexSpec_Parse("idx", (const char **)args.data(), args.size(), &err);
  ASSERT_TRUE(spec == NULL);
  ASSERT_TRUE(QueryError_HasError(&err));
  ASSERT_STREQ("Too many TEXT fields in schema", QueryError_GetError(&err));
  freeSchemaArgs(args);
  QueryError_ClearError(&err);
}
874 
// Overlays an int and a float in the same storage.
union u {
  int i;
  float f;
};
880 
TEST_F(IndexTest,testIndexFlags)881 TEST_F(IndexTest, testIndexFlags) {
882 
883   ForwardIndexEntry h;
884   h.docId = 1234;
885   h.fieldMask = 0x01;
886   h.freq = 1;
887   h.vw = NewVarintVectorWriter(8);
888   for (int n = 0; n < 10; n++) {
889     VVW_Write(h.vw, n);
890   }
891   VVW_Truncate(h.vw);
892 
893   uint32_t flags = INDEX_DEFAULT_FLAGS;
894   InvertedIndex *w = NewInvertedIndex(IndexFlags(flags), 1);
895   IndexEncoder enc = InvertedIndex_GetEncoder(w->flags);
896   ASSERT_TRUE(w->flags == flags);
897   size_t sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
898   // printf("written %zd bytes. Offset=%zd\n", sz, h.vw->buf.offset);
899   ASSERT_EQ(15, sz);
900   InvertedIndex_Free(w);
901 
902   flags &= ~Index_StoreTermOffsets;
903   w = NewInvertedIndex(IndexFlags(flags), 1);
904   ASSERT_TRUE(!(w->flags & Index_StoreTermOffsets));
905   enc = InvertedIndex_GetEncoder(w->flags);
906   size_t sz2 = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
907   // printf("Wrote %zd bytes. Offset=%zd\n", sz2, h.vw->buf.offset);
908   ASSERT_EQ(sz2, sz - Buffer_Offset(&h.vw->buf) - 1);
909   InvertedIndex_Free(w);
910 
911   flags = INDEX_DEFAULT_FLAGS | Index_WideSchema;
912   w = NewInvertedIndex(IndexFlags(flags), 1);
913   ASSERT_TRUE((w->flags & Index_WideSchema));
914   enc = InvertedIndex_GetEncoder(w->flags);
915   h.fieldMask = 0xffffffffffff;
916   ASSERT_EQ(21, InvertedIndex_WriteForwardIndexEntry(w, enc, &h));
917   InvertedIndex_Free(w);
918 
919   flags |= Index_WideSchema;
920   w = NewInvertedIndex(IndexFlags(flags), 1);
921   ASSERT_TRUE((w->flags & Index_WideSchema));
922   enc = InvertedIndex_GetEncoder(w->flags);
923   h.fieldMask = 0xffffffffffff;
924   sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
925   ASSERT_EQ(21, sz);
926   InvertedIndex_Free(w);
927 
928   flags &= Index_StoreFreqs;
929   w = NewInvertedIndex(IndexFlags(flags), 1);
930   ASSERT_TRUE(!(w->flags & Index_StoreTermOffsets));
931   ASSERT_TRUE(!(w->flags & Index_StoreFieldFlags));
932   enc = InvertedIndex_GetEncoder(w->flags);
933   sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
934   ASSERT_EQ(3, sz);
935   InvertedIndex_Free(w);
936 
937   flags |= Index_StoreFieldFlags | Index_WideSchema;
938   w = NewInvertedIndex(IndexFlags(flags), 1);
939   ASSERT_TRUE((w->flags & Index_WideSchema));
940   ASSERT_TRUE((w->flags & Index_StoreFieldFlags));
941   enc = InvertedIndex_GetEncoder(w->flags);
942   h.fieldMask = 0xffffffffffff;
943   sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
944   ASSERT_EQ(10, sz);
945   InvertedIndex_Free(w);
946 
947   VVW_Free(h.vw);
948 }
949 
TEST_F(IndexTest,testDocTable)950 TEST_F(IndexTest, testDocTable) {
951   char buf[16];
952   DocTable dt = NewDocTable(10, 10);
953   t_docId did = 0;
954   // N is set to 100 and the max cap of the doc table is 10 so we surely will
955   // get overflow and check that everything works correctly
956   int N = 100;
957   for (int i = 0; i < N; i++) {
958     size_t nkey = sprintf(buf, "doc_%d", i);
959     t_docId nd = DocTable_Put(&dt, buf, nkey, (double)i, Document_DefaultFlags, buf, strlen(buf));
960     ASSERT_EQ(did + 1, nd);
961     did = nd;
962   }
963 
964   ASSERT_EQ(N + 1, dt.size);
965   ASSERT_EQ(N, dt.maxDocId);
966 #ifdef __x86_64__
967   ASSERT_EQ(10980, (int)dt.memsize);
968 #endif
969   for (int i = 0; i < N; i++) {
970     sprintf(buf, "doc_%d", i);
971     const char *key = DocTable_GetKey(&dt, i + 1, NULL);
972     ASSERT_STREQ(key, buf);
973 
974     float score = DocTable_GetScore(&dt, i + 1);
975     ASSERT_EQ((int)score, i);
976 
977     RSDocumentMetadata *dmd = DocTable_Get(&dt, i + 1);
978     DMD_Incref(dmd);
979     ASSERT_TRUE(dmd != NULL);
980     ASSERT_TRUE(dmd->flags & Document_HasPayload);
981     ASSERT_STREQ(dmd->keyPtr, buf);
982     char *pl = dmd->payload->data;
983     ASSERT_TRUE(!(strncmp(pl, (char *)buf, dmd->payload->len)));
984 
985     ASSERT_EQ((int)dmd->score, i);
986     ASSERT_EQ((int)dmd->flags, (int)(Document_DefaultFlags | Document_HasPayload));
987 
988     t_docId xid = DocIdMap_Get(&dt.dim, buf, strlen(buf));
989 
990     ASSERT_EQ((int)xid, i + 1);
991 
992     int rc = DocTable_Delete(&dt, dmd->keyPtr, sdslen(dmd->keyPtr));
993     ASSERT_EQ(1, rc);
994     ASSERT_TRUE((int)(dmd->flags & Document_Deleted));
995     DMD_Decref(dmd);
996     dmd = DocTable_Get(&dt, i + 1);
997     ASSERT_TRUE(!dmd);
998   }
999 
1000   ASSERT_FALSE(DocIdMap_Get(&dt.dim, "foo bar", strlen("foo bar")));
1001   ASSERT_FALSE(DocTable_Get(&dt, N + 2));
1002 
1003   t_docId strDocId = DocTable_Put(&dt, "Hello", 5, 1.0, 0, NULL, 0);
1004   ASSERT_TRUE(0 != strDocId);
1005 
1006   // Test that binary keys also work here
1007   static const char binBuf[] = {"Hello\x00World"};
1008   const size_t binBufLen = 11;
1009   ASSERT_FALSE(DocIdMap_Get(&dt.dim, binBuf, binBufLen));
1010   t_docId binDocId = DocTable_Put(&dt, binBuf, binBufLen, 1.0, 0, NULL, 0);
1011   ASSERT_TRUE(binDocId);
1012   ASSERT_NE(binDocId, strDocId);
1013   ASSERT_EQ(binDocId, DocIdMap_Get(&dt.dim, binBuf, binBufLen));
1014   ASSERT_EQ(strDocId, DocIdMap_Get(&dt.dim, "Hello", 5));
1015   DocTable_Free(&dt);
1016 }
1017 
TEST_F(IndexTest,testSortable)1018 TEST_F(IndexTest, testSortable) {
1019   RSSortingTable *tbl = NewSortingTable();
1020   RSSortingTable_Add(tbl, "foo", RSValue_String);
1021   RSSortingTable_Add(tbl, "bar", RSValue_String);
1022   RSSortingTable_Add(tbl, "baz", RSValue_String);
1023   ASSERT_EQ(3, tbl->len);
1024 
1025   ASSERT_STREQ("foo", tbl->fields[0].name);
1026   ASSERT_EQ(RSValue_String, tbl->fields[0].type);
1027   ASSERT_STREQ("bar", tbl->fields[1].name);
1028   ASSERT_STREQ("baz", tbl->fields[2].name);
1029   ASSERT_EQ(0, RSSortingTable_GetFieldIdx(tbl, "foo"));
1030   ASSERT_EQ(0, RSSortingTable_GetFieldIdx(tbl, "FoO"));
1031   ASSERT_EQ(-1, RSSortingTable_GetFieldIdx(NULL, "FoO"));
1032 
1033   ASSERT_EQ(1, RSSortingTable_GetFieldIdx(tbl, "bar"));
1034   ASSERT_EQ(-1, RSSortingTable_GetFieldIdx(tbl, "barbar"));
1035 
1036   RSSortingVector *v = NewSortingVector(tbl->len);
1037   ASSERT_EQ(v->len, tbl->len);
1038 
1039   const char *str = "hello";
1040   const char *masse = "Maße";
1041   double num = 3.141;
1042   ASSERT_TRUE(RSValue_IsNull(v->values[0]));
1043   RSSortingVector_Put(v, 0, str, RS_SORTABLE_STR);
1044   ASSERT_EQ(v->values[0]->t, RSValue_String);
1045   ASSERT_EQ(v->values[0]->strval.stype, RSString_RMAlloc);
1046 
1047   ASSERT_TRUE(RSValue_IsNull(v->values[1]));
1048   ASSERT_TRUE(RSValue_IsNull(v->values[2]));
1049   RSSortingVector_Put(v, 1, &num, RSValue_Number);
1050   ASSERT_EQ(v->values[1]->t, RS_SORTABLE_NUM);
1051 
1052   RSSortingVector *v2 = NewSortingVector(tbl->len);
1053   RSSortingVector_Put(v2, 0, masse, RS_SORTABLE_STR);
1054 
1055   /// test string unicode lowercase normalization
1056   ASSERT_STREQ("masse", v2->values[0]->strval.str);
1057 
1058   double s2 = 4.444;
1059   RSSortingVector_Put(v2, 1, &s2, RS_SORTABLE_NUM);
1060 
1061   RSSortingKey sk = {.index = 0, .ascending = 0};
1062 
1063   QueryError qerr;
1064   QueryError_Init(&qerr);
1065 
1066   int rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
1067   ASSERT_LT(0, rc);
1068   ASSERT_EQ(QUERY_OK, qerr.code);
1069   sk.ascending = 1;
1070   rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
1071   ASSERT_GT(0, rc);
1072   ASSERT_EQ(QUERY_OK, qerr.code);
1073   rc = RSSortingVector_Cmp(v, v, &sk, &qerr);
1074   ASSERT_EQ(0, rc);
1075   ASSERT_EQ(QUERY_OK, qerr.code);
1076 
1077   sk.index = 1;
1078 
1079   rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
1080   ASSERT_TRUE(-1 == rc && qerr.code == QUERY_OK);
1081   sk.ascending = 0;
1082   rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
1083   ASSERT_TRUE(1 == rc && qerr.code == QUERY_OK);
1084 
1085   SortingTable_Free(tbl);
1086   SortingVector_Free(v);
1087   SortingVector_Free(v2);
1088 }
1089 
// Round-trips field masks of growing width through WriteVarintFieldMask /
// ReadVarintFieldMask and checks the encoded size of each one.
TEST_F(IndexTest, testVarintFieldMask) {
  // The mask starts as 0x7f and gains one more 0x7f byte on every iteration
  // (0x7f, 0x7f7f, 0x7f7f7f, ...); expected[i] is the encoded size asserted
  // for the mask produced in iteration i.
  t_fieldMask x = 127;
  size_t expected[] = {1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 19};
  // One table entry is consumed per byte of t_fieldMask; make sure the table
  // is long enough so the loop can never index past its end.
  static_assert(sizeof(t_fieldMask) <= sizeof(expected) / sizeof(expected[0]),
                "expected[] needs one entry per byte of t_fieldMask");
  Buffer b = {0};
  Buffer_Init(&b, 1);
  BufferWriter bw = NewBufferWriter(&b);
  for (size_t i = 0; i < sizeof(t_fieldMask); i++, x |= x << 8) {
    size_t sz = WriteVarintFieldMask(x, &bw);
    ASSERT_EQ(expected[i], sz);
    // Rewind the writer so the next mask is written from position 0 again,
    // then decode what was just written through a fresh reader.
    BufferWriter_Seek(&bw, 0);
    BufferReader br = NewBufferReader(bw.buf);

    t_fieldMask y = ReadVarintFieldMask(&br);

    ASSERT_EQ(y, x);
  }
  Buffer_Free(&b);
}
1108 
// Writes four entries whose doc ids are first close together and then jump
// to 1 << 48, checking idx->size after every write, and finally reads all
// of them back in order followed by EOF.
TEST_F(IndexTest, testDeltaSplits) {
  InvertedIndex *index = NewInvertedIndex((IndexFlags)(INDEX_DEFAULT_FLAGS), 1);
  IndexEncoder encoder = InvertedIndex_GetEncoder(index->flags);

  ForwardIndexEntry entry = {0};
  entry.fieldMask = RS_FIELDMASK_ALL;

  const t_docId farDocId = 1LLU << 48;
  const t_docId docIds[] = {1, 200, farDocId, farDocId + 1};
  // Value of index->size asserted right after the matching doc id is written.
  const int sizeAfterWrite[] = {1, 1, 2, 2};
  const size_t numDocs = sizeof(docIds) / sizeof(docIds[0]);

  for (size_t pos = 0; pos < numDocs; ++pos) {
    entry.docId = docIds[pos];
    InvertedIndex_WriteForwardIndexEntry(index, encoder, &entry);
    ASSERT_EQ(index->size, sizeAfterWrite[pos]);
  }

  // Every written id must come back, in the order it was written.
  IndexReader *reader = NewTermIndexReader(index, NULL, RS_FIELDMASK_ALL, NULL, 1);
  RSIndexResult *res = NULL;
  for (size_t pos = 0; pos < numDocs; ++pos) {
    ASSERT_EQ(INDEXREAD_OK, IR_Read(reader, &res));
    ASSERT_EQ(docIds[pos], res->docId);
  }

  // Nothing is left after the last entry.
  ASSERT_EQ(INDEXREAD_EOF, IR_Read(reader, &res));

  IR_Free(reader);
  InvertedIndex_Free(index);
}
1149