1 #include "../../src/buffer.h"
2 #include "../../src/index.h"
3 #include "../../src/inverted_index.h"
4 #include "../../src/index_result.h"
5 #include "../../src/query_parser/tokenizer.h"
6 #include "../../src/rmutil/alloc.h"
7 #include "../../src/spec.h"
8 #include "../../src/tokenize.h"
9 #include "../../src/varint.h"
10 #include "../../src/rmutil/alloc.h"
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14 #include <time.h>
15 #include <float.h>
16 #include <gtest/gtest.h>
17 #include <vector>
18 #include <cstdint>
19 
20 class IndexTest : public ::testing::Test {};
21 
offsetsFromVVW(const VarintVectorWriter * vvw)22 static RSOffsetVector offsetsFromVVW(const VarintVectorWriter *vvw) {
23   RSOffsetVector ret = {0};
24   ret.data = VVW_GetByteData(vvw);
25   ret.len = VVW_GetByteLength(vvw);
26   return ret;
27 }
28 
// Writes a sequence of increasing offsets through a VarintVectorWriter and checks that
// iterating the resulting offset vector yields exactly the same values, in the same order
// and in the same quantity.
TEST_F(IndexTest, testVarint) {
  const int kNumValues = 5;
  const uint32_t expected[kNumValues] = {10, 1000, 1020, 10000, 10020};

  VarintVectorWriter *vw = NewVarintVectorWriter(8);
  for (int i = 0; i < kNumValues; i++) {
    VVW_Write(vw, expected[i]);
  }
  VVW_Truncate(vw);

  RSOffsetVector vec = offsetsFromVVW(vw);
  RSOffsetIterator it = RSOffsetVector_Iterate(&vec, NULL);
  int x = 0;
  uint32_t n = 0;
  while (RS_OFFSETVECTOR_EOF != (n = it.Next(it.ctx, NULL))) {
    // Never index past the end of expected[], even if the iterator over-produces.
    ASSERT_LT(x, kNumValues) << "Decoded more values than were written";
    ASSERT_EQ(expected[x], n) << "Wrong number decoded";
    x++;
  }
  // An iterator that reports EOF too early must not let the test pass vacuously.
  ASSERT_EQ(kNumValues, x) << "Decoded fewer values than were written";

  it.Free(it.ctx);
  VVW_Free(vw);
}
53 
TEST_F(IndexTest,testDistance)54 TEST_F(IndexTest, testDistance) {
55   VarintVectorWriter *vw = NewVarintVectorWriter(8);
56   VarintVectorWriter *vw2 = NewVarintVectorWriter(8);
57   VarintVectorWriter *vw3 = NewVarintVectorWriter(8);
58   VVW_Write(vw, 1);
59   VVW_Write(vw, 9);
60   VVW_Write(vw, 13);
61   VVW_Write(vw, 16);
62   VVW_Write(vw, 22);
63 
64   VVW_Write(vw2, 4);
65   VVW_Write(vw2, 7);
66   VVW_Write(vw2, 32);
67 
68   VVW_Write(vw3, 20);
69   VVW_Write(vw3, 25);
70 
71   VVW_Truncate(vw);
72   VVW_Truncate(vw2);
73 
74   RSIndexResult *tr1 = NewTokenRecord(NULL, 1);
75   tr1->docId = 1;
76   tr1->term.offsets = offsetsFromVVW(vw);
77 
78   RSIndexResult *tr2 = NewTokenRecord(NULL, 1);
79   tr2->docId = 1;
80   tr2->term.offsets = offsetsFromVVW(vw2);
81 
82   RSIndexResult *res = NewIntersectResult(2, 1);
83   AggregateResult_AddChild(res, tr1);
84   AggregateResult_AddChild(res, tr2);
85 
86   int delta = IndexResult_MinOffsetDelta(res);
87   ASSERT_EQ(2, delta);
88 
89   ASSERT_EQ(0, IndexResult_IsWithinRange(res, 0, 0));
90   ASSERT_EQ(0, IndexResult_IsWithinRange(res, 0, 1));
91   ASSERT_EQ(0, IndexResult_IsWithinRange(res, 1, 1));
92   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 1, 0));
93   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 2, 1));
94   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 2, 0));
95   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 3, 1));
96   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 4, 0));
97   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 4, 1));
98   ASSERT_EQ(1, IndexResult_IsWithinRange(res, 5, 1));
99 
100   RSIndexResult *tr3 = NewTokenRecord(NULL, 1);
101   tr3->docId = 1;
102   tr3->term.offsets = offsetsFromVVW(vw3);
103   AggregateResult_AddChild(res, tr3);
104 
105   delta = IndexResult_MinOffsetDelta(res);
106   ASSERT_EQ(7, delta);
107 
108   // test merge iteration
109   RSOffsetIterator it = RSIndexResult_IterateOffsets(res);
110   uint32_t expected[] = {1, 4, 7, 9, 13, 16, 20, 22, 25, 32, RS_OFFSETVECTOR_EOF};
111 
112   uint32_t rc;
113   int i = 0;
114   do {
115     rc = it.Next(it.ctx, NULL);
116     ASSERT_EQ(rc, (expected[i++]));
117   } while (rc != RS_OFFSETVECTOR_EOF);
118   it.Free(it.ctx);
119 
120   IndexResult_Free(tr1);
121   IndexResult_Free(tr2);
122   IndexResult_Free(tr3);
123   IndexResult_Free(res);
124   VVW_Free(vw);
125   VVW_Free(vw2);
126   VVW_Free(vw3);
127 }
128 
129 class IndexFlagsTest : public testing::TestWithParam<int> {};
130 
TEST_P(IndexFlagsTest,testRWFlags)131 TEST_P(IndexFlagsTest, testRWFlags) {
132   IndexFlags indexFlags = (IndexFlags)GetParam();
133   InvertedIndex *idx = NewInvertedIndex(indexFlags, 1);
134 
135   IndexEncoder enc = InvertedIndex_GetEncoder(indexFlags);
136   IndexEncoder docIdEnc = InvertedIndex_GetEncoder(Index_DocIdsOnly);
137   ASSERT_TRUE(enc != NULL);
138   ASSERT_TRUE(docIdEnc != NULL);
139 
140   for (size_t i = 0; i < 200; i++) {
141     // if (i % 10000 == 1) {
142     //     printf("iw cap: %ld, iw size: %d, numdocs: %d\n", w->cap, IW_Len(w),
143     //     w->ndocs);
144     // }
145 
146     ForwardIndexEntry h;
147     h.docId = i;
148     h.fieldMask = 1;
149     h.freq = (1 + i % 100) / (float)101;
150 
151     h.vw = NewVarintVectorWriter(8);
152     for (int n = 0; n < i % 4; n++) {
153       VVW_Write(h.vw, n);
154     }
155     VVW_Truncate(h.vw);
156 
157     InvertedIndex_WriteForwardIndexEntry(idx, enc, &h);
158 
159     // printf("doc %d, score %f offset %zd\n", h.docId, h.docScore, w->bw.buf->offset);
160     VVW_Free(h.vw);
161   }
162 
163   ASSERT_EQ(200, idx->numDocs);
164   if (enc != docIdEnc) {
165     ASSERT_EQ(2, idx->size);
166   } else {
167     ASSERT_EQ(1, idx->size);
168   }
169   ASSERT_EQ(199, idx->lastId);
170 
171   // IW_MakeSkipIndex(w, NewMemoryBuffer(8, BUFFER_WRITE));
172 
173   //   for (int x = 0; x < w->skipIdx.len; x++) {
174   //     printf("Skip entry %d: %d, %d\n", x, w->skipIdx.entries[x].docId,
175   //     w->skipIdx.entries[x].offset);
176   //   }
177   // printf("iw cap: %ld, iw size: %ld, numdocs: %d\n", w->bw.buf->cap, IW_Len(w), w->ndocs);
178 
179   for (int xx = 0; xx < 1; xx++) {
180     // printf("si: %d\n", si->len);
181     IndexReader *ir = NewTermIndexReader(idx, NULL, RS_FIELDMASK_ALL, NULL, 1);  //
182     RSIndexResult *h = NULL;
183 
184     int n = 0;
185     int rc;
186     while (!ir->atEnd_) {
187       if ((rc = IR_Read(ir, &h)) == INDEXREAD_EOF) {
188         break;
189       }
190       ASSERT_EQ(INDEXREAD_OK, rc);
191       ASSERT_EQ(h->docId, n);
192       n++;
193     }
194     // for (int z= 0; z < 10; z++) {
195     // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_time);
196 
197     // IR_SkipTo(ir, 900001, &h);
198 
199     // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_time);
200     // long diffInNanos = end_time.tv_nsec - start_time.tv_nsec;
201 
202     // printf("Time elapsed: %ldnano\n", diffInNanos);
203     // //IR_Free(ir);
204     // }
205     // IndexResult_Free(&h);
206     IR_Free(ir);
207   }
208 
209   // IW_Free(w);
210   // // overriding the regular IW_Free because we already deleted the buffer
211   InvertedIndex_Free(idx);
212 }
213 
214 INSTANTIATE_TEST_CASE_P(IndexFlagsP, IndexFlagsTest, ::testing::Range(1, 32));
215 
createIndex(int size,int idStep)216 InvertedIndex *createIndex(int size, int idStep) {
217   InvertedIndex *idx = NewInvertedIndex((IndexFlags)(INDEX_DEFAULT_FLAGS), 1);
218 
219   IndexEncoder enc = InvertedIndex_GetEncoder(idx->flags);
220   t_docId id = idStep;
221   for (int i = 0; i < size; i++) {
222     // if (i % 10000 == 1) {
223     //     printf("iw cap: %ld, iw size: %d, numdocs: %d\n", w->cap, IW_Len(w),
224     //     w->ndocs);
225     // }
226     ForwardIndexEntry h;
227     h.docId = id;
228     h.fieldMask = 1;
229     h.freq = 1;
230     h.term = "hello";
231     h.len = 5;
232 
233     h.vw = NewVarintVectorWriter(8);
234     for (int n = idStep; n < idStep + i % 4; n++) {
235       VVW_Write(h.vw, n);
236     }
237 
238     InvertedIndex_WriteForwardIndexEntry(idx, enc, &h);
239     VVW_Free(h.vw);
240 
241     id += idStep;
242   }
243 
244   // printf("BEFORE: iw cap: %ld, iw size: %zd, numdocs: %d\n", w->bw.buf->cap,
245   //        IW_Len(w), w->ndocs);
246 
247   return idx;
248 }
249 
printIntersect(void * ctx,RSIndexResult * hits,int argc)250 int printIntersect(void *ctx, RSIndexResult *hits, int argc) {
251   printf("intersect: %llu\n", (unsigned long long)hits[0].docId);
252   return 0;
253 }
254 
// Reads a 10-entry index (doc ids 1..10) through a read iterator and checks that the ids
// arrive in order and that exactly ten are produced.
TEST_F(IndexTest, testReadIterator) {
  InvertedIndex *index = createIndex(10, 1);
  IndexReader *reader = NewTermIndexReader(index, NULL, RS_FIELDMASK_ALL, NULL, 1);

  RSIndexResult *hit = NULL;
  IndexIterator *iter = NewReadIterator(reader);

  int nextId = 1;
  for (; IITER_HAS_NEXT(iter); nextId++) {
    if (iter->Read(iter->ctx, &hit) == INDEXREAD_EOF) {
      break;
    }
    ASSERT_EQ(hit->docId, nextId);
  }
  // One past the last doc id once all ten entries have been consumed.
  ASSERT_EQ(11, nextId);

  iter->Free(iter);
  InvertedIndex_Free(index);
}
280 
// Unions an index of multiples of 2 (2..20) with an index of multiples of 3 (3..30) and
// checks the merged doc id order. Every hit is also deep-copied to verify the copy is a
// distinct object flagged as a copy with the same doc id and type.
TEST_F(IndexTest, testUnion) {
  InvertedIndex *evens = createIndex(10, 2);
  InvertedIndex *threes = createIndex(10, 3);
  IndexReader *evensReader = NewTermIndexReader(evens, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexReader *threesReader = NewTermIndexReader(threes, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **children = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  children[0] = NewReadIterator(evensReader);
  children[1] = NewReadIterator(threesReader);

  IndexIterator *unionIt = NewUnionIterator(children, 2, NULL, 0, 1, QN_UNION, NULL);
  RSIndexResult *hit = NULL;
  int expected[] = {2, 3, 4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 24, 27, 30};
  for (int pos = 0; unionIt->Read(unionIt->ctx, &hit) != INDEXREAD_EOF; pos++) {
    ASSERT_EQ(expected[pos], hit->docId);

    RSIndexResult *clone = IndexResult_DeepCopy(hit);
    ASSERT_TRUE(clone != NULL);
    ASSERT_TRUE(clone != hit);
    ASSERT_TRUE(clone->isCopy);

    ASSERT_EQ(clone->docId, hit->docId);
    ASSERT_EQ(clone->type, hit->type);

    IndexResult_Free(clone);
  }

  unionIt->Free(unionIt);
  InvertedIndex_Free(evens);
  InvertedIndex_Free(threes);
}
319 
// Unions a reader created with weight 0.5 (doc ids 1..10) with a reader created with weight 1
// (doc ids 2..20, even) under a union created with weight 0.8, and checks that the union
// result and each of its children report the weight of the iterator that produced them.
TEST_F(IndexTest, testWeight) {
  InvertedIndex *dense = createIndex(10, 1);
  InvertedIndex *evens = createIndex(10, 2);
  IndexReader *denseReader = NewTermIndexReader(dense, NULL, RS_FIELDMASK_ALL, NULL, 0.5);
  IndexReader *evensReader = NewTermIndexReader(evens, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **children = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  children[0] = NewReadIterator(denseReader);
  children[1] = NewReadIterator(evensReader);

  IndexIterator *unionIt = NewUnionIterator(children, 2, NULL, 0, 0.8, QN_UNION, NULL);
  RSIndexResult *hit = NULL;
  int expected[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20};
  int seen = 0;
  while (unionIt->Read(unionIt->ctx, &hit) != INDEXREAD_EOF) {
    const int pos = seen++;
    ASSERT_EQ(hit->docId, expected[pos]);
    ASSERT_EQ(hit->weight, 0.8);

    if (hit->agg.numChildren == 2) {
      // Doc id present in both indexes: children appear in iterator order.
      ASSERT_EQ(hit->agg.children[0]->weight, 0.5);
      ASSERT_EQ(hit->agg.children[1]->weight, 1);
    } else if (pos < 10) {
      // The first ten hits (ids 1..10) with a single child come from the 0.5 reader.
      ASSERT_EQ(hit->agg.children[0]->weight, 0.5);
    } else {
      // Later hits exist only in the second index.
      ASSERT_EQ(hit->agg.children[0]->weight, 1);
    }
  }

  unionIt->Free(unionIt);
  InvertedIndex_Free(dense);
  InvertedIndex_Free(evens);
}
356 
// Intersects doc ids 1..16 with a NOT iterator wrapped around the multiples of 3, and expects
// exactly the ids in 1..16 that are not divisible by 3.
TEST_F(IndexTest, testNot) {
  InvertedIndex *all = createIndex(16, 1);
  // The excluded set: every multiple of 3 up to 30.
  InvertedIndex *threes = createIndex(10, 3);
  IndexReader *allReader = NewTermIndexReader(all, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexReader *threesReader = NewTermIndexReader(threes, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **children = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  children[0] = NewReadIterator(allReader);
  children[1] = NewNotIterator(NewReadIterator(threesReader), threes->lastId, 1);

  IndexIterator *intersection = NewIntersecIterator(children, 2, NULL, RS_FIELDMASK_ALL, -1, 0, 1);
  RSIndexResult *hit = NULL;
  int expected[] = {1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16};
  for (int pos = 0; intersection->Read(intersection->ctx, &hit) != INDEXREAD_EOF; pos++) {
    ASSERT_EQ(expected[pos], hit->docId);
  }

  intersection->Free(intersection);
  InvertedIndex_Free(all);
  InvertedIndex_Free(threes);
}
384 
// Runs a stand-alone NOT iterator over the multiples of 3 (3..30), created with the index's
// last id + 5 as its second argument, and expects every other id from 1 through 35.
TEST_F(IndexTest, testPureNot) {
  InvertedIndex *threes = createIndex(10, 3);

  IndexReader *reader = NewTermIndexReader(threes, NULL, RS_FIELDMASK_ALL, NULL, 1);
  printf("last id: %llu\n", (unsigned long long)threes->lastId);

  IndexIterator *notIt = NewNotIterator(NewReadIterator(reader), threes->lastId + 5, 1);

  RSIndexResult *hit = NULL;
  // 33 is divisible by 3 but lies beyond the last indexed id (30), so it is expected too.
  int expected[] = {1,  2,  4,  5,  7,  8,  10, 11, 13, 14, 16, 17, 19,
                    20, 22, 23, 25, 26, 28, 29, 31, 32, 33, 34, 35};
  for (int pos = 0; notIt->Read(notIt->ctx, &hit) != INDEXREAD_EOF; pos++) {
    ASSERT_EQ(expected[pos], hit->docId);
  }

  notIt->Free(notIt);
  InvertedIndex_Free(threes);
}
405 
406 // Note -- in test_index.c, this test was never actually run!
// Intersects doc ids 1..16 with an OPTIONAL iterator wrapped around the multiples of 3.
// Every id from the first index is expected to come through; the second child's freq is
// expected to be 1 when the id is a multiple of 3 and 0 otherwise.
// The test stays disabled (DISABLED_ prefix); this only repairs its expectation logic.
TEST_F(IndexTest, DISABLED_testOptional) {
  InvertedIndex *w = createIndex(16, 1);
  // The optional set: every multiple of 3 up to 30.
  InvertedIndex *w2 = createIndex(10, 3);
  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexReader *r2 = NewTermIndexReader(w2, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **irs = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  irs[0] = NewReadIterator(r1);
  irs[1] = NewOptionalIterator(NewReadIterator(r2), w2->lastId, 1);

  IndexIterator *ui = NewIntersecIterator(irs, 2, NULL, RS_FIELDMASK_ALL, -1, 0, 1);
  RSIndexResult *h = NULL;

  int i = 1;
  while (ui->Read(ui->ctx, &h) != INDEXREAD_EOF) {
    ASSERT_EQ(i, h->docId);
    if (i % 3 == 0) {
      ASSERT_EQ(1, h->agg.children[1]->freq);
    } else {
      ASSERT_EQ(0, h->agg.children[1]->freq);
    }
    // Advance the expected doc id; without this every hit after the first is compared
    // against 1 and the test can never pass.
    i++;
  }

  ui->Free(ui);
  InvertedIndex_Free(w);
  InvertedIndex_Free(w2);
}
439 
// Writes 75 numeric entries (doc id k holds the value k) into a numeric inverted index and
// reads them back, checking doc id order, stored value and the total number of entries.
TEST_F(IndexTest, testNumericInverted) {
  InvertedIndex *idx = NewInvertedIndex(Index_StoreNumeric, 1);

  for (int i = 0; i < 75; i++) {
    size_t sz = InvertedIndex_WriteNumericEntry(idx, i + 1, (double)(i + 1));
    ASSERT_TRUE(sz > 1);
  }
  ASSERT_EQ(75, idx->lastId);

  IndexReader *ir = NewNumericReader(NULL, idx, NULL, 0, 0);
  IndexIterator *it = NewReadIterator(ir);
  RSIndexResult *res = NULL;
  t_docId i = 1;
  while (INDEXREAD_EOF != it->Read(it->ctx, &res)) {
    ASSERT_EQ(i++, res->docId);
    ASSERT_EQ(res->num.value, (float)res->docId);
  }
  // All 75 entries must have been read; an early EOF must not pass silently.
  ASSERT_EQ(76, i);

  // Release the iterator before the index it reads from, as the other tests in this file do.
  it->Free(it);
  InvertedIndex_Free(idx);
}
467 
// Round-trips a mix of small, fractional, large and negative values through a numeric
// inverted index and checks each value read back is within 0.01 of what was written, and
// that the iterator reports EOF right after the last value.
TEST_F(IndexTest, testNumericVaried) {
  InvertedIndex *idx = NewInvertedIndex(Index_StoreNumeric, 1);

  // The negative 2^32 entry is spelled as a floating-point literal: negating the unsigned
  // expression (1LLU << 32) wraps around to a large positive number instead.
  static const double nums[] = {0,          0.13,          0.001,     -0.1,     1.0,
                                5.0,        4.323,         65535,     65535.53, 32768.432,
                                1LLU << 32, -4294967296.0, 1LLU << 40};
  static const size_t numCount = sizeof(nums) / sizeof(double);

  for (size_t i = 0; i < numCount; i++) {
    size_t sz = InvertedIndex_WriteNumericEntry(idx, i + 1, nums[i]);
    ASSERT_GT(sz, 1);
  }

  IndexReader *ir = NewNumericReader(NULL, idx, NULL, 0, 0);
  IndexIterator *it = NewReadIterator(ir);
  RSIndexResult *res = NULL;

  for (size_t i = 0; i < numCount; i++) {
    int rv = it->Read(it->ctx, &res);
    ASSERT_NE(INDEXREAD_EOF, rv);
    ASSERT_LT(fabs(nums[i] - res->num.value), 0.01);
  }

  ASSERT_EQ(INDEXREAD_EOF, it->Read(it->ctx, &res));

  // Release the iterator before the index it reads from, as the other tests in this file do.
  it->Free(it);
  InvertedIndex_Free(idx);
}
497 
// One numeric-encoding expectation: a value to write and the size that
// InvertedIndex_WriteNumericEntry is expected to return for it (see testNumericEncoding).
struct encodingInfo {
  double value;
  size_t size;
};

// Expectation table consumed by testNumericEncoding; the trailing comment is the row index.
static const encodingInfo infos[] = {
    // zero, one and small magnitudes
    {0, 2},    // 0
    {1, 2},    // 1
    {63, 3},   // 2
    {-1, 3},   // 3
    {-63, 3},  // 4
    {64, 3},   // 5
    {-64, 3},  // 6
    // integers at growing power-of-256 boundaries, both signs
    {255, 3},          // 7
    {-255, 3},         // 8
    {65535, 4},        // 9
    {-65535, 4},       // 10
    {16777215, 5},     // 11
    {-16777215, 5},    // 12
    {4294967295, 6},   // 13
    {-4294967295, 6},  // 14
    // integers beyond 32 bits
    {4294967295 + 1, 7},       // 15
    {4294967295 + 2, 7},       // 16
    {549755813888.0, 7},       // 17
    {549755813888.0 + 2, 7},   // 18
    {549755813888.0 - 23, 7},  // 19
    {-549755813888.0, 7},      // 20
    // non-integral values, extremes and infinities
    {1503342028.957225, 10},  // 21
    {42.4345, 10},            // 22
    {0.5f, 6},                // 23
    {DBL_MAX, 10},            // 24
    {UINT64_MAX >> 12, 9},    // 25
    {INFINITY, 2},            // 26
    {-INFINITY, 2}            // 27
};
532 
// Writes every value of the `infos` table into a numeric inverted index, checking the size
// returned for each write against the table, then reads the values back: infinities must
// match exactly, everything else must be within 0.01 of the written value.
TEST_F(IndexTest, testNumericEncoding) {
  static const size_t numInfos = sizeof(infos) / sizeof(infos[0]);
  InvertedIndex *idx = NewInvertedIndex(Index_StoreNumeric, 1);

  for (size_t ii = 0; ii < numInfos; ii++) {
    size_t sz = InvertedIndex_WriteNumericEntry(idx, ii + 1, infos[ii].value);
    ASSERT_EQ(infos[ii].size, sz);
  }

  IndexReader *ir = NewNumericReader(NULL, idx, NULL, 0, 0);
  IndexIterator *it = NewReadIterator(ir);
  RSIndexResult *res = NULL;

  for (size_t ii = 0; ii < numInfos; ii++) {
    int rc = it->Read(it->ctx, &res);
    ASSERT_NE(rc, INDEXREAD_EOF);
    if (fabs(infos[ii].value) == INFINITY) {
      // inf - inf is NaN, so the tolerance check below cannot be used for infinities.
      ASSERT_EQ(infos[ii].value, res->num.value);
    } else {
      ASSERT_LT(fabs(infos[ii].value - res->num.value), 0.01);
    }
  }

  // Release the iterator before the index it reads from, as the other tests in this file do.
  it->Free(it);
  InvertedIndex_Free(idx);
}
564 
// Calls Abort on a read iterator after 50 successful reads of a 1000-entry index and expects
// exactly one more successful read before the iterator reports EOF.
TEST_F(IndexTest, testAbort) {
  InvertedIndex *index = createIndex(1000, 1);
  IndexReader *reader = NewTermIndexReader(index, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexIterator *iter = NewReadIterator(reader);

  RSIndexResult *hit;
  int reads = 0;
  for (;;) {
    if (iter->Read(iter->ctx, &hit) == INDEXREAD_EOF) {
      break;
    }
    if (reads == 50) {
      iter->Abort(iter->ctx);
    }
    reads++;
  }
  ASSERT_EQ(51, reads);

  iter->Free(iter);
  InvertedIndex_Free(index);
}
583 
// Intersects two 100000-entry indexes (doc ids stepping by 4 and by 2). Each hit must be an
// intersection-type aggregate with offsets and with the doc id and freq expected for its
// position, and must survive a deep copy. The total hit count and the largest freq seen are
// checked at the end.
TEST_F(IndexTest, testIntersection) {
  InvertedIndex *fours = createIndex(100000, 4);
  InvertedIndex *twos = createIndex(100000, 2);
  IndexReader *foursReader = NewTermIndexReader(fours, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexReader *twosReader = NewTermIndexReader(twos, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **children = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  children[0] = NewReadIterator(foursReader);
  children[1] = NewReadIterator(twosReader);

  IndexIterator *intersection = NewIntersecIterator(children, 2, NULL, RS_FIELDMASK_ALL, -1, 0, 1);

  RSIndexResult *hit = NULL;
  uint32_t topFreq = 0;
  int count = 0;
  for (; intersection->Read(intersection->ctx, &hit) != INDEXREAD_EOF; ++count) {
    ASSERT_EQ(hit->type, RSResultType_Intersection);
    ASSERT_TRUE(RSIndexResult_IsAggregate(hit));
    ASSERT_TRUE(RSIndexResult_HasOffsets(hit));
    if (hit->freq > topFreq) {
      topFreq = hit->freq;
    }

    RSIndexResult *clone = IndexResult_DeepCopy(hit);
    ASSERT_TRUE(clone != NULL);
    ASSERT_TRUE(clone != hit);
    ASSERT_TRUE(clone->isCopy == 1);

    ASSERT_TRUE(clone->docId == hit->docId);
    ASSERT_TRUE(clone->type == RSResultType_Intersection);

    // Expected freq for the count-th hit; the expected doc id is twice that.
    const int expectedFreq = count * 2 + 2;
    ASSERT_EQ(expectedFreq * 2, hit->docId);
    ASSERT_EQ(expectedFreq, hit->freq);
    IndexResult_Free(clone);
  }

  ASSERT_EQ(count, 50000);
  ASSERT_EQ(topFreq, 100000.0);

  intersection->Free(intersection);
  InvertedIndex_Free(fours);
  InvertedIndex_Free(twos);
}
634 
// Exercises Buffer with a writer and a reader: a string and a varint are written into a
// buffer that starts with capacity 2, capacity and offset are checked after each step and
// after truncation, and both values are then read back.
TEST_F(IndexTest, testBuffer) {
  Buffer storage = {0};
  Buffer_Init(&storage, 2);
  BufferWriter writer = NewBufferWriter(&storage);
  ASSERT_TRUE(writer.buf->cap == 2) << "Wrong capacity";
  ASSERT_TRUE(writer.buf->data != NULL);
  ASSERT_TRUE(Buffer_Offset(writer.buf) == 0);
  ASSERT_TRUE(writer.buf->data == writer.pos);

  // The payload includes the terminating NUL so it can be compared as a C string later.
  const char *text = "helololoolo";
  const size_t payloadLen = strlen(text) + 1;
  size_t l = Buffer_Write(&writer, (void *)text, payloadLen);

  ASSERT_EQ(payloadLen, l);
  ASSERT_TRUE(Buffer_Offset(writer.buf) == l);
  ASSERT_EQ(Buffer_Capacity(writer.buf), 14);

  l = WriteVarint(1337654, &writer);
  ASSERT_TRUE(l == 3);
  ASSERT_EQ(Buffer_Offset(writer.buf), 15);
  ASSERT_EQ(Buffer_Capacity(writer.buf), 17);

  // After truncating with 0 the capacity is expected to equal the 15 bytes written.
  Buffer_Truncate(writer.buf, 0);
  ASSERT_TRUE(Buffer_Capacity(writer.buf) == 15);

  BufferReader reader = NewBufferReader(writer.buf);
  ASSERT_TRUE(reader.pos == 0);

  char *readBack = (char *)malloc(payloadLen);
  l = Buffer_Read(&reader, readBack, payloadLen);
  ASSERT_EQ(payloadLen, l);

  ASSERT_STREQ(text, readBack);
  ASSERT_TRUE(BufferReader_Offset(&reader) == l);

  free(readBack);

  int decoded = ReadVarint(&reader);
  ASSERT_EQ(1337654, decoded);

  Buffer_Free(writer.buf);
}
678 
// State handed to tokenFunc through its `ctx` pointer.
struct tokenContext {
  int num;          // index of the next expected token; tokenFunc advances it on every call
  char **expected;  // expected token strings, indexed by `num`
};
684 
tokenFunc(void * ctx,const Token * t)685 int tokenFunc(void *ctx, const Token *t) {
686   tokenContext *tx = (tokenContext *)ctx;
687   int ret = strncmp(t->tok, tx->expected[tx->num++], t->tokLen);
688   EXPECT_TRUE(ret == 0);
689   EXPECT_TRUE(t->pos > 0);
690   return 0;
691 }
692 
693 // int testTokenize() {
694 //   char *txt = strdup("Hello? world...   ? -WAZZ@UP? שלום");
695 //   tokenContext ctx = {0};
696 //   const char *expected[] = {"hello", "world", "wazz", "up", "שלום"};
697 //   ctx.expected = (char **)expected;
698 
699 //   tokenize(txt, &ctx, tokenFunc, NULL, 0, DefaultStopWordList(), 0);
700 //   ASSERT_TRUE(ctx.num == 5);
701 
702 //   free(txt);
703 
704 //   return 0;
705 // }
706 
707 // int testForwardIndex() {
708 
709 //   Document doc = NewDocument(NULL, 1, 1, "english");
710 //   doc.docId = 1;
711 //   doc.fields[0] = N
712 //   ForwardIndex *idx = NewForwardIndex(doc);
713 //   char *txt = strdup("Hello? world...  hello hello ? __WAZZ@UP? שלום");
714 //   tokenize(txt, 1, 1, idx, forwardIndexTokenFunc);
715 
716 //   return 0;
717 // }
718 
TEST_F(IndexTest,testIndexSpec)719 TEST_F(IndexTest, testIndexSpec) {
720   const char *title = "title", *body = "body", *foo = "foo", *bar = "bar", *name = "name";
721   const char *args[] = {"STOPWORDS", "2",      "hello", "world",    "SCHEMA", title,
722                         "text",      "weight", "0.1",   body,       "text",   "weight",
723                         "2.0",       foo,      "text",  "sortable", bar,      "numeric",
724                         "sortable",  name,     "text",  "nostem"};
725   QueryError err = {QUERY_OK};
726   IndexSpec *s = IndexSpec_Parse("idx", args, sizeof(args) / sizeof(const char *), &err);
727   ASSERT_FALSE(QueryError_HasError(&err)) << QueryError_GetError(&err);
728   ASSERT_TRUE(s);
729   ASSERT_TRUE(s->numFields == 5);
730   ASSERT_TRUE(s->stopwords != NULL);
731   ASSERT_TRUE(s->stopwords != DefaultStopWordList());
732   ASSERT_TRUE(s->flags & Index_StoreFieldFlags);
733   ASSERT_TRUE(s->flags & Index_StoreTermOffsets);
734   ASSERT_TRUE(s->flags & Index_HasCustomStopwords);
735 
736   ASSERT_TRUE(IndexSpec_IsStopWord(s, "hello", 5));
737   ASSERT_TRUE(IndexSpec_IsStopWord(s, "world", 5));
738   ASSERT_TRUE(!IndexSpec_IsStopWord(s, "werld", 5));
739 
740   const FieldSpec *f = IndexSpec_GetField(s, body, strlen(body));
741   ASSERT_TRUE(f != NULL);
742   ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
743   ASSERT_STREQ(f->name, body);
744   ASSERT_EQ(f->ftWeight, 2.0);
745   ASSERT_EQ(FIELD_BIT(f), 2);
746   ASSERT_EQ(f->options, 0);
747   ASSERT_EQ(f->sortIdx, -1);
748 
749   f = IndexSpec_GetField(s, title, strlen(title));
750   ASSERT_TRUE(f != NULL);
751   ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
752   ASSERT_TRUE(strcmp(f->name, title) == 0);
753   ASSERT_TRUE(f->ftWeight == 0.1);
754   ASSERT_TRUE(FIELD_BIT(f) == 1);
755   ASSERT_TRUE(f->options == 0);
756   ASSERT_TRUE(f->sortIdx == -1);
757 
758   f = IndexSpec_GetField(s, foo, strlen(foo));
759   ASSERT_TRUE(f != NULL);
760   ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
761   ASSERT_TRUE(strcmp(f->name, foo) == 0);
762   ASSERT_TRUE(f->ftWeight == 1);
763   ASSERT_TRUE(FIELD_BIT(f) == 4);
764   ASSERT_TRUE(f->options == FieldSpec_Sortable);
765   ASSERT_TRUE(f->sortIdx == 0);
766 
767   f = IndexSpec_GetField(s, bar, strlen(bar));
768   ASSERT_TRUE(f != NULL);
769   ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_NUMERIC));
770 
771   ASSERT_TRUE(strcmp(f->name, bar) == 0);
772   ASSERT_TRUE(f->options == FieldSpec_Sortable);
773   ASSERT_TRUE(f->sortIdx == 1);
774   ASSERT_TRUE(IndexSpec_GetField(s, "fooz", 4) == NULL);
775 
776   f = IndexSpec_GetField(s, name, strlen(name));
777   ASSERT_TRUE(f != NULL);
778   ASSERT_TRUE(FIELD_IS(f, INDEXFLD_T_FULLTEXT));
779   ASSERT_TRUE(strcmp(f->name, name) == 0);
780   ASSERT_TRUE(f->ftWeight == 1);
781   ASSERT_TRUE(FIELD_BIT(f) == 8);
782   ASSERT_TRUE(f->options == FieldSpec_NoStemming);
783   ASSERT_TRUE(f->sortIdx == -1);
784 
785   ASSERT_TRUE(s->sortables != NULL);
786   ASSERT_TRUE(s->sortables->len == 2);
787   int rc = IndexSpec_GetFieldSortingIndex(s, foo, strlen(foo));
788   ASSERT_EQ(0, rc);
789   rc = IndexSpec_GetFieldSortingIndex(s, bar, strlen(bar));
790   ASSERT_EQ(1, rc);
791   rc = IndexSpec_GetFieldSortingIndex(s, title, strlen(title));
792   ASSERT_EQ(-1, rc);
793 
794   IndexSpec_Free(s);
795 
796   QueryError_ClearError(&err);
797   const char *args2[] = {
798       "NOOFFSETS", "NOFIELDS", "SCHEMA", title, "text",
799   };
800   s = IndexSpec_Parse("idx", args2, sizeof(args2) / sizeof(const char *), &err);
801   ASSERT_FALSE(QueryError_HasError(&err)) << QueryError_GetError(&err);
802   ASSERT_TRUE(s);
803   ASSERT_TRUE(s->numFields == 1);
804 
805   ASSERT_TRUE(!(s->flags & Index_StoreFieldFlags));
806   ASSERT_TRUE(!(s->flags & Index_StoreTermOffsets));
807   IndexSpec_Free(s);
808 
809   // User-reported bug
810   const char *args3[] = {"SCHEMA", "ha", "NUMERIC", "hb", "TEXT", "WEIGHT", "1", "NOSTEM"};
811   QueryError_ClearError(&err);
812   s = IndexSpec_Parse("idx", args3, sizeof(args3) / sizeof(args3[0]), &err);
813   ASSERT_FALSE(QueryError_HasError(&err)) << QueryError_GetError(&err);
814   ASSERT_TRUE(s);
815   ASSERT_TRUE(FieldSpec_IsNoStem(s->fields + 1));
816   IndexSpec_Free(s);
817 }
818 
// Replaces the contents of `args` with a heap-allocated schema argument list for `nfields`
// fields: "SCHEMA" followed, for each field i, by "field<i>" and its type:
//   - even i:        TEXT
//   - odd i < 40:    TEXT NOINDEX
//   - odd i >= 40:   NUMERIC
// Every element is allocated with strdup, so the caller releases them with free() (see
// freeSchemaArgs). Pointers already stored in `args` are dropped without being freed.
static void fillSchema(std::vector<char *> &args, size_t nfields) {
  args.clear();
  args.reserve(1 + nfields * 3);  // upper bound: at most three arguments per field
  args.push_back(strdup("SCHEMA"));

  for (unsigned i = 0; i < nfields; i++) {
    // "field" plus at most ten digits and the terminator fits comfortably. snprintf always
    // terminates the buffer, unlike asprintf, whose output pointer is undefined when it
    // fails and whose return value the previous code did not check.
    char fieldName[32];
    snprintf(fieldName, sizeof(fieldName), "field%u", i);
    args.push_back(strdup(fieldName));

    if (i % 2 == 0) {
      args.push_back(strdup("TEXT"));
    } else if (i < 40) {
      // odd fields under 40 are TEXT NOINDEX
      args.push_back(strdup("TEXT"));
      args.push_back(strdup("NOINDEX"));
    } else {
      // the remaining odd fields are numeric
      args.push_back(strdup("NUMERIC"));
    }
  }
}
845 
// Releases every string in `args` with free() and leaves the vector empty.
static void freeSchemaArgs(std::vector<char *> &args) {
  for (size_t idx = 0; idx < args.size(); ++idx) {
    free(args[idx]);
  }
  args.clear();
}
852 
// Parses a generated 64-field schema, which must succeed and report 64 fields, and then a
// 300-field schema, which must be rejected with the TEXT field limit error.
TEST_F(IndexTest, testHugeSpec) {
  std::vector<char *> schema;
  QueryError status = {QUERY_OK};

  int numFields = 64;
  fillSchema(schema, numFields);
  IndexSpec *spec = IndexSpec_Parse("idx", (const char **)schema.data(), schema.size(), &status);
  ASSERT_FALSE(QueryError_HasError(&status)) << QueryError_GetError(&status);
  ASSERT_TRUE(spec);
  ASSERT_TRUE(spec->numFields == numFields);
  IndexSpec_Free(spec);
  freeSchemaArgs(schema);

  // A schema that is too large must be refused.
  numFields = 300;
  fillSchema(schema, numFields);

  QueryError_ClearError(&status);
  spec = IndexSpec_Parse("idx", (const char **)schema.data(), schema.size(), &status);
  ASSERT_TRUE(spec == NULL);
  ASSERT_TRUE(QueryError_HasError(&status));
  ASSERT_STREQ("Schema is limited to 128 TEXT fields", QueryError_GetError(&status));
  freeSchemaArgs(schema);
  QueryError_ClearError(&status);
}
878 
// Overlays an int and a float in the same storage. Not referenced by the tests visible in
// this part of the file.
union u {
  int i;
  float f;
};
884 
TEST_F(IndexTest,testIndexFlags)885 TEST_F(IndexTest, testIndexFlags) {
886 
887   ForwardIndexEntry h;
888   h.docId = 1234;
889   h.fieldMask = 0x01;
890   h.freq = 1;
891   h.vw = NewVarintVectorWriter(8);
892   for (int n = 0; n < 10; n++) {
893     VVW_Write(h.vw, n);
894   }
895   VVW_Truncate(h.vw);
896 
897   uint32_t flags = INDEX_DEFAULT_FLAGS;
898   InvertedIndex *w = NewInvertedIndex(IndexFlags(flags), 1);
899   IndexEncoder enc = InvertedIndex_GetEncoder(w->flags);
900   ASSERT_TRUE(w->flags == flags);
901   size_t sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
902   // printf("written %zd bytes. Offset=%zd\n", sz, h.vw->buf.offset);
903   ASSERT_EQ(15, sz);
904   InvertedIndex_Free(w);
905 
906   flags &= ~Index_StoreTermOffsets;
907   w = NewInvertedIndex(IndexFlags(flags), 1);
908   ASSERT_TRUE(!(w->flags & Index_StoreTermOffsets));
909   enc = InvertedIndex_GetEncoder(w->flags);
910   size_t sz2 = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
911   // printf("Wrote %zd bytes. Offset=%zd\n", sz2, h.vw->buf.offset);
912   ASSERT_EQ(sz2, sz - Buffer_Offset(&h.vw->buf) - 1);
913   InvertedIndex_Free(w);
914 
915   flags = INDEX_DEFAULT_FLAGS | Index_WideSchema;
916   w = NewInvertedIndex(IndexFlags(flags), 1);
917   ASSERT_TRUE((w->flags & Index_WideSchema));
918   enc = InvertedIndex_GetEncoder(w->flags);
919   h.fieldMask = 0xffffffffffff;
920   ASSERT_EQ(21, InvertedIndex_WriteForwardIndexEntry(w, enc, &h));
921   InvertedIndex_Free(w);
922 
923   flags |= Index_WideSchema;
924   w = NewInvertedIndex(IndexFlags(flags), 1);
925   ASSERT_TRUE((w->flags & Index_WideSchema));
926   enc = InvertedIndex_GetEncoder(w->flags);
927   h.fieldMask = 0xffffffffffff;
928   sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
929   ASSERT_EQ(21, sz);
930   InvertedIndex_Free(w);
931 
932   flags &= Index_StoreFreqs;
933   w = NewInvertedIndex(IndexFlags(flags), 1);
934   ASSERT_TRUE(!(w->flags & Index_StoreTermOffsets));
935   ASSERT_TRUE(!(w->flags & Index_StoreFieldFlags));
936   enc = InvertedIndex_GetEncoder(w->flags);
937   sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
938   ASSERT_EQ(3, sz);
939   InvertedIndex_Free(w);
940 
941   flags |= Index_StoreFieldFlags | Index_WideSchema;
942   w = NewInvertedIndex(IndexFlags(flags), 1);
943   ASSERT_TRUE((w->flags & Index_WideSchema));
944   ASSERT_TRUE((w->flags & Index_StoreFieldFlags));
945   enc = InvertedIndex_GetEncoder(w->flags);
946   h.fieldMask = 0xffffffffffff;
947   sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
948   ASSERT_EQ(10, sz);
949   InvertedIndex_Free(w);
950 
951   VVW_Free(h.vw);
952 }
953 
TEST_F(IndexTest,testDocTable)954 TEST_F(IndexTest, testDocTable) {
955   char buf[16];
956   DocTable dt = NewDocTable(10, 10);
957   t_docId did = 0;
958   // N is set to 100 and the max cap of the doc table is 10 so we surely will
959   // get overflow and check that everything works correctly
960   int N = 100;
961   for (int i = 0; i < N; i++) {
962     size_t nkey = sprintf(buf, "doc_%d", i);
963     RSDocumentMetadata *dmd = DocTable_Put(&dt, buf, nkey, (double)i, Document_DefaultFlags, buf, strlen(buf), DocumentType_Hash);
964     t_docId nd = dmd->id;
965     ASSERT_EQ(did + 1, nd);
966     did = nd;
967   }
968 
969   ASSERT_EQ(N + 1, dt.size);
970   ASSERT_EQ(N, dt.maxDocId);
971 #ifdef __x86_64__
972   ASSERT_EQ(10980, (int)dt.memsize);
973 #endif
974   for (int i = 0; i < N; i++) {
975     sprintf(buf, "doc_%d", i);
976     const char *key = DocTable_GetKey(&dt, i + 1, NULL);
977     ASSERT_STREQ(key, buf);
978 
979     float score = DocTable_GetScore(&dt, i + 1);
980     ASSERT_EQ((int)score, i);
981 
982     RSDocumentMetadata *dmd = DocTable_Get(&dt, i + 1);
983     DMD_Incref(dmd);
984     ASSERT_TRUE(dmd != NULL);
985     ASSERT_TRUE(dmd->flags & Document_HasPayload);
986     ASSERT_STREQ(dmd->keyPtr, buf);
987     char *pl = dmd->payload->data;
988     ASSERT_TRUE(!(strncmp(pl, (char *)buf, dmd->payload->len)));
989 
990     ASSERT_EQ((int)dmd->score, i);
991     ASSERT_EQ((int)dmd->flags, (int)(Document_DefaultFlags | Document_HasPayload));
992 
993     t_docId xid = DocIdMap_Get(&dt.dim, buf, strlen(buf));
994 
995     ASSERT_EQ((int)xid, i + 1);
996 
997     int rc = DocTable_Delete(&dt, dmd->keyPtr, sdslen(dmd->keyPtr));
998     ASSERT_EQ(1, rc);
999     ASSERT_TRUE((int)(dmd->flags & Document_Deleted));
1000     DMD_Decref(dmd);
1001     dmd = DocTable_Get(&dt, i + 1);
1002     ASSERT_TRUE(!dmd);
1003   }
1004 
1005   ASSERT_FALSE(DocIdMap_Get(&dt.dim, "foo bar", strlen("foo bar")));
1006   ASSERT_FALSE(DocTable_Get(&dt, N + 2));
1007 
1008   RSDocumentMetadata *dmd = DocTable_Put(&dt, "Hello", 5, 1.0, Document_DefaultFlags, NULL, 0, DocumentType_Hash);
1009   t_docId strDocId = dmd->id;
1010   ASSERT_TRUE(0 != strDocId);
1011 
1012   // Test that binary keys also work here
1013   static const char binBuf[] = {"Hello\x00World"};
1014   const size_t binBufLen = 11;
1015   ASSERT_FALSE(DocIdMap_Get(&dt.dim, binBuf, binBufLen));
1016   dmd = DocTable_Put(&dt, binBuf, binBufLen, 1.0, Document_DefaultFlags, NULL, 0, DocumentType_Hash);
1017   ASSERT_TRUE(dmd);
1018   ASSERT_NE(dmd->id, strDocId);
1019   ASSERT_EQ(dmd->id, DocIdMap_Get(&dt.dim, binBuf, binBufLen));
1020   ASSERT_EQ(strDocId, DocIdMap_Get(&dt.dim, "Hello", 5));
1021   DocTable_Free(&dt);
1022 }
1023 
TEST_F(IndexTest,testSortable)1024 TEST_F(IndexTest, testSortable) {
1025   RSSortingTable *tbl = NewSortingTable();
1026   RSSortingTable_Add(&tbl, "foo", RSValue_String);
1027   RSSortingTable_Add(&tbl, "bar", RSValue_String);
1028   RSSortingTable_Add(&tbl, "baz", RSValue_String);
1029   ASSERT_EQ(3, tbl->len);
1030 
1031   ASSERT_STREQ("foo", tbl->fields[0].name);
1032   ASSERT_EQ(RSValue_String, tbl->fields[0].type);
1033   ASSERT_STREQ("bar", tbl->fields[1].name);
1034   ASSERT_STREQ("baz", tbl->fields[2].name);
1035   ASSERT_EQ(0, RSSortingTable_GetFieldIdx(tbl, "foo"));
1036   ASSERT_EQ(0, RSSortingTable_GetFieldIdx(tbl, "FoO"));
1037   ASSERT_EQ(-1, RSSortingTable_GetFieldIdx(NULL, "FoO"));
1038 
1039   ASSERT_EQ(1, RSSortingTable_GetFieldIdx(tbl, "bar"));
1040   ASSERT_EQ(-1, RSSortingTable_GetFieldIdx(tbl, "barbar"));
1041 
1042   RSSortingVector *v = NewSortingVector(tbl->len);
1043   ASSERT_EQ(v->len, tbl->len);
1044 
1045   const char *str = "hello";
1046   const char *masse = "Maße";
1047   double num = 3.141;
1048   ASSERT_TRUE(RSValue_IsNull(v->values[0]));
1049   RSSortingVector_Put(v, 0, str, RS_SORTABLE_STR, 0);
1050   ASSERT_EQ(v->values[0]->t, RSValue_String);
1051   ASSERT_EQ(v->values[0]->strval.stype, RSString_RMAlloc);
1052 
1053   ASSERT_TRUE(RSValue_IsNull(v->values[1]));
1054   ASSERT_TRUE(RSValue_IsNull(v->values[2]));
1055   RSSortingVector_Put(v, 1, &num, RSValue_Number, 0);
1056   ASSERT_EQ(v->values[1]->t, RS_SORTABLE_NUM);
1057 
1058   RSSortingVector *v2 = NewSortingVector(tbl->len);
1059   RSSortingVector_Put(v2, 0, masse, RS_SORTABLE_STR, 0);
1060 
1061   /// test string unicode lowercase normalization
1062   ASSERT_STREQ("masse", v2->values[0]->strval.str);
1063 
1064   double s2 = 4.444;
1065   RSSortingVector_Put(v2, 1, &s2, RS_SORTABLE_NUM, 0);
1066 
1067   RSSortingKey sk = {.index = 0, .ascending = 0};
1068 
1069   QueryError qerr;
1070   QueryError_Init(&qerr);
1071 
1072   int rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
1073   ASSERT_LT(0, rc);
1074   ASSERT_EQ(QUERY_OK, qerr.code);
1075   sk.ascending = 1;
1076   rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
1077   ASSERT_GT(0, rc);
1078   ASSERT_EQ(QUERY_OK, qerr.code);
1079   rc = RSSortingVector_Cmp(v, v, &sk, &qerr);
1080   ASSERT_EQ(0, rc);
1081   ASSERT_EQ(QUERY_OK, qerr.code);
1082 
1083   sk.index = 1;
1084 
1085   rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
1086   ASSERT_TRUE(-1 == rc && qerr.code == QUERY_OK);
1087   sk.ascending = 0;
1088   rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
1089   ASSERT_TRUE(1 == rc && qerr.code == QUERY_OK);
1090 
1091   SortingTable_Free(tbl);
1092   SortingVector_Free(v);
1093   SortingVector_Free(v2);
1094 }
1095 
// Round-trips field masks of increasing width through the varint field-mask
// encoder and checks both the encoded size and the decoded value.
TEST_F(IndexTest, testVarintFieldMask) {
  t_fieldMask x = 127;
  // expected[i] is the encoded size of a mask whose lowest i + 1 bytes are
  // each 0x7f.
  size_t expected[] = {1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 19};
  // The loop below indexes expected[] once per byte of t_fieldMask.
  static_assert(sizeof(expected) / sizeof(expected[0]) >= sizeof(t_fieldMask),
                "expected[] needs one entry per byte of t_fieldMask");
  Buffer b = {0};
  Buffer_Init(&b, 1);
  BufferWriter bw = NewBufferWriter(&b);
  // Each step extends the mask by one more 0x7f byte.
  for (size_t i = 0; i < sizeof(t_fieldMask); i++, x |= x << 8) {
    size_t sz = WriteVarintFieldMask(x, &bw);
    ASSERT_EQ(expected[i], sz);
    // Rewind the writer so the next iteration overwrites from the start, then
    // read the value just written back from the same buffer.
    BufferWriter_Seek(&bw, 0);
    BufferReader br = NewBufferReader(bw.buf);

    t_fieldMask y = ReadVarintFieldMask(&br);

    ASSERT_EQ(y, x);
  }
  Buffer_Free(&b);
}
1114 
// Writes four doc ids into one inverted index, checking idx->size after every
// write (it is expected to go from 1 to 2 at the jump from 200 to 1 << 48),
// then reads all entries back in order.
TEST_F(IndexTest, testDeltaSplits) {
  // Doc ids in write order, and the expected index size after each write.
  const unsigned long long docIds[] = {1, 200, 1LLU << 48, (1LLU << 48) + 1};
  const int sizeAfterWrite[] = {1, 1, 2, 2};
  const size_t numDocs = sizeof(docIds) / sizeof(docIds[0]);

  InvertedIndex *index = NewInvertedIndex((IndexFlags)(INDEX_DEFAULT_FLAGS), 1);
  IndexEncoder encoder = InvertedIndex_GetEncoder(index->flags);

  ForwardIndexEntry entry = {0};
  entry.fieldMask = RS_FIELDMASK_ALL;

  for (size_t k = 0; k < numDocs; ++k) {
    entry.docId = docIds[k];
    InvertedIndex_WriteForwardIndexEntry(index, encoder, &entry);
    ASSERT_EQ(index->size, sizeAfterWrite[k]);
  }

  // Every id must come back, in the order it was written.
  IndexReader *reader = NewTermIndexReader(index, NULL, RS_FIELDMASK_ALL, NULL, 1);
  RSIndexResult *res = NULL;
  for (size_t k = 0; k < numDocs; ++k) {
    ASSERT_EQ(INDEXREAD_OK, IR_Read(reader, &res));
    ASSERT_EQ(docIds[k], res->docId);
  }

  // Nothing is left after the last entry.
  ASSERT_EQ(INDEXREAD_EOF, IR_Read(reader, &res));

  IR_Free(reader);
  InvertedIndex_Free(index);
}
1155