1 #include "../../src/buffer.h"
2 #include "../../src/index.h"
3 #include "../../src/inverted_index.h"
4 #include "../../src/index_result.h"
5 #include "../../src/query_parser/tokenizer.h"
6 #include "../../src/rmutil/alloc.h"
7 #include "../../src/spec.h"
8 #include "../../src/tokenize.h"
9 #include "../../src/varint.h"
10 #include "../../src/rmutil/alloc.h"
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14 #include <time.h>
15 #include <float.h>
16 #include <gtest/gtest.h>
17 #include <vector>
18 #include <cstdint>
19
20 class IndexTest : public ::testing::Test {};
21
offsetsFromVVW(const VarintVectorWriter * vvw)22 static RSOffsetVector offsetsFromVVW(const VarintVectorWriter *vvw) {
23 RSOffsetVector ret = {0};
24 ret.data = VVW_GetByteData(vvw);
25 ret.len = VVW_GetByteLength(vvw);
26 return ret;
27 }
28
// Writes a handful of increasing values through a VarintVectorWriter and checks that
// iterating the resulting offset vector yields exactly those values, in order.
TEST_F(IndexTest, testVarint) {
  static const uint32_t expected[] = {10, 1000, 1020, 10000, 10020};
  static const size_t numExpected = sizeof(expected) / sizeof(expected[0]);

  VarintVectorWriter *vw = NewVarintVectorWriter(8);
  for (size_t i = 0; i < numExpected; i++) {
    VVW_Write(vw, expected[i]);
  }
  VVW_Truncate(vw);

  RSOffsetVector vec = offsetsFromVVW(vw);
  RSOffsetIterator it = RSOffsetVector_Iterate(&vec, NULL);
  size_t numRead = 0;
  uint32_t n = 0;
  while (RS_OFFSETVECTOR_EOF != (n = it.Next(it.ctx, NULL))) {
    // Guard the lookup so a surplus value fails the test instead of reading past `expected`.
    ASSERT_LT(numRead, numExpected) << "Decoded more values than were written";
    ASSERT_EQ(expected[numRead], n) << "Wrong number decoded";
    numRead++;
  }
  // A short (or empty) iteration must not pass silently.
  ASSERT_EQ(numExpected, numRead) << "Decoded fewer values than were written";

  it.Free(it.ctx);
  VVW_Free(vw);
}
53
// Builds term records (all for doc id 1) from hand-written offset lists, aggregates them in an
// intersection result, and checks the minimal offset delta, the within-range predicate and the
// merged offset iteration.
TEST_F(IndexTest, testDistance) {
  VarintVectorWriter *vw = NewVarintVectorWriter(8);
  VarintVectorWriter *vw2 = NewVarintVectorWriter(8);
  VarintVectorWriter *vw3 = NewVarintVectorWriter(8);

  const uint32_t firstOffsets[] = {1, 9, 13, 16, 22};
  for (uint32_t off : firstOffsets) {
    VVW_Write(vw, off);
  }
  const uint32_t secondOffsets[] = {4, 7, 32};
  for (uint32_t off : secondOffsets) {
    VVW_Write(vw2, off);
  }
  const uint32_t thirdOffsets[] = {20, 25};
  for (uint32_t off : thirdOffsets) {
    VVW_Write(vw3, off);
  }

  // Only the first two writers are truncated (vw3 is left as written).
  VVW_Truncate(vw);
  VVW_Truncate(vw2);

  RSIndexResult *tr1 = NewTokenRecord(NULL, 1);
  tr1->docId = 1;
  tr1->term.offsets = offsetsFromVVW(vw);

  RSIndexResult *tr2 = NewTokenRecord(NULL, 1);
  tr2->docId = 1;
  tr2->term.offsets = offsetsFromVVW(vw2);

  RSIndexResult *res = NewIntersectResult(2, 1);
  AggregateResult_AddChild(res, tr1);
  AggregateResult_AddChild(res, tr2);

  ASSERT_EQ(2, IndexResult_MinOffsetDelta(res));

  // Each row: expected return value, then the 2nd and 3rd arguments passed to
  // IndexResult_IsWithinRange (presumably max slop and in-order flag -- confirm against the header).
  static const struct {
    int expected;
    int slop;
    int ordered;
  } rangeCases[] = {{0, 0, 0}, {0, 0, 1}, {0, 1, 1}, {1, 1, 0}, {1, 2, 1},
                    {1, 2, 0}, {1, 3, 1}, {1, 4, 0}, {1, 4, 1}, {1, 5, 1}};
  for (const auto &rangeCase : rangeCases) {
    ASSERT_EQ(rangeCase.expected, IndexResult_IsWithinRange(res, rangeCase.slop, rangeCase.ordered));
  }

  RSIndexResult *tr3 = NewTokenRecord(NULL, 1);
  tr3->docId = 1;
  tr3->term.offsets = offsetsFromVVW(vw3);
  AggregateResult_AddChild(res, tr3);

  ASSERT_EQ(7, IndexResult_MinOffsetDelta(res));

  // test merge iteration: the final entry is the EOF marker, which ends the loop
  RSOffsetIterator it = RSIndexResult_IterateOffsets(res);
  const uint32_t merged[] = {1, 4, 7, 9, 13, 16, 20, 22, 25, 32, RS_OFFSETVECTOR_EOF};
  for (int pos = 0;; pos++) {
    const uint32_t got = it.Next(it.ctx, NULL);
    ASSERT_EQ(got, merged[pos]);
    if (got == RS_OFFSETVECTOR_EOF) {
      break;
    }
  }
  it.Free(it.ctx);

  IndexResult_Free(tr1);
  IndexResult_Free(tr2);
  IndexResult_Free(tr3);
  IndexResult_Free(res);
  VVW_Free(vw);
  VVW_Free(vw2);
  VVW_Free(vw3);
}
128
129 class IndexFlagsTest : public testing::TestWithParam<int> {};
130
TEST_P(IndexFlagsTest,testRWFlags)131 TEST_P(IndexFlagsTest, testRWFlags) {
132 IndexFlags indexFlags = (IndexFlags)GetParam();
133 InvertedIndex *idx = NewInvertedIndex(indexFlags, 1);
134
135 IndexEncoder enc = InvertedIndex_GetEncoder(indexFlags);
136 IndexEncoder docIdEnc = InvertedIndex_GetEncoder(Index_DocIdsOnly);
137 ASSERT_TRUE(enc != NULL);
138 ASSERT_TRUE(docIdEnc != NULL);
139
140 for (size_t i = 0; i < 200; i++) {
141 // if (i % 10000 == 1) {
142 // printf("iw cap: %ld, iw size: %d, numdocs: %d\n", w->cap, IW_Len(w),
143 // w->ndocs);
144 // }
145
146 ForwardIndexEntry h;
147 h.docId = i;
148 h.fieldMask = 1;
149 h.freq = (1 + i % 100) / (float)101;
150
151 h.vw = NewVarintVectorWriter(8);
152 for (int n = 0; n < i % 4; n++) {
153 VVW_Write(h.vw, n);
154 }
155 VVW_Truncate(h.vw);
156
157 InvertedIndex_WriteForwardIndexEntry(idx, enc, &h);
158
159 // printf("doc %d, score %f offset %zd\n", h.docId, h.docScore, w->bw.buf->offset);
160 VVW_Free(h.vw);
161 }
162
163 ASSERT_EQ(200, idx->numDocs);
164 if (enc != docIdEnc) {
165 ASSERT_EQ(2, idx->size);
166 } else {
167 ASSERT_EQ(1, idx->size);
168 }
169 ASSERT_EQ(199, idx->lastId);
170
171 // IW_MakeSkipIndex(w, NewMemoryBuffer(8, BUFFER_WRITE));
172
173 // for (int x = 0; x < w->skipIdx.len; x++) {
174 // printf("Skip entry %d: %d, %d\n", x, w->skipIdx.entries[x].docId,
175 // w->skipIdx.entries[x].offset);
176 // }
177 // printf("iw cap: %ld, iw size: %ld, numdocs: %d\n", w->bw.buf->cap, IW_Len(w), w->ndocs);
178
179 for (int xx = 0; xx < 1; xx++) {
180 // printf("si: %d\n", si->len);
181 IndexReader *ir = NewTermIndexReader(idx, NULL, RS_FIELDMASK_ALL, NULL, 1); //
182 RSIndexResult *h = NULL;
183
184 int n = 0;
185 int rc;
186 while (!ir->atEnd_) {
187 if ((rc = IR_Read(ir, &h)) == INDEXREAD_EOF) {
188 break;
189 }
190 ASSERT_EQ(INDEXREAD_OK, rc);
191 ASSERT_EQ(h->docId, n);
192 n++;
193 }
194 // for (int z= 0; z < 10; z++) {
195 // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_time);
196
197 // IR_SkipTo(ir, 900001, &h);
198
199 // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end_time);
200 // long diffInNanos = end_time.tv_nsec - start_time.tv_nsec;
201
202 // printf("Time elapsed: %ldnano\n", diffInNanos);
203 // //IR_Free(ir);
204 // }
205 // IndexResult_Free(&h);
206 IR_Free(ir);
207 }
208
209 // IW_Free(w);
210 // // overriding the regular IW_Free because we already deleted the buffer
211 InvertedIndex_Free(idx);
212 }
213
214 INSTANTIATE_TEST_CASE_P(IndexFlagsP, IndexFlagsTest, ::testing::Range(1, 32));
215
createIndex(int size,int idStep)216 InvertedIndex *createIndex(int size, int idStep) {
217 InvertedIndex *idx = NewInvertedIndex((IndexFlags)(INDEX_DEFAULT_FLAGS), 1);
218
219 IndexEncoder enc = InvertedIndex_GetEncoder(idx->flags);
220 t_docId id = idStep;
221 for (int i = 0; i < size; i++) {
222 // if (i % 10000 == 1) {
223 // printf("iw cap: %ld, iw size: %d, numdocs: %d\n", w->cap, IW_Len(w),
224 // w->ndocs);
225 // }
226 ForwardIndexEntry h;
227 h.docId = id;
228 h.fieldMask = 1;
229 h.freq = 1;
230 h.term = "hello";
231 h.len = 5;
232
233 h.vw = NewVarintVectorWriter(8);
234 for (int n = idStep; n < idStep + i % 4; n++) {
235 VVW_Write(h.vw, n);
236 }
237
238 InvertedIndex_WriteForwardIndexEntry(idx, enc, &h);
239 VVW_Free(h.vw);
240
241 id += idStep;
242 }
243
244 // printf("BEFORE: iw cap: %ld, iw size: %zd, numdocs: %d\n", w->bw.buf->cap,
245 // IW_Len(w), w->ndocs);
246
247 return idx;
248 }
249
printIntersect(void * ctx,RSIndexResult * hits,int argc)250 int printIntersect(void *ctx, RSIndexResult *hits, int argc) {
251 printf("intersect: %llu\n", (unsigned long long)hits[0].docId);
252 return 0;
253 }
254
// Reads a 10-entry index (doc ids 1..10) through a read iterator and checks that the
// ids arrive in order and that exactly ten of them are produced.
TEST_F(IndexTest, testReadIterator) {
  InvertedIndex *idx = createIndex(10, 1);
  IndexReader *reader = NewTermIndexReader(idx, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexIterator *it = NewReadIterator(reader);

  RSIndexResult *hit = NULL;
  int nextId = 1;
  // nextId only advances after a successful read; EOF leaves it untouched.
  for (; IITER_HAS_NEXT(it); nextId++) {
    if (it->Read(it->ctx, &hit) == INDEXREAD_EOF) {
      break;
    }
    ASSERT_EQ(hit->docId, nextId);
  }
  ASSERT_EQ(11, nextId);

  it->Free(it);
  InvertedIndex_Free(idx);
}
280
// Unions two indexes (doc ids 2,4,..,20 and 3,6,..,30), checks the merged id sequence,
// and checks that every result can be deep-copied.
TEST_F(IndexTest, testUnion) {
  InvertedIndex *w = createIndex(10, 2);
  InvertedIndex *w2 = createIndex(10, 3);
  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexReader *r2 = NewTermIndexReader(w2, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **irs = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  irs[0] = NewReadIterator(r1);
  irs[1] = NewReadIterator(r2);

  IndexIterator *ui = NewUnionIterator(irs, 2, NULL, 0, 1, QN_UNION, NULL);
  RSIndexResult *h = NULL;
  static const int expected[] = {2, 3, 4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 24, 27, 30};
  static const size_t numExpected = sizeof(expected) / sizeof(expected[0]);
  size_t i = 0;
  while (ui->Read(ui->ctx, &h) != INDEXREAD_EOF) {
    // Guard the lookup so a surplus result fails the test instead of reading past `expected`.
    ASSERT_LT(i, numExpected) << "Union produced more results than expected";
    ASSERT_EQ(expected[i], h->docId);
    i++;

    RSIndexResult *copy = IndexResult_DeepCopy(h);
    ASSERT_TRUE(copy != NULL);
    ASSERT_TRUE(copy != h);
    ASSERT_TRUE(copy->isCopy);

    ASSERT_EQ(copy->docId, h->docId);
    ASSERT_EQ(copy->type, h->type);

    IndexResult_Free(copy);
  }
  // An iterator that stops early must not pass silently.
  ASSERT_EQ(numExpected, i) << "Union produced fewer results than expected";

  ui->Free(ui);
  InvertedIndex_Free(w);
  InvertedIndex_Free(w2);
}
319
// Unions two readers created with weights 0.5 and 1 under a union weight of 0.8 and checks
// that each result, and each of its children, reports the weight it was created with.
TEST_F(IndexTest, testWeight) {
  InvertedIndex *w = createIndex(10, 1);
  InvertedIndex *w2 = createIndex(10, 2);
  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 0.5);
  IndexReader *r2 = NewTermIndexReader(w2, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **irs = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  irs[0] = NewReadIterator(r1);
  irs[1] = NewReadIterator(r2);

  IndexIterator *ui = NewUnionIterator(irs, 2, NULL, 0, 0.8, QN_UNION, NULL);
  RSIndexResult *h = NULL;
  static const int expected[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20};
  static const size_t numExpected = sizeof(expected) / sizeof(expected[0]);
  size_t i = 0;
  while (ui->Read(ui->ctx, &h) != INDEXREAD_EOF) {
    // Guard the lookup so a surplus result fails the test instead of reading past `expected`.
    ASSERT_LT(i, numExpected) << "Union produced more results than expected";
    ASSERT_EQ(h->docId, expected[i]);
    i++;
    ASSERT_EQ(h->weight, 0.8);
    if (h->agg.numChildren == 2) {
      ASSERT_EQ(h->agg.children[0]->weight, 0.5);
      ASSERT_EQ(h->agg.children[1]->weight, 1);
    } else {
      // `i` was already advanced: the first ten results (ids 1..10) come from the 0.5-weight
      // reader, the remaining single-child results from the weight-1 reader.
      if (i <= 10) {
        ASSERT_EQ(h->agg.children[0]->weight, 0.5);
      } else {
        ASSERT_EQ(h->agg.children[0]->weight, 1);
      }
    }
  }
  // An iterator that stops early must not pass silently.
  ASSERT_EQ(numExpected, i) << "Union produced fewer results than expected";

  ui->Free(ui);
  InvertedIndex_Free(w);
  InvertedIndex_Free(w2);
}
356
// Intersects doc ids 1..16 with NOT(multiples of 3) and checks that exactly the ids
// not divisible by 3 come back.
TEST_F(IndexTest, testNot) {
  InvertedIndex *w = createIndex(16, 1);
  // not all numbers that divide by 3
  InvertedIndex *w2 = createIndex(10, 3);
  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexReader *r2 = NewTermIndexReader(w2, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **irs = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  irs[0] = NewReadIterator(r1);
  irs[1] = NewNotIterator(NewReadIterator(r2), w2->lastId, 1);

  IndexIterator *ui = NewIntersecIterator(irs, 2, NULL, RS_FIELDMASK_ALL, -1, 0, 1);
  RSIndexResult *h = NULL;
  static const int expected[] = {1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16};
  static const size_t numExpected = sizeof(expected) / sizeof(expected[0]);
  size_t i = 0;
  while (ui->Read(ui->ctx, &h) != INDEXREAD_EOF) {
    // Guard the lookup so a surplus result fails the test instead of reading past `expected`.
    ASSERT_LT(i, numExpected) << "Intersection produced more results than expected";
    ASSERT_EQ(expected[i], h->docId);
    i++;
  }
  // An iterator that stops early must not pass silently.
  ASSERT_EQ(numExpected, i) << "Intersection produced fewer results than expected";

  ui->Free(ui);
  InvertedIndex_Free(w);
  InvertedIndex_Free(w2);
}
384
// Runs a stand-alone NOT iterator over an index holding 3, 6, .., 30 with a maximum doc id
// of lastId + 5, and checks that every id up to that maximum which is not in the index is returned.
TEST_F(IndexTest, testPureNot) {
  InvertedIndex *w = createIndex(10, 3);

  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 1);
  printf("last id: %llu\n", (unsigned long long)w->lastId);

  IndexIterator *ir = NewNotIterator(NewReadIterator(r1), w->lastId + 5, 1);

  RSIndexResult *h = NULL;
  static const int expected[] = {1,  2,  4,  5,  7,  8,  10, 11, 13, 14, 16, 17, 19,
                                 20, 22, 23, 25, 26, 28, 29, 31, 32, 33, 34, 35};
  static const size_t numExpected = sizeof(expected) / sizeof(expected[0]);
  size_t i = 0;
  while (ir->Read(ir->ctx, &h) != INDEXREAD_EOF) {
    // Guard the lookup so a surplus result fails the test instead of reading past `expected`.
    ASSERT_LT(i, numExpected) << "NOT iterator produced more results than expected";
    ASSERT_EQ(expected[i], h->docId);
    i++;
  }
  // An iterator that stops early must not pass silently.
  ASSERT_EQ(numExpected, i) << "NOT iterator produced fewer results than expected";

  ir->Free(ir);
  InvertedIndex_Free(w);
}
405
406 // Note -- in test_index.c, this test was never actually run!
TEST_F(IndexTest,DISABLED_testOptional)407 TEST_F(IndexTest, DISABLED_testOptional) {
408 InvertedIndex *w = createIndex(16, 1);
409 // not all numbers that divide by 3
410 InvertedIndex *w2 = createIndex(10, 3);
411 IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 1); //
412 IndexReader *r2 = NewTermIndexReader(w2, NULL, RS_FIELDMASK_ALL, NULL, 1); //
413
414 // printf("Reading!\n");
415 IndexIterator **irs = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
416 irs[0] = NewReadIterator(r1);
417 irs[1] = NewOptionalIterator(NewReadIterator(r2), w2->lastId, 1);
418
419 IndexIterator *ui = NewIntersecIterator(irs, 2, NULL, RS_FIELDMASK_ALL, -1, 0, 1);
420 RSIndexResult *h = NULL;
421
422 int i = 1;
423 while (ui->Read(ui->ctx, &h) != INDEXREAD_EOF) {
424 // printf("%d <=> %d\n", h->docId, i);
425 ASSERT_EQ(i, h->docId);
426 if (i > 0 && i % 3 == 0) {
427 ASSERT_EQ(1, h->agg.children[1]->freq);
428 } else {
429 ASSERT_EQ(0, h->agg.children[1]->freq);
430 }
431 // printf("%d, ", h.docId);
432 }
433
434 ui->Free(ui);
435 // IndexResult_Free(&h);
436 InvertedIndex_Free(w);
437 InvertedIndex_Free(w2);
438 }
439
// Writes 75 numeric entries (doc id i holds the value i) and reads them back through a
// numeric reader, checking ids, values and the number of entries returned.
TEST_F(IndexTest, testNumericInverted) {
  InvertedIndex *idx = NewInvertedIndex(Index_StoreNumeric, 1);

  for (int i = 0; i < 75; i++) {
    size_t sz = InvertedIndex_WriteNumericEntry(idx, i + 1, (double)(i + 1));
    ASSERT_TRUE(sz > 1);
  }
  ASSERT_EQ(75, idx->lastId);

  IndexReader *ir = NewNumericReader(NULL, idx, NULL, 0, 0);
  IndexIterator *it = NewReadIterator(ir);
  RSIndexResult *res = NULL;
  t_docId i = 1;
  while (INDEXREAD_EOF != it->Read(it->ctx, &res)) {
    ASSERT_EQ(i++, res->docId);
    ASSERT_EQ(res->num.value, (float)res->docId);
  }
  // All 75 entries must have been read; an early EOF must not pass silently.
  ASSERT_EQ(76, i);

  // Release the iterator before the index it reads from, as the other tests in this file do.
  it->Free(it);
  InvertedIndex_Free(idx);
}
467
// Round-trips a mix of zero, fractional, negative and large values through the numeric
// index and checks each value read back is within 0.01 of the value written.
TEST_F(IndexTest, testNumericVaried) {
  InvertedIndex *idx = NewInvertedIndex(Index_StoreNumeric, 1);

  // NOTE(review): -(1LLU << 32) negates an unsigned value, so it is 2^64 - 2^32 rather
  // than -2^32 -- confirm whether a negative value was intended here.
  static const double nums[] = {0,          0.13,          0.001,     -0.1,     1.0,
                                5.0,        4.323,         65535,     65535.53, 32768.432,
                                1LLU << 32, -(1LLU << 32), 1LLU << 40};
  static const size_t numCount = sizeof(nums) / sizeof(double);

  for (size_t i = 0; i < numCount; i++) {
    size_t sz = InvertedIndex_WriteNumericEntry(idx, i + 1, nums[i]);
    ASSERT_GT(sz, 1);
  }

  IndexReader *ir = NewNumericReader(NULL, idx, NULL, 0, 0);
  IndexIterator *it = NewReadIterator(ir);
  RSIndexResult *res = NULL;

  for (size_t i = 0; i < numCount; i++) {
    int rv = it->Read(it->ctx, &res);
    ASSERT_NE(INDEXREAD_EOF, rv);
    ASSERT_LT(fabs(nums[i] - res->num.value), 0.01);
  }

  // Nothing may follow the values that were written.
  ASSERT_EQ(INDEXREAD_EOF, it->Read(it->ctx, &res));

  // Release the iterator before the index it reads from, as the other tests in this file do.
  it->Free(it);
  InvertedIndex_Free(idx);
}
497
// One numeric-encoding expectation: a value to write and the size expected back from
// InvertedIndex_WriteNumericEntry for it (see testNumericEncoding).
struct encodingInfo {
  double value;
  size_t size;
};

// Expectation table; the trailing comment on each row is its index in the array.
static const encodingInfo infos[] = {
    {0, 2},                    // 0
    {1, 2},                    // 1
    {63, 3},                   // 2
    {-1, 3},                   // 3
    {-63, 3},                  // 4
    {64, 3},                   // 5
    {-64, 3},                  // 6
    {255, 3},                  // 7
    {-255, 3},                 // 8
    {65535, 4},                // 9
    {-65535, 4},               // 10
    {16777215, 5},             // 11
    {-16777215, 5},            // 12
    {4294967295, 6},           // 13
    {-4294967295, 6},          // 14
    {4294967295 + 1, 7},       // 15
    {4294967295 + 2, 7},       // 16
    {549755813888.0, 7},       // 17
    {549755813888.0 + 2, 7},   // 18
    {549755813888.0 - 23, 7},  // 19
    {-549755813888.0, 7},      // 20
    {1503342028.957225, 10},   // 21
    {42.4345, 10},             // 22
    {(float)0.5, 6},           // 23
    {DBL_MAX, 10},             // 24
    {UINT64_MAX >> 12, 9},     // 25
    {INFINITY, 2},             // 26
    {-INFINITY, 2}             // 27
};
532
// Writes every value in `infos`, checking the size reported by the writer against the table,
// then reads the values back: infinities must match exactly, everything else within 0.01.
TEST_F(IndexTest, testNumericEncoding) {
  static const size_t numInfos = sizeof(infos) / sizeof(infos[0]);
  InvertedIndex *idx = NewInvertedIndex(Index_StoreNumeric, 1);

  for (size_t ii = 0; ii < numInfos; ii++) {
    size_t sz = InvertedIndex_WriteNumericEntry(idx, ii + 1, infos[ii].value);
    ASSERT_EQ(infos[ii].size, sz);
  }

  IndexReader *ir = NewNumericReader(NULL, idx, NULL, 0, 0);
  IndexIterator *it = NewReadIterator(ir);
  RSIndexResult *res = NULL;

  for (size_t ii = 0; ii < numInfos; ii++) {
    int rc = it->Read(it->ctx, &res);
    ASSERT_NE(rc, INDEXREAD_EOF);
    if (fabs(infos[ii].value) == INFINITY) {
      // inf - inf is NaN, so infinities are compared for equality instead of by difference.
      ASSERT_EQ(infos[ii].value, res->num.value);
    } else {
      ASSERT_LT(fabs(infos[ii].value - res->num.value), 0.01);
    }
  }

  // Release the iterator before the index it reads from, as the other tests in this file do.
  it->Free(it);
  InvertedIndex_Free(idx);
}
564
// Calls Abort on a read iterator after the 51st successful read and checks that
// no further results are produced.
TEST_F(IndexTest, testAbort) {
  InvertedIndex *idx = createIndex(1000, 1);
  IndexReader *reader = NewTermIndexReader(idx, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexIterator *it = NewReadIterator(reader);

  RSIndexResult *res = NULL;
  int numRead = 0;
  // numRead counts successful reads; the abort is issued while handling read number 51.
  for (; it->Read(it->ctx, &res) != INDEXREAD_EOF; numRead++) {
    if (numRead == 50) {
      it->Abort(it->ctx);
    }
  }
  ASSERT_EQ(51, numRead);

  it->Free(it);
  InvertedIndex_Free(idx);
}
583
// Intersects two 100000-entry indexes (id steps 4 and 2) and checks, for every hit, its
// type, doc id and frequency, and that a deep copy of it is consistent with the original.
TEST_F(IndexTest, testIntersection) {
  InvertedIndex *w = createIndex(100000, 4);
  InvertedIndex *w2 = createIndex(100000, 2);
  IndexReader *r1 = NewTermIndexReader(w, NULL, RS_FIELDMASK_ALL, NULL, 1);
  IndexReader *r2 = NewTermIndexReader(w2, NULL, RS_FIELDMASK_ALL, NULL, 1);

  IndexIterator **children = (IndexIterator **)calloc(2, sizeof(IndexIterator *));
  children[0] = NewReadIterator(r1);
  children[1] = NewReadIterator(r2);

  IndexIterator *ii = NewIntersecIterator(children, 2, NULL, RS_FIELDMASK_ALL, -1, 0, 1);

  RSIndexResult *h = NULL;
  uint32_t topFreq = 0;
  int count = 0;
  for (; ii->Read(ii->ctx, &h) != INDEXREAD_EOF; ++count) {
    ASSERT_EQ(h->type, RSResultType_Intersection);
    ASSERT_TRUE(RSIndexResult_IsAggregate(h));
    ASSERT_TRUE(RSIndexResult_HasOffsets(h));
    if (h->freq >= topFreq) {
      topFreq = h->freq;
    }

    RSIndexResult *copy = IndexResult_DeepCopy(h);
    ASSERT_TRUE(copy != NULL);
    ASSERT_TRUE(copy != h);
    ASSERT_TRUE(copy->isCopy == 1);
    ASSERT_TRUE(copy->docId == h->docId);
    ASSERT_TRUE(copy->type == RSResultType_Intersection);

    // Hit number `count` (0-based) is expected to have freq 2*count + 2 and twice that as doc id.
    const int expectedFreq = count * 2 + 2;
    ASSERT_EQ(expectedFreq * 2, h->docId);
    ASSERT_EQ(expectedFreq, h->freq);
    IndexResult_Free(copy);
  }

  ASSERT_EQ(count, 50000);
  ASSERT_EQ(topFreq, 100000.0);

  ii->Free(ii);
  InvertedIndex_Free(w);
  InvertedIndex_Free(w2);
}
634
// Exercises Buffer writing (with growth), varint writing, truncation and reading back.
TEST_F(IndexTest, testBuffer) {
  Buffer b = {0};
  Buffer_Init(&b, 2);
  BufferWriter w = NewBufferWriter(&b);
  ASSERT_TRUE(w.buf->cap == 2) << "Wrong capacity";
  ASSERT_TRUE(w.buf->data != NULL);
  ASSERT_TRUE(Buffer_Offset(w.buf) == 0);
  ASSERT_TRUE(w.buf->data == w.pos);

  // Write a NUL-terminated string that does not fit the initial 2-byte capacity.
  const char *x = "helololoolo";
  size_t l = Buffer_Write(&w, (void *)x, strlen(x) + 1);

  ASSERT_TRUE(l == strlen(x) + 1);
  ASSERT_TRUE(Buffer_Offset(w.buf) == l);
  ASSERT_EQ(Buffer_Capacity(w.buf), 14);

  l = WriteVarint(1337654, &w);
  ASSERT_TRUE(l == 3);
  ASSERT_EQ(Buffer_Offset(w.buf), 15);
  ASSERT_EQ(Buffer_Capacity(w.buf), 17);

  Buffer_Truncate(w.buf, 0);

  ASSERT_TRUE(Buffer_Capacity(w.buf) == 15);

  BufferReader br = NewBufferReader(w.buf);
  ASSERT_TRUE(br.pos == 0);

  // Scratch storage owned by a vector so it is released even when an ASSERT_* returns early.
  std::vector<char> y(strlen(x) + 1);
  l = Buffer_Read(&br, y.data(), strlen(x) + 1);
  ASSERT_TRUE(l == strlen(x) + 1);

  ASSERT_TRUE(strcmp(y.data(), x) == 0);
  ASSERT_TRUE(BufferReader_Offset(&br) == l);

  int n = ReadVarint(&br);
  ASSERT_TRUE(n == 1337654);

  Buffer_Free(w.buf);
}
678
// State handed to tokenFunc through its `ctx` pointer.
struct tokenContext {
  int num;          // index of the next entry of `expected` to compare against
  char **expected;  // expected token strings, in the order they should arrive
};
684
tokenFunc(void * ctx,const Token * t)685 int tokenFunc(void *ctx, const Token *t) {
686 tokenContext *tx = (tokenContext *)ctx;
687 int ret = strncmp(t->tok, tx->expected[tx->num++], t->tokLen);
688 EXPECT_TRUE(ret == 0);
689 EXPECT_TRUE(t->pos > 0);
690 return 0;
691 }
692
693 // int testTokenize() {
694 // char *txt = strdup("Hello? world... ? -WAZZ@UP? שלום");
695 // tokenContext ctx = {0};
696 // const char *expected[] = {"hello", "world", "wazz", "up", "שלום"};
697 // ctx.expected = (char **)expected;
698
699 // tokenize(txt, &ctx, tokenFunc, NULL, 0, DefaultStopWordList(), 0);
700 // ASSERT_TRUE(ctx.num == 5);
701
702 // free(txt);
703
704 // return 0;
705 // }
706
707 // int testForwardIndex() {
708
709 // Document doc = NewDocument(NULL, 1, 1, "english");
710 // doc.docId = 1;
711 // doc.fields[0] = N
712 // ForwardIndex *idx = NewForwardIndex(doc);
713 // char *txt = strdup("Hello? world... hello hello ? __WAZZ@UP? שלום");
714 // tokenize(txt, 1, 1, idx, forwardIndexTokenFunc);
715
716 // return 0;
717 // }
718
// Parses three schema argument lists with IndexSpec_Parse and checks the resulting spec:
// flags and custom stopwords, per-field type/weight/bit/options/sort index, sortable lookup,
// the NOOFFSETS/NOFIELDS switches, and a NOSTEM option following WEIGHT.
TEST_F(IndexTest, testIndexSpec) {
  const char *title = "title";
  const char *body = "body";
  const char *foo = "foo";
  const char *bar = "bar";
  const char *name = "name";
  const char *args[] = {"STOPWORDS", "2",      "hello",    "world",  "SCHEMA",   title,
                        "text",      "weight", "0.1",      body,     "text",     "weight",
                        "2.0",       foo,      "text",     "sortable", bar,      "numeric",
                        "sortable",  name,     "text",     "nostem"};
  QueryError status = {QUERY_OK};
  IndexSpec *spec = IndexSpec_Parse("idx", args, sizeof(args) / sizeof(const char *), &status);
  ASSERT_FALSE(QueryError_HasError(&status)) << QueryError_GetError(&status);
  ASSERT_TRUE(spec);
  ASSERT_TRUE(spec->numFields == 5);
  ASSERT_TRUE(spec->stopwords != NULL);
  ASSERT_TRUE(spec->stopwords != DefaultStopWordList());
  ASSERT_TRUE(spec->flags & Index_StoreFieldFlags);
  ASSERT_TRUE(spec->flags & Index_StoreTermOffsets);
  ASSERT_TRUE(spec->flags & Index_HasCustomStopwords);

  // Only the two custom stopwords are recognised.
  ASSERT_TRUE(IndexSpec_IsStopWord(spec, "hello", 5));
  ASSERT_TRUE(IndexSpec_IsStopWord(spec, "world", 5));
  ASSERT_TRUE(!IndexSpec_IsStopWord(spec, "werld", 5));

  // body: text, weight 2.0
  const FieldSpec *fs = IndexSpec_GetField(spec, body, strlen(body));
  ASSERT_TRUE(fs != NULL);
  ASSERT_TRUE(FIELD_IS(fs, INDEXFLD_T_FULLTEXT));
  ASSERT_STREQ(fs->name, body);
  ASSERT_EQ(fs->ftWeight, 2.0);
  ASSERT_EQ(FIELD_BIT(fs), 2);
  ASSERT_EQ(fs->options, 0);
  ASSERT_EQ(fs->sortIdx, -1);

  // title: text, weight 0.1
  fs = IndexSpec_GetField(spec, title, strlen(title));
  ASSERT_TRUE(fs != NULL);
  ASSERT_TRUE(FIELD_IS(fs, INDEXFLD_T_FULLTEXT));
  ASSERT_TRUE(strcmp(fs->name, title) == 0);
  ASSERT_TRUE(fs->ftWeight == 0.1);
  ASSERT_TRUE(FIELD_BIT(fs) == 1);
  ASSERT_TRUE(fs->options == 0);
  ASSERT_TRUE(fs->sortIdx == -1);

  // foo: sortable text
  fs = IndexSpec_GetField(spec, foo, strlen(foo));
  ASSERT_TRUE(fs != NULL);
  ASSERT_TRUE(FIELD_IS(fs, INDEXFLD_T_FULLTEXT));
  ASSERT_TRUE(strcmp(fs->name, foo) == 0);
  ASSERT_TRUE(fs->ftWeight == 1);
  ASSERT_TRUE(FIELD_BIT(fs) == 4);
  ASSERT_TRUE(fs->options == FieldSpec_Sortable);
  ASSERT_TRUE(fs->sortIdx == 0);

  // bar: sortable numeric
  fs = IndexSpec_GetField(spec, bar, strlen(bar));
  ASSERT_TRUE(fs != NULL);
  ASSERT_TRUE(FIELD_IS(fs, INDEXFLD_T_NUMERIC));
  ASSERT_TRUE(strcmp(fs->name, bar) == 0);
  ASSERT_TRUE(fs->options == FieldSpec_Sortable);
  ASSERT_TRUE(fs->sortIdx == 1);

  // A name that is not in the schema yields no field.
  ASSERT_TRUE(IndexSpec_GetField(spec, "fooz", 4) == NULL);

  // name: text, nostem
  fs = IndexSpec_GetField(spec, name, strlen(name));
  ASSERT_TRUE(fs != NULL);
  ASSERT_TRUE(FIELD_IS(fs, INDEXFLD_T_FULLTEXT));
  ASSERT_TRUE(strcmp(fs->name, name) == 0);
  ASSERT_TRUE(fs->ftWeight == 1);
  ASSERT_TRUE(FIELD_BIT(fs) == 8);
  ASSERT_TRUE(fs->options == FieldSpec_NoStemming);
  ASSERT_TRUE(fs->sortIdx == -1);

  // Sortable bookkeeping: foo and bar are sortable, title is not.
  ASSERT_TRUE(spec->sortables != NULL);
  ASSERT_TRUE(spec->sortables->len == 2);
  int sortIdx = IndexSpec_GetFieldSortingIndex(spec, foo, strlen(foo));
  ASSERT_EQ(0, sortIdx);
  sortIdx = IndexSpec_GetFieldSortingIndex(spec, bar, strlen(bar));
  ASSERT_EQ(1, sortIdx);
  sortIdx = IndexSpec_GetFieldSortingIndex(spec, title, strlen(title));
  ASSERT_EQ(-1, sortIdx);

  IndexSpec_Free(spec);

  // Second spec: NOOFFSETS / NOFIELDS must clear the corresponding flags.
  QueryError_ClearError(&status);
  const char *args2[] = {
      "NOOFFSETS", "NOFIELDS", "SCHEMA", title, "text",
  };
  spec = IndexSpec_Parse("idx", args2, sizeof(args2) / sizeof(const char *), &status);
  ASSERT_FALSE(QueryError_HasError(&status)) << QueryError_GetError(&status);
  ASSERT_TRUE(spec);
  ASSERT_TRUE(spec->numFields == 1);

  ASSERT_TRUE(!(spec->flags & Index_StoreFieldFlags));
  ASSERT_TRUE(!(spec->flags & Index_StoreTermOffsets));
  IndexSpec_Free(spec);

  // User-reported bug
  const char *args3[] = {"SCHEMA", "ha", "NUMERIC", "hb", "TEXT", "WEIGHT", "1", "NOSTEM"};
  QueryError_ClearError(&status);
  spec = IndexSpec_Parse("idx", args3, sizeof(args3) / sizeof(args3[0]), &status);
  ASSERT_FALSE(QueryError_HasError(&status)) << QueryError_GetError(&status);
  ASSERT_TRUE(spec);
  ASSERT_TRUE(FieldSpec_IsNoStem(spec->fields + 1));
  IndexSpec_Free(spec);
}
818
/**
 * Replaces the contents of `args` with a generated schema argument list:
 * "SCHEMA" followed, for each i in [0, nfields), by "field<i>" and its type arguments.
 *   - even i:          TEXT
 *   - odd i below 40:  TEXT NOINDEX
 *   - odd i from 40:   NUMERIC
 *
 * Every element is allocated with strdup(); release them with freeSchemaArgs().
 * Pointers already stored in `args` are dropped without being freed, so pass an empty
 * (or already freed) vector.
 */
static void fillSchema(std::vector<char *> &args, size_t nfields) {
  args.clear();
  args.reserve(1 + nfields * 3);
  args.push_back(strdup("SCHEMA"));
  for (unsigned i = 0; i < nfields; i++) {
    // "field" + at most 10 digits fits comfortably; snprintf avoids the GNU-only asprintf
    // and its unchecked return value.
    char fieldName[32];
    snprintf(fieldName, sizeof(fieldName), "field%u", i);
    args.push_back(strdup(fieldName));

    const bool isOdd = (i % 2 != 0);
    if (isOdd && i >= 40) {
      // the rest are numeric
      args.push_back(strdup("NUMERIC"));
    } else {
      args.push_back(strdup("TEXT"));
      if (isOdd) {
        // odd fields under 40 are TEXT NOINDEX
        args.push_back(strdup("NOINDEX"));
      }
    }
  }
}
845
// Frees every string stored in `args` with free() and leaves the vector empty.
static void freeSchemaArgs(std::vector<char *> &args) {
  for (size_t idx = 0; idx < args.size(); idx++) {
    free(args[idx]);
  }
  args.clear();
}
852
// Parses a generated 64-field schema (must succeed) and a 300-field one (must be rejected
// with the "limited to 128 TEXT fields" error).
TEST_F(IndexTest, testHugeSpec) {
  std::vector<char *> args;
  QueryError err = {QUERY_OK};

  // A schema that fits.
  int N = 64;
  fillSchema(args, N);
  IndexSpec *spec = IndexSpec_Parse("idx", (const char **)&args[0], args.size(), &err);
  ASSERT_FALSE(QueryError_HasError(&err)) << QueryError_GetError(&err);
  ASSERT_TRUE(spec);
  ASSERT_TRUE(spec->numFields == N);
  IndexSpec_Free(spec);
  freeSchemaArgs(args);

  // test too big a schema
  N = 300;
  fillSchema(args, N);
  QueryError_ClearError(&err);
  spec = IndexSpec_Parse("idx", (const char **)&args[0], args.size(), &err);
  ASSERT_TRUE(spec == NULL);
  ASSERT_TRUE(QueryError_HasError(&err));
  ASSERT_STREQ("Schema is limited to 128 TEXT fields", QueryError_GetError(&err));
  freeSchemaArgs(args);
  QueryError_ClearError(&err);
}
878
// Overlays an int and a float in the same storage.
union u {
  int i;
  float f;
};
884
// Writes the same forward-index entry under several flag combinations and checks the
// number of bytes the writer reports for each.
// `flags` is mutated step by step, so the sections below depend on their order.
TEST_F(IndexTest, testIndexFlags) {
  // Value-initialize so the encoders never see indeterminate fields.
  ForwardIndexEntry h = {};
  h.docId = 1234;
  h.fieldMask = 0x01;
  h.freq = 1;
  h.vw = NewVarintVectorWriter(8);
  for (int n = 0; n < 10; n++) {
    VVW_Write(h.vw, n);
  }
  VVW_Truncate(h.vw);

  // Default flags.
  uint32_t flags = INDEX_DEFAULT_FLAGS;
  InvertedIndex *w = NewInvertedIndex(IndexFlags(flags), 1);
  IndexEncoder enc = InvertedIndex_GetEncoder(w->flags);
  ASSERT_TRUE(w->flags == flags);
  size_t sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
  ASSERT_EQ(15, sz);
  InvertedIndex_Free(w);

  // Without term offsets: expected to shrink by the offset bytes plus one.
  flags &= ~Index_StoreTermOffsets;
  w = NewInvertedIndex(IndexFlags(flags), 1);
  ASSERT_TRUE(!(w->flags & Index_StoreTermOffsets));
  enc = InvertedIndex_GetEncoder(w->flags);
  size_t sz2 = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
  ASSERT_EQ(sz2, sz - Buffer_Offset(&h.vw->buf) - 1);
  InvertedIndex_Free(w);

  // Default flags plus wide schema, with a 48-bit field mask.
  flags = INDEX_DEFAULT_FLAGS | Index_WideSchema;
  w = NewInvertedIndex(IndexFlags(flags), 1);
  ASSERT_TRUE((w->flags & Index_WideSchema));
  enc = InvertedIndex_GetEncoder(w->flags);
  h.fieldMask = 0xffffffffffff;
  ASSERT_EQ(21, InvertedIndex_WriteForwardIndexEntry(w, enc, &h));
  InvertedIndex_Free(w);

  // Index_WideSchema is already set at this point, so this repeats the previous section.
  flags |= Index_WideSchema;
  w = NewInvertedIndex(IndexFlags(flags), 1);
  ASSERT_TRUE((w->flags & Index_WideSchema));
  enc = InvertedIndex_GetEncoder(w->flags);
  h.fieldMask = 0xffffffffffff;
  sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
  ASSERT_EQ(21, sz);
  InvertedIndex_Free(w);

  // Keep only the frequency bit (every other flag is masked off).
  flags &= Index_StoreFreqs;
  w = NewInvertedIndex(IndexFlags(flags), 1);
  ASSERT_TRUE(!(w->flags & Index_StoreTermOffsets));
  ASSERT_TRUE(!(w->flags & Index_StoreFieldFlags));
  enc = InvertedIndex_GetEncoder(w->flags);
  sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
  ASSERT_EQ(3, sz);
  InvertedIndex_Free(w);

  // Frequencies plus wide field flags.
  flags |= Index_StoreFieldFlags | Index_WideSchema;
  w = NewInvertedIndex(IndexFlags(flags), 1);
  ASSERT_TRUE((w->flags & Index_WideSchema));
  ASSERT_TRUE((w->flags & Index_StoreFieldFlags));
  enc = InvertedIndex_GetEncoder(w->flags);
  h.fieldMask = 0xffffffffffff;
  sz = InvertedIndex_WriteForwardIndexEntry(w, enc, &h);
  ASSERT_EQ(10, sz);
  InvertedIndex_Free(w);

  VVW_Free(h.vw);
}
953
// Fills a DocTable well past its configured maximum capacity, then checks key/score/payload
// lookups, deletion, and that a binary key containing an embedded NUL is distinct from
// its NUL-free prefix.
TEST_F(IndexTest, testDocTable) {
  char buf[16];
  DocTable dt = NewDocTable(10, 10);
  t_docId did = 0;
  // N is set to 100 and the max cap of the doc table is 10 so we surely will
  // get overflow and check that everything works correctly
  int N = 100;
  for (int i = 0; i < N; i++) {
    size_t nkey = snprintf(buf, sizeof(buf), "doc_%d", i);
    RSDocumentMetadata *dmd = DocTable_Put(&dt, buf, nkey, (double)i, Document_DefaultFlags, buf,
                                           strlen(buf), DocumentType_Hash);
    // Check before dereferencing so a failed insert is reported rather than crashing.
    ASSERT_TRUE(dmd != NULL);
    t_docId nd = dmd->id;
    ASSERT_EQ(did + 1, nd);
    did = nd;
  }

  ASSERT_EQ(N + 1, dt.size);
  ASSERT_EQ(N, dt.maxDocId);
#ifdef __x86_64__
  ASSERT_EQ(10980, (int)dt.memsize);
#endif
  for (int i = 0; i < N; i++) {
    snprintf(buf, sizeof(buf), "doc_%d", i);
    const char *key = DocTable_GetKey(&dt, i + 1, NULL);
    ASSERT_STREQ(key, buf);

    float score = DocTable_GetScore(&dt, i + 1);
    ASSERT_EQ((int)score, i);

    RSDocumentMetadata *dmd = DocTable_Get(&dt, i + 1);
    // The NULL check has to come before the reference is taken.
    ASSERT_TRUE(dmd != NULL);
    DMD_Incref(dmd);
    ASSERT_TRUE(dmd->flags & Document_HasPayload);
    ASSERT_STREQ(dmd->keyPtr, buf);
    char *pl = dmd->payload->data;
    ASSERT_TRUE(!(strncmp(pl, (char *)buf, dmd->payload->len)));

    ASSERT_EQ((int)dmd->score, i);
    ASSERT_EQ((int)dmd->flags, (int)(Document_DefaultFlags | Document_HasPayload));

    t_docId xid = DocIdMap_Get(&dt.dim, buf, strlen(buf));

    ASSERT_EQ((int)xid, i + 1);

    // The reference taken above keeps `dmd` usable after the delete, until DMD_Decref.
    int rc = DocTable_Delete(&dt, dmd->keyPtr, sdslen(dmd->keyPtr));
    ASSERT_EQ(1, rc);
    ASSERT_TRUE((int)(dmd->flags & Document_Deleted));
    DMD_Decref(dmd);
    dmd = DocTable_Get(&dt, i + 1);
    ASSERT_TRUE(!dmd);
  }

  ASSERT_FALSE(DocIdMap_Get(&dt.dim, "foo bar", strlen("foo bar")));
  ASSERT_FALSE(DocTable_Get(&dt, N + 2));

  RSDocumentMetadata *dmd =
      DocTable_Put(&dt, "Hello", 5, 1.0, Document_DefaultFlags, NULL, 0, DocumentType_Hash);
  ASSERT_TRUE(dmd != NULL);
  t_docId strDocId = dmd->id;
  ASSERT_TRUE(0 != strDocId);

  // Test that binary keys also work here
  static const char binBuf[] = {"Hello\x00World"};
  const size_t binBufLen = 11;
  ASSERT_FALSE(DocIdMap_Get(&dt.dim, binBuf, binBufLen));
  dmd = DocTable_Put(&dt, binBuf, binBufLen, 1.0, Document_DefaultFlags, NULL, 0, DocumentType_Hash);
  ASSERT_TRUE(dmd);
  ASSERT_NE(dmd->id, strDocId);
  ASSERT_EQ(dmd->id, DocIdMap_Get(&dt.dim, binBuf, binBufLen));
  ASSERT_EQ(strDocId, DocIdMap_Get(&dt.dim, "Hello", 5));
  DocTable_Free(&dt);
}
1023
/**
 * Covers the sorting table (field registration and case-insensitive lookup by
 * name) and sorting vectors (storing string / numeric values and comparing two
 * vectors on a given key in both sort directions).
 */
TEST_F(IndexTest, testSortable) {
  RSSortingTable *tbl = NewSortingTable();
  RSSortingTable_Add(&tbl, "foo", RSValue_String);
  RSSortingTable_Add(&tbl, "bar", RSValue_String);
  RSSortingTable_Add(&tbl, "baz", RSValue_String);
  ASSERT_EQ(3, tbl->len);

  ASSERT_STREQ("foo", tbl->fields[0].name);
  ASSERT_EQ(RSValue_String, tbl->fields[0].type);
  ASSERT_STREQ("bar", tbl->fields[1].name);
  ASSERT_STREQ("baz", tbl->fields[2].name);
  // Lookup ignores case; a NULL table or an unknown name yields -1
  ASSERT_EQ(0, RSSortingTable_GetFieldIdx(tbl, "foo"));
  ASSERT_EQ(0, RSSortingTable_GetFieldIdx(tbl, "FoO"));
  ASSERT_EQ(-1, RSSortingTable_GetFieldIdx(NULL, "FoO"));

  ASSERT_EQ(1, RSSortingTable_GetFieldIdx(tbl, "bar"));
  ASSERT_EQ(-1, RSSortingTable_GetFieldIdx(tbl, "barbar"));

  RSSortingVector *v = NewSortingVector(tbl->len);
  ASSERT_EQ(v->len, tbl->len);

  const char *str = "hello";
  const char *masse = "Maße";
  double num = 3.141;
  // Slots of a fresh vector start out as null values
  ASSERT_TRUE(RSValue_IsNull(v->values[0]));
  RSSortingVector_Put(v, 0, str, RS_SORTABLE_STR, 0);
  ASSERT_EQ(v->values[0]->t, RSValue_String);
  ASSERT_EQ(v->values[0]->strval.stype, RSString_RMAlloc);

  ASSERT_TRUE(RSValue_IsNull(v->values[1]));
  ASSERT_TRUE(RSValue_IsNull(v->values[2]));
  // As with the string slot above: RS_SORTABLE_* is the kind tag handed to
  // Put, RSValue_* is the type reported by the stored value.
  RSSortingVector_Put(v, 1, &num, RS_SORTABLE_NUM, 0);
  ASSERT_EQ(v->values[1]->t, RSValue_Number);

  RSSortingVector *v2 = NewSortingVector(tbl->len);
  RSSortingVector_Put(v2, 0, masse, RS_SORTABLE_STR, 0);

  /// test string unicode lowercase normalization
  ASSERT_STREQ("masse", v2->values[0]->strval.str);

  double s2 = 4.444;
  RSSortingVector_Put(v2, 1, &s2, RS_SORTABLE_NUM, 0);

  RSSortingKey sk = {.index = 0, .ascending = 0};

  QueryError qerr;
  QueryError_Init(&qerr);

  // Key 0 holds the strings "hello" (v) and "masse" (v2): the sign of the
  // result is expected to flip with the sort direction.
  int rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
  ASSERT_LT(0, rc);
  ASSERT_EQ(QUERY_OK, qerr.code);
  sk.ascending = 1;
  rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
  ASSERT_GT(0, rc);
  ASSERT_EQ(QUERY_OK, qerr.code);
  // A vector always compares equal to itself
  rc = RSSortingVector_Cmp(v, v, &sk, &qerr);
  ASSERT_EQ(0, rc);
  ASSERT_EQ(QUERY_OK, qerr.code);

  // Key 1 holds the numbers 3.141 (v) and 4.444 (v2)
  sk.index = 1;

  rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
  ASSERT_TRUE(-1 == rc && qerr.code == QUERY_OK);
  sk.ascending = 0;
  rc = RSSortingVector_Cmp(v, v2, &sk, &qerr);
  ASSERT_TRUE(1 == rc && qerr.code == QUERY_OK);

  SortingTable_Free(tbl);
  SortingVector_Free(v);
  SortingVector_Free(v2);
}
1095
/**
 * Round-trips field masks of growing width through WriteVarintFieldMask /
 * ReadVarintFieldMask and checks the encoded size reported for each width.
 */
TEST_F(IndexTest, testVarintFieldMask) {
  t_fieldMask x = 127;
  // expected[i] is the encoded size of a mask made of (i + 1) bytes of 0x7f
  size_t expected[] = {1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 19};
  static_assert(sizeof(expected) / sizeof(expected[0]) >= sizeof(t_fieldMask),
                "expected[] needs one entry per byte of t_fieldMask");
  Buffer b = {0};
  Buffer_Init(&b, 1);
  BufferWriter bw = NewBufferWriter(&b);
  // One round per byte of t_fieldMask; each round extends x by another 0x7f byte
  for (size_t i = 0; i < sizeof(t_fieldMask); i++, x |= x << 8) {
    size_t sz = WriteVarintFieldMask(x, &bw);
    ASSERT_EQ(expected[i], sz);
    // Seek the writer back to offset 0, then read the value back through a
    // fresh reader created over the same buffer
    BufferWriter_Seek(&bw, 0);
    BufferReader br = NewBufferReader(bw.buf);

    t_fieldMask y = ReadVarintFieldMask(&br);

    ASSERT_EQ(y, x);
  }
  Buffer_Free(&b);
}
1114
/**
 * Writes doc ids separated by small gaps and by one very large gap
 * (1 -> 200 -> 1 << 48 -> (1 << 48) + 1), checking the index's `size` field
 * after every write: it is expected to grow only on the large jump. All
 * entries are then read back in order, followed by EOF.
 */
TEST_F(IndexTest, testDeltaSplits) {
  InvertedIndex *index = NewInvertedIndex((IndexFlags)(INDEX_DEFAULT_FLAGS), 1);
  IndexEncoder encoder = InvertedIndex_GetEncoder(index->flags);

  // Each step: the doc id to write and the index size expected afterwards
  struct WriteStep {
    t_docId docId;
    int sizeAfter;
  };
  const t_docId farId = 1LLU << 48;
  const WriteStep steps[] = {{1, 1}, {200, 1}, {farId, 2}, {farId + 1, 2}};

  ForwardIndexEntry entry = {0};
  entry.fieldMask = RS_FIELDMASK_ALL;
  for (const WriteStep &step : steps) {
    entry.docId = step.docId;
    InvertedIndex_WriteForwardIndexEntry(index, encoder, &entry);
    ASSERT_EQ(index->size, step.sizeAfter);
  }

  // Every id must come back unchanged and in the order it was written
  IndexReader *reader = NewTermIndexReader(index, NULL, RS_FIELDMASK_ALL, NULL, 1);
  RSIndexResult *hit = NULL;
  for (const WriteStep &step : steps) {
    ASSERT_EQ(INDEXREAD_OK, IR_Read(reader, &hit));
    ASSERT_EQ(step.docId, hit->docId);
  }

  // Nothing is left after the last written entry
  ASSERT_EQ(INDEXREAD_EOF, IR_Read(reader, &hit));

  IR_Free(reader);
  InvertedIndex_Free(index);
}
1155