1//  Copyright (c) 2017 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package scorch
16
17import (
18	"reflect"
19	"testing"
20
21	"github.com/blevesearch/bleve/document"
22	"github.com/blevesearch/bleve/index"
23)
24
25func TestIndexReader(t *testing.T) {
26	defer func() {
27		err := DestroyTest()
28		if err != nil {
29			t.Fatal(err)
30		}
31	}()
32
33	analysisQueue := index.NewAnalysisQueue(1)
34	idx, err := NewScorch(Name, testConfig, analysisQueue)
35	if err != nil {
36		t.Fatal(err)
37	}
38	err = idx.Open()
39	if err != nil {
40		t.Fatalf("error opening index: %v", err)
41	}
42	defer func() {
43		err := idx.Close()
44		if err != nil {
45			t.Fatal(err)
46		}
47	}()
48
49	var expectedCount uint64
50	doc := document.NewDocument("1")
51	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
52	err = idx.Update(doc)
53	if err != nil {
54		t.Errorf("Error updating index: %v", err)
55	}
56	expectedCount++
57
58	doc = document.NewDocument("2")
59	doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
60	doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors, testAnalyzer))
61	err = idx.Update(doc)
62	if err != nil {
63		t.Errorf("Error updating index: %v", err)
64	}
65	expectedCount++
66
67	indexReader, err := idx.Reader()
68	if err != nil {
69		t.Error(err)
70	}
71	defer func() {
72		err := indexReader.Close()
73		if err != nil {
74			t.Fatal(err)
75		}
76	}()
77
78	// first look for a term that doesn't exist
79	reader, err := indexReader.TermFieldReader([]byte("nope"), "name", true, true, true)
80	if err != nil {
81		t.Errorf("Error accessing term field reader: %v", err)
82	}
83	count := reader.Count()
84	if count != 0 {
85		t.Errorf("Expected doc count to be: %d got: %d", 0, count)
86	}
87	err = reader.Close()
88	if err != nil {
89		t.Fatal(err)
90	}
91
92	reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true)
93	if err != nil {
94		t.Errorf("Error accessing term field reader: %v", err)
95	}
96
97	expectedCount = 2
98	count = reader.Count()
99	if count != expectedCount {
100		t.Errorf("Exptected doc count to be: %d got: %d", expectedCount, count)
101	}
102
103	var match *index.TermFieldDoc
104	var actualCount uint64
105	match, err = reader.Next(nil)
106	for err == nil && match != nil {
107		match, err = reader.Next(nil)
108		if err != nil {
109			t.Errorf("unexpected error reading next")
110		}
111		actualCount++
112	}
113	if actualCount != count {
114		t.Errorf("count was 2, but only saw %d", actualCount)
115	}
116
117	internalIDBogus, err := indexReader.InternalID("a-bogus-docId")
118	if err != nil {
119		t.Fatal(err)
120	}
121	if internalIDBogus != nil {
122		t.Errorf("expected bogus docId to have nil InternalID")
123	}
124
125	internalID2, err := indexReader.InternalID("2")
126	if err != nil {
127		t.Fatal(err)
128	}
129	expectedMatch := &index.TermFieldDoc{
130		ID:   internalID2,
131		Freq: 1,
132		Norm: 0.5773502588272095,
133		Vectors: []*index.TermFieldVector{
134			{
135				Field: "desc",
136				Pos:   3,
137				Start: 9,
138				End:   13,
139			},
140		},
141	}
142	tfr, err := indexReader.TermFieldReader([]byte("rice"), "desc", true, true, true)
143	if err != nil {
144		t.Errorf("unexpected error: %v", err)
145	}
146	match, err = tfr.Next(nil)
147	if err != nil {
148		t.Errorf("unexpected error: %v", err)
149	}
150	if !reflect.DeepEqual(expectedMatch, match) {
151		t.Errorf("got %#v, expected %#v", match, expectedMatch)
152	}
153	err = reader.Close()
154	if err != nil {
155		t.Fatal(err)
156	}
157
158	// now test usage of advance
159	reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true)
160	if err != nil {
161		t.Errorf("Error accessing term field reader: %v", err)
162	}
163
164	match, err = reader.Advance(internalID2, nil)
165	if err != nil {
166		t.Errorf("unexpected error: %v", err)
167	}
168	if match == nil {
169		t.Fatalf("Expected match, got nil")
170	}
171	if !match.ID.Equals(internalID2) {
172		t.Errorf("Expected ID '2', got '%s'", match.ID)
173	}
174	// NOTE: no point in changing this to internal id 3, there is no id 3
175	// the test is looking for something that doens't exist and this doesn't
176	match, err = reader.Advance(index.IndexInternalID("3"), nil)
177	if err != nil {
178		t.Errorf("unexpected error: %v", err)
179	}
180	if match != nil {
181		t.Errorf("expected nil, got %v", match)
182	}
183	err = reader.Close()
184	if err != nil {
185		t.Fatal(err)
186	}
187
188	// now test creating a reader for a field that doesn't exist
189	reader, err = indexReader.TermFieldReader([]byte("water"), "doesnotexist", true, true, true)
190	if err != nil {
191		t.Errorf("Error accessing term field reader: %v", err)
192	}
193	count = reader.Count()
194	if count != 0 {
195		t.Errorf("expected count 0 for reader of non-existent field")
196	}
197	match, err = reader.Next(nil)
198	if err != nil {
199		t.Errorf("unexpected error: %v", err)
200	}
201	if match != nil {
202		t.Errorf("expected nil, got %v", match)
203	}
204	match, err = reader.Advance(index.IndexInternalID("anywhere"), nil)
205	if err != nil {
206		t.Errorf("unexpected error: %v", err)
207	}
208	if match != nil {
209		t.Errorf("expected nil, got %v", match)
210	}
211
212}
213
214func TestIndexDocIdReader(t *testing.T) {
215	defer func() {
216		err := DestroyTest()
217		if err != nil {
218			t.Fatal(err)
219		}
220	}()
221
222	analysisQueue := index.NewAnalysisQueue(1)
223	idx, err := NewScorch(Name, testConfig, analysisQueue)
224	if err != nil {
225		t.Fatal(err)
226	}
227	err = idx.Open()
228	if err != nil {
229		t.Fatalf("error opening index: %v", err)
230	}
231	defer func() {
232		err := idx.Close()
233		if err != nil {
234			t.Fatal(err)
235		}
236	}()
237
238	var expectedCount uint64
239	doc := document.NewDocument("1")
240	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
241	err = idx.Update(doc)
242	if err != nil {
243		t.Errorf("Error updating index: %v", err)
244	}
245	expectedCount++
246
247	doc = document.NewDocument("2")
248	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test test test")))
249	doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors))
250	err = idx.Update(doc)
251	if err != nil {
252		t.Errorf("Error updating index: %v", err)
253	}
254	expectedCount++
255
256	indexReader, err := idx.Reader()
257	if err != nil {
258		t.Error(err)
259	}
260	defer func() {
261		err := indexReader.Close()
262		if err != nil {
263			t.Error(err)
264		}
265	}()
266
267	// first get all doc ids
268	reader, err := indexReader.DocIDReaderAll()
269	if err != nil {
270		t.Errorf("Error accessing doc id reader: %v", err)
271	}
272	defer func() {
273		err := reader.Close()
274		if err != nil {
275			t.Fatal(err)
276		}
277	}()
278
279	id, err := reader.Next()
280	count := uint64(0)
281	for id != nil {
282		count++
283		id, err = reader.Next()
284	}
285	if count != expectedCount {
286		t.Errorf("expected %d, got %d", expectedCount, count)
287	}
288
289	// try it again, but jump to the second doc this time
290	reader2, err := indexReader.DocIDReaderAll()
291	if err != nil {
292		t.Errorf("Error accessing doc id reader: %v", err)
293	}
294	defer func() {
295		err := reader2.Close()
296		if err != nil {
297			t.Error(err)
298		}
299	}()
300
301	internalID2, err := indexReader.InternalID("2")
302	if err != nil {
303		t.Fatal(err)
304	}
305
306	id, err = reader2.Advance(internalID2)
307	if err != nil {
308		t.Error(err)
309	}
310	if !id.Equals(internalID2) {
311		t.Errorf("expected to find id '2', got '%s'", id)
312	}
313
314	// again 3 doesn't exist cannot use internal id for 3 as there is none
315	// the important aspect is that this id doesn't exist, so its ok
316	id, err = reader2.Advance(index.IndexInternalID("3"))
317	if err != nil {
318		t.Error(err)
319	}
320	if id != nil {
321		t.Errorf("expected to find id '', got '%s'", id)
322	}
323}
324
325func TestIndexDocIdOnlyReader(t *testing.T) {
326	defer func() {
327		err := DestroyTest()
328		if err != nil {
329			t.Fatal(err)
330		}
331	}()
332
333	analysisQueue := index.NewAnalysisQueue(1)
334	idx, err := NewScorch(Name, testConfig, analysisQueue)
335	if err != nil {
336		t.Fatal(err)
337	}
338	err = idx.Open()
339	if err != nil {
340		t.Fatalf("error opening index: %v", err)
341	}
342	defer func() {
343		err := idx.Close()
344		if err != nil {
345			t.Fatal(err)
346		}
347	}()
348
349	doc := document.NewDocument("1")
350	err = idx.Update(doc)
351	if err != nil {
352		t.Errorf("Error updating index: %v", err)
353	}
354
355	doc = document.NewDocument("3")
356	err = idx.Update(doc)
357	if err != nil {
358		t.Errorf("Error updating index: %v", err)
359	}
360
361	doc = document.NewDocument("5")
362	err = idx.Update(doc)
363	if err != nil {
364		t.Errorf("Error updating index: %v", err)
365	}
366
367	doc = document.NewDocument("7")
368	err = idx.Update(doc)
369	if err != nil {
370		t.Errorf("Error updating index: %v", err)
371	}
372
373	doc = document.NewDocument("9")
374	err = idx.Update(doc)
375	if err != nil {
376		t.Errorf("Error updating index: %v", err)
377	}
378
379	indexReader, err := idx.Reader()
380	if err != nil {
381		t.Error(err)
382	}
383	defer func() {
384		err := indexReader.Close()
385		if err != nil {
386			t.Error(err)
387		}
388	}()
389
390	onlyIds := []string{"1", "5", "9"}
391	reader, err := indexReader.DocIDReaderOnly(onlyIds)
392	if err != nil {
393		t.Errorf("Error accessing doc id reader: %v", err)
394	}
395	defer func() {
396		err := reader.Close()
397		if err != nil {
398			t.Fatal(err)
399		}
400	}()
401
402	id, err := reader.Next()
403	count := uint64(0)
404	for id != nil {
405		count++
406		id, err = reader.Next()
407		if err != nil {
408			t.Fatal(err)
409		}
410	}
411	if count != 3 {
412		t.Errorf("expected 3, got %d", count)
413	}
414
415	// commented out because advance works with internal ids
416	// this test presumes we see items in external doc id order
417	// which is no longer the case, so simply converting external ids
418	// to internal ones is not logically correct
419	// not removing though because we need some way to test Advance()
420
421	// // try it again, but jump
422	// reader2, err := indexReader.DocIDReaderOnly(onlyIds)
423	// if err != nil {
424	// 	t.Errorf("Error accessing doc id reader: %v", err)
425	// }
426	// defer func() {
427	// 	err := reader2.Close()
428	// 	if err != nil {
429	// 		t.Error(err)
430	// 	}
431	// }()
432	//
433	// id, err = reader2.Advance(index.IndexInternalID("5"))
434	// if err != nil {
435	// 	t.Error(err)
436	// }
437	// if !id.Equals(index.IndexInternalID("5")) {
438	// 	t.Errorf("expected to find id '5', got '%s'", id)
439	// }
440	//
441	// id, err = reader2.Advance(index.IndexInternalID("a"))
442	// if err != nil {
443	// 	t.Error(err)
444	// }
445	// if id != nil {
446	// 	t.Errorf("expected to find id '', got '%s'", id)
447	// }
448
449	// some keys aren't actually there
450	onlyIds = []string{"0", "2", "4", "5", "6", "8", "a"}
451	reader3, err := indexReader.DocIDReaderOnly(onlyIds)
452	if err != nil {
453		t.Errorf("Error accessing doc id reader: %v", err)
454	}
455	defer func() {
456		err := reader3.Close()
457		if err != nil {
458			t.Error(err)
459		}
460	}()
461
462	id, err = reader3.Next()
463	count = uint64(0)
464	for id != nil {
465		count++
466		id, err = reader3.Next()
467	}
468	if count != 1 {
469		t.Errorf("expected 1, got %d", count)
470	}
471
472	// commented out because advance works with internal ids
473	// this test presumes we see items in external doc id order
474	// which is no longer the case, so simply converting external ids
475	// to internal ones is not logically correct
476	// not removing though because we need some way to test Advance()
477
478	// // mix advance and next
479	// onlyIds = []string{"0", "1", "3", "5", "6", "9"}
480	// reader4, err := indexReader.DocIDReaderOnly(onlyIds)
481	// if err != nil {
482	// 	t.Errorf("Error accessing doc id reader: %v", err)
483	// }
484	// defer func() {
485	// 	err := reader4.Close()
486	// 	if err != nil {
487	// 		t.Error(err)
488	// 	}
489	// }()
490	//
491	// // first key is "1"
492	// id, err = reader4.Next()
493	// if err != nil {
494	// 	t.Error(err)
495	// }
496	// if !id.Equals(index.IndexInternalID("1")) {
497	// 	t.Errorf("expected to find id '1', got '%s'", id)
498	// }
499	//
500	// // advancing to key we dont have gives next
501	// id, err = reader4.Advance(index.IndexInternalID("2"))
502	// if err != nil {
503	// 	t.Error(err)
504	// }
505	// if !id.Equals(index.IndexInternalID("3")) {
506	// 	t.Errorf("expected to find id '3', got '%s'", id)
507	// }
508	//
509	// // next after advance works
510	// id, err = reader4.Next()
511	// if err != nil {
512	// 	t.Error(err)
513	// }
514	// if !id.Equals(index.IndexInternalID("5")) {
515	// 	t.Errorf("expected to find id '5', got '%s'", id)
516	// }
517	//
518	// // advancing to key we do have works
519	// id, err = reader4.Advance(index.IndexInternalID("9"))
520	// if err != nil {
521	// 	t.Error(err)
522	// }
523	// if !id.Equals(index.IndexInternalID("9")) {
524	// 	t.Errorf("expected to find id '9', got '%s'", id)
525	// }
526	//
527	// // advance backwards at end
528	// id, err = reader4.Advance(index.IndexInternalID("4"))
529	// if err != nil {
530	// 	t.Error(err)
531	// }
532	// if !id.Equals(index.IndexInternalID("5")) {
533	// 	t.Errorf("expected to find id '5', got '%s'", id)
534	// }
535	//
536	// // next after advance works
537	// id, err = reader4.Next()
538	// if err != nil {
539	// 	t.Error(err)
540	// }
541	// if !id.Equals(index.IndexInternalID("9")) {
542	// 	t.Errorf("expected to find id '9', got '%s'", id)
543	// }
544	//
545	// // advance backwards to key that exists, but not in only set
546	// id, err = reader4.Advance(index.IndexInternalID("7"))
547	// if err != nil {
548	// 	t.Error(err)
549	// }
550	// if !id.Equals(index.IndexInternalID("9")) {
551	// 	t.Errorf("expected to find id '9', got '%s'", id)
552	// }
553
554}
555
556func TestSegmentIndexAndLocalDocNumFromGlobal(t *testing.T) {
557	tests := []struct {
558		offsets      []uint64
559		globalDocNum uint64
560		segmentIndex int
561		localDocNum  uint64
562	}{
563		// just 1 segment
564		{
565			offsets:      []uint64{0},
566			globalDocNum: 0,
567			segmentIndex: 0,
568			localDocNum:  0,
569		},
570		{
571			offsets:      []uint64{0},
572			globalDocNum: 1,
573			segmentIndex: 0,
574			localDocNum:  1,
575		},
576		{
577			offsets:      []uint64{0},
578			globalDocNum: 25,
579			segmentIndex: 0,
580			localDocNum:  25,
581		},
582		// now 2 segments, 30 docs in first
583		{
584			offsets:      []uint64{0, 30},
585			globalDocNum: 0,
586			segmentIndex: 0,
587			localDocNum:  0,
588		},
589		{
590			offsets:      []uint64{0, 30},
591			globalDocNum: 1,
592			segmentIndex: 0,
593			localDocNum:  1,
594		},
595		{
596			offsets:      []uint64{0, 30},
597			globalDocNum: 25,
598			segmentIndex: 0,
599			localDocNum:  25,
600		},
601		{
602			offsets:      []uint64{0, 30},
603			globalDocNum: 30,
604			segmentIndex: 1,
605			localDocNum:  0,
606		},
607		{
608			offsets:      []uint64{0, 30},
609			globalDocNum: 35,
610			segmentIndex: 1,
611			localDocNum:  5,
612		},
613		// lots of segments
614		{
615			offsets:      []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
616			globalDocNum: 0,
617			segmentIndex: 0,
618			localDocNum:  0,
619		},
620		{
621			offsets:      []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
622			globalDocNum: 25,
623			segmentIndex: 0,
624			localDocNum:  25,
625		},
626		{
627			offsets:      []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
628			globalDocNum: 35,
629			segmentIndex: 1,
630			localDocNum:  5,
631		},
632		{
633			offsets:      []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
634			globalDocNum: 100,
635			segmentIndex: 4,
636			localDocNum:  1,
637		},
638		{
639			offsets:      []uint64{0, 30, 40, 70, 99, 172, 800, 25000},
640			globalDocNum: 825,
641			segmentIndex: 6,
642			localDocNum:  25,
643		},
644	}
645
646	for _, test := range tests {
647		i := &IndexSnapshot{
648			offsets: test.offsets,
649			refs:    1,
650		}
651		gotSegmentIndex, gotLocalDocNum := i.segmentIndexAndLocalDocNumFromGlobal(test.globalDocNum)
652		if gotSegmentIndex != test.segmentIndex {
653			t.Errorf("got segment index %d expected %d for offsets %v globalDocNum %d", gotSegmentIndex, test.segmentIndex, test.offsets, test.globalDocNum)
654		}
655		if gotLocalDocNum != test.localDocNum {
656			t.Errorf("got localDocNum %d expected %d for offsets %v globalDocNum %d", gotLocalDocNum, test.localDocNum, test.offsets, test.globalDocNum)
657		}
658		err := i.DecRef()
659		if err != nil {
660			t.Errorf("expected no err, got: %v", err)
661		}
662	}
663}
664