1//  Copyright (c) 2013 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package searcher
16
17import (
18	"reflect"
19	"testing"
20
21	"github.com/blevesearch/bleve/index"
22	"github.com/blevesearch/bleve/search"
23)
24
25func TestPhraseSearch(t *testing.T) {
26
27	twoDocIndexReader, err := twoDocIndex.Reader()
28	if err != nil {
29		t.Error(err)
30	}
31	defer func() {
32		err := twoDocIndexReader.Close()
33		if err != nil {
34			t.Fatal(err)
35		}
36	}()
37
38	soptions := search.SearcherOptions{Explain: true, IncludeTermVectors: true}
39	phraseSearcher, err := NewPhraseSearcher(twoDocIndexReader, []string{"angst", "beer"}, "desc", soptions)
40	if err != nil {
41		t.Fatal(err)
42	}
43
44	tests := []struct {
45		searcher   search.Searcher
46		results    []*search.DocumentMatch
47		locations  map[string]map[string][]search.Location
48		fieldterms [][2]string
49	}{
50		{
51			searcher: phraseSearcher,
52			results: []*search.DocumentMatch{
53				{
54					IndexInternalID: index.IndexInternalID("2"),
55					Score:           1.0807601687084403,
56				},
57			},
58			locations:  map[string]map[string][]search.Location{"desc": map[string][]search.Location{"beer": []search.Location{search.Location{Pos: 2, Start: 6, End: 10}}, "angst": []search.Location{search.Location{Pos: 1, Start: 0, End: 5}}}},
59			fieldterms: [][2]string{[2]string{"desc", "beer"}, [2]string{"desc", "angst"}},
60		},
61	}
62
63	for testIndex, test := range tests {
64		defer func() {
65			err := test.searcher.Close()
66			if err != nil {
67				t.Fatal(err)
68			}
69		}()
70
71		ctx := &search.SearchContext{
72			DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
73		}
74		next, err := test.searcher.Next(ctx)
75		i := 0
76		for err == nil && next != nil {
77			if i < len(test.results) {
78				if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) {
79					t.Errorf("expected result %d to have id %s got %s for test %d\n", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex)
80				}
81				if next.Score != test.results[i].Score {
82					t.Errorf("expected result %d to have score %v got %v for test %d\n", i, test.results[i].Score, next.Score, testIndex)
83					t.Logf("scoring explanation: %s\n", next.Expl)
84				}
85				for _, ft := range test.fieldterms {
86					locs := next.Locations[ft[0]][ft[1]]
87					explocs := test.locations[ft[0]][ft[1]]
88					if len(explocs) != len(locs) {
89						t.Fatalf("expected result %d to have %d Locations (%#v) but got %d (%#v) for test %d with field %q and term %q\n", i, len(explocs), explocs, len(locs), locs, testIndex, ft[0], ft[1])
90					}
91					for ind, exploc := range explocs {
92						if !reflect.DeepEqual(*locs[ind], exploc) {
93							t.Errorf("expected result %d to have Location %v got %v for test %d\n", i, exploc, locs[ind], testIndex)
94						}
95					}
96				}
97			}
98
99			ctx.DocumentMatchPool.Put(next)
100			next, err = test.searcher.Next(ctx)
101			i++
102		}
103		if err != nil {
104			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
105		}
106		if len(test.results) != i {
107			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
108		}
109	}
110}
111
112func TestMultiPhraseSearch(t *testing.T) {
113
114	soptions := search.SearcherOptions{Explain: true, IncludeTermVectors: true}
115
116	tests := []struct {
117		phrase [][]string
118		docids [][]byte
119	}{
120		{
121			phrase: [][]string{[]string{"angst", "what"}, []string{"beer"}},
122			docids: [][]byte{[]byte("2")},
123		},
124	}
125
126	for i, test := range tests {
127
128		reader, err := twoDocIndex.Reader()
129		if err != nil {
130			t.Error(err)
131		}
132		searcher, err := NewMultiPhraseSearcher(reader, test.phrase, "desc", soptions)
133		if err != nil {
134			t.Error(err)
135		}
136		ctx := &search.SearchContext{
137			DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
138		}
139		next, err := searcher.Next(ctx)
140		var actualIds [][]byte
141		for err == nil && next != nil {
142			actualIds = append(actualIds, next.IndexInternalID)
143			ctx.DocumentMatchPool.Put(next)
144			next, err = searcher.Next(ctx)
145		}
146		if err != nil {
147			t.Fatalf("error iterating searcher: %v for test %d", err, i)
148		}
149		if !reflect.DeepEqual(test.docids, actualIds) {
150			t.Fatalf("expected ids: %v, got %v", test.docids, actualIds)
151		}
152
153		err = searcher.Close()
154		if err != nil {
155			t.Error(err)
156		}
157
158		err = reader.Close()
159		if err != nil {
160			t.Error(err)
161		}
162	}
163}
164
165func TestFindPhrasePaths(t *testing.T) {
166	tests := []struct {
167		phrase [][]string
168		tlm    search.TermLocationMap
169		paths  []phrasePath
170	}{
171		// simplest matching case
172		{
173			phrase: [][]string{[]string{"cat"}, []string{"dog"}},
174			tlm: search.TermLocationMap{
175				"cat": search.Locations{
176					&search.Location{
177						Pos: 1,
178					},
179				},
180				"dog": search.Locations{
181					&search.Location{
182						Pos: 2,
183					},
184				},
185			},
186			paths: []phrasePath{
187				phrasePath{
188					&phrasePart{"cat", &search.Location{Pos: 1}},
189					&phrasePart{"dog", &search.Location{Pos: 2}},
190				},
191			},
192		},
193		// second term missing, no match
194		{
195			phrase: [][]string{[]string{"cat"}, []string{"dog"}},
196			tlm: search.TermLocationMap{
197				"cat": search.Locations{
198					&search.Location{
199						Pos: 1,
200					},
201				},
202			},
203			paths: nil,
204		},
205		// second term exists but in wrong position
206		{
207			phrase: [][]string{[]string{"cat"}, []string{"dog"}},
208			tlm: search.TermLocationMap{
209				"cat": search.Locations{
210					&search.Location{
211						Pos: 1,
212					},
213				},
214				"dog": search.Locations{
215					&search.Location{
216						Pos: 3,
217					},
218				},
219			},
220			paths: nil,
221		},
222		// matches multiple times
223		{
224			phrase: [][]string{[]string{"cat"}, []string{"dog"}},
225			tlm: search.TermLocationMap{
226				"cat": search.Locations{
227					&search.Location{
228						Pos: 1,
229					},
230					&search.Location{
231						Pos: 8,
232					},
233				},
234				"dog": search.Locations{
235					&search.Location{
236						Pos: 2,
237					},
238					&search.Location{
239						Pos: 9,
240					},
241				},
242			},
243			paths: []phrasePath{
244				phrasePath{
245					&phrasePart{"cat", &search.Location{Pos: 1}},
246					&phrasePart{"dog", &search.Location{Pos: 2}},
247				},
248				phrasePath{
249					&phrasePart{"cat", &search.Location{Pos: 8}},
250					&phrasePart{"dog", &search.Location{Pos: 9}},
251				},
252			},
253		},
254		// match over gaps
255		{
256			phrase: [][]string{[]string{"cat"}, []string{""}, []string{"dog"}},
257			tlm: search.TermLocationMap{
258				"cat": search.Locations{
259					&search.Location{
260						Pos: 1,
261					},
262				},
263				"dog": search.Locations{
264					&search.Location{
265						Pos: 3,
266					},
267				},
268			},
269			paths: []phrasePath{
270				phrasePath{
271					&phrasePart{"cat", &search.Location{Pos: 1}},
272					&phrasePart{"dog", &search.Location{Pos: 3}},
273				},
274			},
275		},
276		// match with leading ""
277		{
278			phrase: [][]string{[]string{""}, []string{"cat"}, []string{"dog"}},
279			tlm: search.TermLocationMap{
280				"cat": search.Locations{
281					&search.Location{
282						Pos: 2,
283					},
284				},
285				"dog": search.Locations{
286					&search.Location{
287						Pos: 3,
288					},
289				},
290			},
291			paths: []phrasePath{
292				phrasePath{
293					&phrasePart{"cat", &search.Location{Pos: 2}},
294					&phrasePart{"dog", &search.Location{Pos: 3}},
295				},
296			},
297		},
298		// match with trailing ""
299		{
300			phrase: [][]string{[]string{"cat"}, []string{"dog"}, []string{""}},
301			tlm: search.TermLocationMap{
302				"cat": search.Locations{
303					&search.Location{
304						Pos: 2,
305					},
306				},
307				"dog": search.Locations{
308					&search.Location{
309						Pos: 3,
310					},
311				},
312			},
313			paths: []phrasePath{
314				phrasePath{
315					&phrasePart{"cat", &search.Location{Pos: 2}},
316					&phrasePart{"dog", &search.Location{Pos: 3}},
317				},
318			},
319		},
320	}
321
322	for i, test := range tests {
323		actualPaths := findPhrasePaths(0, nil, test.phrase, test.tlm, nil, 0)
324		if !reflect.DeepEqual(actualPaths, test.paths) {
325			t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i)
326		}
327	}
328}
329
330func TestFindPhrasePathsSloppy(t *testing.T) {
331	tlm := search.TermLocationMap{
332		"one": search.Locations{
333			&search.Location{
334				Pos: 1,
335			},
336		},
337		"two": search.Locations{
338			&search.Location{
339				Pos: 2,
340			},
341		},
342		"three": search.Locations{
343			&search.Location{
344				Pos: 3,
345			},
346		},
347		"four": search.Locations{
348			&search.Location{
349				Pos: 4,
350			},
351		},
352		"five": search.Locations{
353			&search.Location{
354				Pos: 5,
355			},
356		},
357	}
358
359	tests := []struct {
360		phrase [][]string
361		paths  []phrasePath
362		slop   int
363	}{
364		// no match
365		{
366			phrase: [][]string{[]string{"one"}, []string{"five"}},
367			slop:   2,
368		},
369		// should match
370		{
371			phrase: [][]string{[]string{"one"}, []string{"five"}},
372			slop:   3,
373			paths: []phrasePath{
374				phrasePath{
375					&phrasePart{"one", &search.Location{Pos: 1}},
376					&phrasePart{"five", &search.Location{Pos: 5}},
377				},
378			},
379		},
380		// slop 0 finds exact match
381		{
382			phrase: [][]string{[]string{"four"}, []string{"five"}},
383			slop:   0,
384			paths: []phrasePath{
385				phrasePath{
386					&phrasePart{"four", &search.Location{Pos: 4}},
387					&phrasePart{"five", &search.Location{Pos: 5}},
388				},
389			},
390		},
391		// slop 0 does not find exact match (reversed)
392		{
393			phrase: [][]string{[]string{"two"}, []string{"one"}},
394			slop:   0,
395		},
396		// slop 1 finds exact match
397		{
398			phrase: [][]string{[]string{"one"}, []string{"two"}},
399			slop:   1,
400			paths: []phrasePath{
401				phrasePath{
402					&phrasePart{"one", &search.Location{Pos: 1}},
403					&phrasePart{"two", &search.Location{Pos: 2}},
404				},
405			},
406		},
407		// slop 1 *still* does not find exact match (reversed) requires at least 2
408		{
409			phrase: [][]string{[]string{"two"}, []string{"one"}},
410			slop:   1,
411		},
412		// slop 2 does finds exact match reversed
413		{
414			phrase: [][]string{[]string{"two"}, []string{"one"}},
415			slop:   2,
416			paths: []phrasePath{
417				phrasePath{
418					&phrasePart{"two", &search.Location{Pos: 2}},
419					&phrasePart{"one", &search.Location{Pos: 1}},
420				},
421			},
422		},
423		// slop 2 not enough for this
424		{
425			phrase: [][]string{[]string{"three"}, []string{"one"}},
426			slop:   2,
427		},
428		// slop should be cumulative
429		{
430			phrase: [][]string{[]string{"one"}, []string{"three"}, []string{"five"}},
431			slop:   2,
432			paths: []phrasePath{
433				phrasePath{
434					&phrasePart{"one", &search.Location{Pos: 1}},
435					&phrasePart{"three", &search.Location{Pos: 3}},
436					&phrasePart{"five", &search.Location{Pos: 5}},
437				},
438			},
439		},
440		// should require 6
441		{
442			phrase: [][]string{[]string{"five"}, []string{"three"}, []string{"one"}},
443			slop:   5,
444		},
445		// so lets try 6
446		{
447			phrase: [][]string{[]string{"five"}, []string{"three"}, []string{"one"}},
448			slop:   6,
449			paths: []phrasePath{
450				phrasePath{
451					&phrasePart{"five", &search.Location{Pos: 5}},
452					&phrasePart{"three", &search.Location{Pos: 3}},
453					&phrasePart{"one", &search.Location{Pos: 1}},
454				},
455			},
456		},
457	}
458
459	for i, test := range tests {
460		actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, test.slop)
461		if !reflect.DeepEqual(actualPaths, test.paths) {
462			t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i)
463		}
464	}
465}
466
467func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) {
468	tlm := search.TermLocationMap{
469		"one": search.Locations{
470			&search.Location{
471				Pos: 1,
472			},
473			&search.Location{
474				Pos: 5,
475			},
476		},
477		"two": search.Locations{
478			&search.Location{
479				Pos: 2,
480			},
481			&search.Location{
482				Pos: 4,
483			},
484		},
485		"three": search.Locations{
486			&search.Location{
487				Pos: 3,
488			},
489		},
490	}
491
492	tests := []struct {
493		phrase [][]string
494		paths  []phrasePath
495		slop   int
496	}{
497		// search non palyndrone, exact match
498		{
499			phrase: [][]string{[]string{"two"}, []string{"three"}},
500			slop:   0,
501			paths: []phrasePath{
502				phrasePath{
503					&phrasePart{"two", &search.Location{Pos: 2}},
504					&phrasePart{"three", &search.Location{Pos: 3}},
505				},
506			},
507		},
508		// same with slop 2 (not required) (find it twice)
509		{
510			phrase: [][]string{[]string{"two"}, []string{"three"}},
511			slop:   2,
512			paths: []phrasePath{
513				phrasePath{
514					&phrasePart{"two", &search.Location{Pos: 2}},
515					&phrasePart{"three", &search.Location{Pos: 3}},
516				},
517				phrasePath{
518					&phrasePart{"two", &search.Location{Pos: 4}},
519					&phrasePart{"three", &search.Location{Pos: 3}},
520				},
521			},
522		},
523		// palyndrone reversed
524		{
525			phrase: [][]string{[]string{"three"}, []string{"two"}},
526			slop:   2,
527			paths: []phrasePath{
528				phrasePath{
529					&phrasePart{"three", &search.Location{Pos: 3}},
530					&phrasePart{"two", &search.Location{Pos: 2}},
531				},
532				phrasePath{
533					&phrasePart{"three", &search.Location{Pos: 3}},
534					&phrasePart{"two", &search.Location{Pos: 4}},
535				},
536			},
537		},
538	}
539
540	for i, test := range tests {
541		actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, test.slop)
542		if !reflect.DeepEqual(actualPaths, test.paths) {
543			t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i)
544		}
545	}
546}
547
548func TestFindMultiPhrasePaths(t *testing.T) {
549
550	tlm := search.TermLocationMap{
551		"cat": search.Locations{
552			&search.Location{
553				Pos: 1,
554			},
555		},
556		"dog": search.Locations{
557			&search.Location{
558				Pos: 2,
559			},
560		},
561		"frog": search.Locations{
562			&search.Location{
563				Pos: 3,
564			},
565		},
566	}
567
568	tests := []struct {
569		phrase [][]string
570		paths  []phrasePath
571	}{
572		// simplest, one of two possible terms matches
573		{
574			phrase: [][]string{[]string{"cat", "rat"}, []string{"dog"}},
575			paths: []phrasePath{
576				phrasePath{
577					&phrasePart{"cat", &search.Location{Pos: 1}},
578					&phrasePart{"dog", &search.Location{Pos: 2}},
579				},
580			},
581		},
582		// two possible terms, neither work
583		{
584			phrase: [][]string{[]string{"cat", "rat"}, []string{"chicken"}},
585		},
586		// two possible terms, one works, but out of position with next
587		{
588			phrase: [][]string{[]string{"cat", "rat"}, []string{"frog"}},
589		},
590		// matches multiple times, with different pairing
591		{
592			phrase: [][]string{[]string{"cat", "dog"}, []string{"dog", "frog"}},
593			paths: []phrasePath{
594				phrasePath{
595					&phrasePart{"cat", &search.Location{Pos: 1}},
596					&phrasePart{"dog", &search.Location{Pos: 2}},
597				},
598				phrasePath{
599					&phrasePart{"dog", &search.Location{Pos: 2}},
600					&phrasePart{"frog", &search.Location{Pos: 3}},
601				},
602			},
603		},
604		// multi-match over a gap
605		{
606			phrase: [][]string{[]string{"cat", "rat"}, []string{""}, []string{"frog"}},
607			paths: []phrasePath{
608				phrasePath{
609					&phrasePart{"cat", &search.Location{Pos: 1}},
610					&phrasePart{"frog", &search.Location{Pos: 3}},
611				},
612			},
613		},
614		// multi-match over a gap (same as before, but with empty term list)
615		{
616			phrase: [][]string{[]string{"cat", "rat"}, []string{}, []string{"frog"}},
617			paths: []phrasePath{
618				phrasePath{
619					&phrasePart{"cat", &search.Location{Pos: 1}},
620					&phrasePart{"frog", &search.Location{Pos: 3}},
621				},
622			},
623		},
624		// multi-match over a gap (same once again, but nil term list)
625		{
626			phrase: [][]string{[]string{"cat", "rat"}, nil, []string{"frog"}},
627			paths: []phrasePath{
628				phrasePath{
629					&phrasePart{"cat", &search.Location{Pos: 1}},
630					&phrasePart{"frog", &search.Location{Pos: 3}},
631				},
632			},
633		},
634	}
635
636	for i, test := range tests {
637		actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, 0)
638		if !reflect.DeepEqual(actualPaths, test.paths) {
639			t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i)
640		}
641	}
642}
643