1//  Copyright (c) 2014 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package test
16
17import (
18	"bytes"
19	"encoding/json"
20	"fmt"
21	"math"
22	"math/rand"
23	"os"
24	"reflect"
25	"strconv"
26	"strings"
27	"testing"
28	"text/template"
29
30	"github.com/blevesearch/bleve"
31	"github.com/blevesearch/bleve/index/scorch"
32	"github.com/blevesearch/bleve/index/store/boltdb"
33	"github.com/blevesearch/bleve/index/upsidedown"
34	"github.com/blevesearch/bleve/mapping"
35	"github.com/blevesearch/bleve/search"
36)
37
38// Tests scorch indexer versus upsidedown/bolt indexer against various
39// templated queries.  Example usage from the bleve top-level directory...
40//
41//     go test -v -run TestScorchVersusUpsideDownBolt ./test
42//     VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test
43//
44func TestScorchVersusUpsideDownBoltAll(t *testing.T) {
45	(&VersusTest{
46		t:                    t,
47		NumDocs:              1000,
48		MaxWordsPerDoc:       20,
49		NumWords:             10,
50		BatchSize:            10,
51		NumAttemptsPerSearch: 100,
52	}).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
53}
54
55func TestScorchVersusUpsideDownBoltSmallMNSAM(t *testing.T) {
56	(&VersusTest{
57		t:                    t,
58		Focus:                "must-not-same-as-must",
59		NumDocs:              5,
60		MaxWordsPerDoc:       2,
61		NumWords:             1,
62		BatchSize:            1,
63		NumAttemptsPerSearch: 1,
64	}).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
65}
66
67func TestScorchVersusUpsideDownBoltSmallCMP11(t *testing.T) {
68	(&VersusTest{
69		t:                    t,
70		Focus:                "conjuncts-match-phrase-1-1",
71		NumDocs:              30,
72		MaxWordsPerDoc:       8,
73		NumWords:             2,
74		BatchSize:            1,
75		NumAttemptsPerSearch: 1,
76	}).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
77}
78
79// -------------------------------------------------------
80
81// Templates used to compare search results in the "versus" tests.
82var testVersusSearchTemplates = []string{
83	`{
84      "about": "expected to return zero hits",
85      "query": {
86       "query": "title:notARealTitle"
87      }
88     }`,
89	`{
90      "about": "try straight word()'s",
91      "query": {
92       "query": "body:{{word}}"
93      }
94     }`,
95	`{
96      "about": "conjuncts on same term",
97      "query": {
98        "conjuncts": [
99          { "field": "body", "term": "{{word}}", "boost": 1.0 },
100          { "field": "body", "term": "{{word}}", "boost": 1.0 }
101        ]
102      }
103     }`,
104	`{
105      "about": "disjuncts on same term",
106      "query": {
107        "disjuncts": [
108          { "field": "body", "term": "{{word}}", "boost": 1.0 },
109          { "field": "body", "term": "{{word}}", "boost": 1.0 }
110        ]
111      }
112     }`,
113	`{
114      "about": "never-matching-title-conjuncts",
115      "query": {
116        "conjuncts": [
117          {"field": "body", "match": "{{word}}"},
118          {"field": "body", "match": "{{word}}"},
119          {"field": "title", "match": "notAnActualTitle"}
120        ]
121      }
122     }`,
123	`{
124      "about": "never-matching-title-disjuncts",
125      "query": {
126        "disjuncts": [
127          {"field": "body", "match": "{{word}}"},
128          {"field": "body", "match": "{{word}}"},
129          {"field": "title", "match": "notAnActualTitle"}
130        ]
131      }
132     }`,
133	`{
134      "about": "must-not-never-matches",
135      "query": {
136        "must_not": {"disjuncts": [
137          {"field": "title", "match": "notAnActualTitle"}
138        ]},
139        "should": {"disjuncts": [
140          {"field": "body", "match": "{{word}}"}
141        ]}
142      }
143     }`,
144	`{
145      "about": "must-not-only",
146      "query": {
147        "must_not": {"disjuncts": [
148          {"field": "body", "term": "{{word}}"}
149        ]}
150      }
151     }`,
152	`{
153      "about": "must-not-same-as-must -- see: MB-27291",
154      "query": {
155        "must_not": {"disjuncts": [
156          {"field": "body", "match": "{{word}}"}
157        ]},
158        "must": {"conjuncts": [
159          {"field": "body", "match": "{{word}}"}
160        ]}
161      }
162     }`,
163	`{
164      "about": "must-not-same-as-should",
165      "query": {
166        "must_not": {"disjuncts": [
167          {"field": "body", "match": "{{word}}"}
168        ]},
169        "should": {"disjuncts": [
170          {"field": "body", "match": "{{word}}"}
171        ]}
172      }
173     }`,
174	`{
175      "about": "inspired by testrunner RQG issue -- see: MB-27291",
176      "query": {
177        "must_not": {"disjuncts": [
178          {"field": "title", "match": "Trista Allen"},
179          {"field": "body", "match": "{{word}}"}
180        ]},
181        "should": {"disjuncts": [
182          {"field": "title", "match": "Kallie Safiya Amara"},
183          {"field": "body", "match": "{{word}}"}
184        ]}
185      }
186     }`,
187	`{
188      "about": "conjuncts-match-phrase-1-1 inspired by testrunner RQG issue -- see: MB-27291",
189      "query": {
190        "conjuncts": [
191          {"field": "body", "match": "{{bodyWord 0}}"},
192          {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 1}}"}
193        ]
194      }
195     }`,
196	`{
197      "about": "conjuncts-match-phrase-1-2 inspired by testrunner RQG issue -- see: MB-27291 -- FAILS!!",
198      "query": {
199        "conjuncts": [
200          {"field": "body", "match": "{{bodyWord 0}}"},
201          {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 2}}"}
202        ]
203      }
204     }`,
205}
206
207// -------------------------------------------------------
208
209type VersusTest struct {
210	t *testing.T
211
212	// Use environment variable VERBOSE=<integer> that's > 0 for more
213	// verbose output.
214	Verbose int
215
216	// Allow user to focus on particular search templates, where
217	// where the search template must contain the Focus string.
218	Focus string
219
220	NumDocs              int // Number of docs to insert.
221	MaxWordsPerDoc       int // Max number words in each doc's Body field.
222	NumWords             int // Total number of words in the dictionary.
223	BatchSize            int // Batch size when inserting docs.
224	NumAttemptsPerSearch int // For each search template, number of searches to try.
225
226	// The Bodies is an array with length NumDocs, where each entry
227	// is the words in a doc's Body field.
228	Bodies [][]string
229
230	CurAttempt  int
231	TotAttempts int
232}
233
234// -------------------------------------------------------
235
236func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB bleve.Index) {
237	t := vt.t
238
239	funcMap := template.FuncMap{
240		// Returns a word.  The word may or may not be in any
241		// document's body.
242		"word": func() string {
243			return vt.genWord(vt.CurAttempt % vt.NumWords)
244		},
245		// Picks a document and returns the i'th word in that
246		// document's body.  You can use this in searches to
247		// definitely find at least one document.
248		"bodyWord": func(i int) string {
249			body := vt.Bodies[vt.CurAttempt%len(vt.Bodies)]
250			if len(body) <= 0 {
251				return ""
252			}
253			return body[i%len(body)]
254		},
255	}
256
257	// Optionally allow call to focus on a particular search templates,
258	// where the search template must contain the vt.Focus string.
259	if vt.Focus == "" {
260		vt.Focus = os.Getenv("FOCUS")
261	}
262
263	for i, searchTemplate := range searchTemplates {
264		if vt.Focus != "" && !strings.Contains(searchTemplate, vt.Focus) {
265			continue
266		}
267
268		tmpl, err := template.New("search").Funcs(funcMap).Parse(searchTemplate)
269		if err != nil {
270			t.Fatalf("could not parse search template: %s, err: %v", searchTemplate, err)
271		}
272
273		for j := 0; j < vt.NumAttemptsPerSearch; j++ {
274			vt.CurAttempt = j
275
276			var buf bytes.Buffer
277			err = tmpl.Execute(&buf, vt)
278			if err != nil {
279				t.Fatalf("could not execute search template: %s, err: %v", searchTemplate, err)
280			}
281
282			bufBytes := buf.Bytes()
283
284			if vt.Verbose > 0 {
285				fmt.Printf("  %s\n", bufBytes)
286			}
287
288			var search bleve.SearchRequest
289			err = json.Unmarshal(bufBytes, &search)
290			if err != nil {
291				t.Fatalf("could not unmarshal search: %s, err: %v", bufBytes, err)
292			}
293
294			search.Size = vt.NumDocs * 10 // Crank up limit to get all results.
295
296			searchA := search
297			searchB := search
298
299			resA, errA := idxA.Search(&searchA)
300			resB, errB := idxB.Search(&searchB)
301			if errA != errB {
302				t.Errorf("search: (%d) %s,\n err mismatch, errA: %v, errB: %v",
303					i, bufBytes, errA, errB)
304			}
305
306			// Scores might have float64 vs float32 wobbles, so truncate precision.
307			resA.MaxScore = math.Trunc(resA.MaxScore*1000.0) / 1000.0
308			resB.MaxScore = math.Trunc(resB.MaxScore*1000.0) / 1000.0
309
310			// Timings may be different between A & B, so force equality.
311			resA.Took = resB.Took
312
313			// Hits might have different ordering since some indexers
314			// (like upsidedown) have a natural secondary sort on id
315			// while others (like scorch) don't.  So, we compare by
316			// putting the hits from A & B into maps.
317			hitsA := hitsById(resA)
318			hitsB := hitsById(resB)
319			if !reflect.DeepEqual(hitsA, hitsB) {
320				t.Errorf("=========\nsearch: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d",
321					i, bufBytes, len(hitsA), len(hitsB))
322				t.Errorf("\n  hitsA: %#v,\n  hitsB: %#v",
323					hitsA, hitsB)
324				for id, hitA := range hitsA {
325					hitB := hitsB[id]
326					if !reflect.DeepEqual(hitA, hitB) {
327						t.Errorf("\n  driving from hitsA\n    hitA: %#v,\n    hitB: %#v", hitA, hitB)
328						idx, _ := strconv.Atoi(id)
329						t.Errorf("\n    doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " "))
330					}
331				}
332				for id, hitB := range hitsB {
333					hitA := hitsA[id]
334					if !reflect.DeepEqual(hitA, hitB) {
335						t.Errorf("\n  driving from hitsB\n    hitA: %#v,\n    hitB: %#v", hitA, hitB)
336						idx, _ := strconv.Atoi(id)
337						t.Errorf("\n    doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " "))
338					}
339				}
340			}
341
342			resA.Hits = nil
343			resB.Hits = nil
344
345			if !reflect.DeepEqual(resA, resB) {
346				resAj, _ := json.Marshal(resA)
347				resBj, _ := json.Marshal(resB)
348				t.Errorf("search: (%d) %s,\n  res mismatch,\n  resA: %s,\n  resB: %s",
349					i, bufBytes, resAj, resBj)
350			}
351
352			if vt.Verbose > 0 {
353				fmt.Printf("  Total: (%t) %d\n", resA.Total == resB.Total, resA.Total)
354			}
355
356			vt.TotAttempts++
357		}
358	}
359}
360
361// Organizes the hits into a map keyed by id.
362func hitsById(res *bleve.SearchResult) map[string]*search.DocumentMatch {
363	rv := make(map[string]*search.DocumentMatch, len(res.Hits))
364
365	for _, hit := range res.Hits {
366		// Clear out or truncate precision of hit fields that might be
367		// different across different indexer implementations.
368		hit.Index = ""
369		hit.Score = math.Trunc(hit.Score*1000.0) / 1000.0
370		hit.IndexInternalID = nil
371		hit.HitNumber = 0
372
373		rv[hit.ID] = hit
374	}
375
376	return rv
377}
378
379// -------------------------------------------------------
380
381func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string,
382	cb func(versusTest *VersusTest, searchTemplates []string, idxA, idxB bleve.Index),
383	searchTemplates []string) {
384	if cb == nil {
385		cb = testVersusSearches
386	}
387
388	if searchTemplates == nil {
389		searchTemplates = testVersusSearchTemplates
390	}
391
392	if vt.Verbose <= 0 {
393		vt.Verbose, _ = strconv.Atoi(os.Getenv("VERBOSE"))
394	}
395
396	dirA := "/tmp/bleve-versus-test-a"
397	dirB := "/tmp/bleve-versus-test-b"
398
399	defer func() {
400		_ = os.RemoveAll(dirA)
401		_ = os.RemoveAll(dirB)
402	}()
403
404	_ = os.RemoveAll(dirA)
405	_ = os.RemoveAll(dirB)
406
407	imA := vt.makeIndexMapping()
408	imB := vt.makeIndexMapping()
409
410	kvConfigA := map[string]interface{}{}
411	kvConfigB := map[string]interface{}{}
412
413	idxA, err := bleve.NewUsing(dirA, imA, indexTypeA, kvStoreA, kvConfigA)
414	if err != nil || idxA == nil {
415		vt.t.Fatalf("new using err: %v", err)
416	}
417	defer func() { _ = idxA.Close() }()
418
419	idxB, err := bleve.NewUsing(dirB, imB, indexTypeB, kvStoreB, kvConfigB)
420	if err != nil || idxB == nil {
421		vt.t.Fatalf("new using err: %v", err)
422	}
423	defer func() { _ = idxB.Close() }()
424
425	rand.Seed(0)
426
427	if vt.Bodies == nil {
428		vt.Bodies = vt.genBodies()
429	}
430
431	vt.insertBodies(idxA)
432	vt.insertBodies(idxB)
433
434	cb(vt, searchTemplates, idxA, idxB)
435}
436
437// -------------------------------------------------------
438
439func (vt *VersusTest) makeIndexMapping() mapping.IndexMapping {
440	standardFM := bleve.NewTextFieldMapping()
441	standardFM.Store = false
442	standardFM.IncludeInAll = false
443	standardFM.IncludeTermVectors = true
444	standardFM.Analyzer = "standard"
445
446	dm := bleve.NewDocumentMapping()
447	dm.AddFieldMappingsAt("title", standardFM)
448	dm.AddFieldMappingsAt("body", standardFM)
449
450	im := bleve.NewIndexMapping()
451	im.DefaultMapping = dm
452	im.DefaultAnalyzer = "standard"
453
454	return im
455}
456
457func (vt *VersusTest) insertBodies(idx bleve.Index) {
458	batch := idx.NewBatch()
459	for i, bodyWords := range vt.Bodies {
460		title := fmt.Sprintf("%d", i)
461		body := strings.Join(bodyWords, " ")
462		err := batch.Index(title, map[string]interface{}{"title": title, "body": body})
463		if err != nil {
464			vt.t.Fatalf("batch.Index err: %v", err)
465		}
466		if i%vt.BatchSize == 0 {
467			err = idx.Batch(batch)
468			if err != nil {
469				vt.t.Fatalf("batch err: %v", err)
470			}
471			batch.Reset()
472		}
473	}
474	err := idx.Batch(batch)
475	if err != nil {
476		vt.t.Fatalf("last batch err: %v", err)
477	}
478}
479
480func (vt *VersusTest) genBodies() (rv [][]string) {
481	for i := 0; i < vt.NumDocs; i++ {
482		rv = append(rv, vt.genBody())
483	}
484	return rv
485}
486
487func (vt *VersusTest) genBody() (rv []string) {
488	m := rand.Intn(vt.MaxWordsPerDoc)
489	for j := 0; j < m; j++ {
490		rv = append(rv, vt.genWord(rand.Intn(vt.NumWords)))
491	}
492	return rv
493}
494
495func (vt *VersusTest) genWord(i int) string {
496	return fmt.Sprintf("%x", i)
497}
498