1// Copyright (c) 2014 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package test 16 17import ( 18 "bytes" 19 "encoding/json" 20 "fmt" 21 "math" 22 "math/rand" 23 "os" 24 "reflect" 25 "strconv" 26 "strings" 27 "testing" 28 "text/template" 29 30 "github.com/blevesearch/bleve" 31 "github.com/blevesearch/bleve/index/scorch" 32 "github.com/blevesearch/bleve/index/store/boltdb" 33 "github.com/blevesearch/bleve/index/upsidedown" 34 "github.com/blevesearch/bleve/mapping" 35 "github.com/blevesearch/bleve/search" 36) 37 38// Tests scorch indexer versus upsidedown/bolt indexer against various 39// templated queries. Example usage from the bleve top-level directory... 40// 41// go test -v -run TestScorchVersusUpsideDownBolt ./test 42// VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test 43// 44func TestScorchVersusUpsideDownBoltAll(t *testing.T) { 45 (&VersusTest{ 46 t: t, 47 NumDocs: 1000, 48 MaxWordsPerDoc: 20, 49 NumWords: 10, 50 BatchSize: 10, 51 NumAttemptsPerSearch: 100, 52 }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) 53} 54 55func TestScorchVersusUpsideDownBoltSmallMNSAM(t *testing.T) { 56 (&VersusTest{ 57 t: t, 58 Focus: "must-not-same-as-must", 59 NumDocs: 5, 60 MaxWordsPerDoc: 2, 61 NumWords: 1, 62 BatchSize: 1, 63 NumAttemptsPerSearch: 1, 64 }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) 65} 66 67func TestScorchVersusUpsideDownBoltSmallCMP11(t *testing.T) { 68 (&VersusTest{ 69 t: t, 70 Focus: "conjuncts-match-phrase-1-1", 71 NumDocs: 30, 72 MaxWordsPerDoc: 8, 73 NumWords: 2, 74 BatchSize: 1, 75 NumAttemptsPerSearch: 1, 76 }).run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil) 77} 78 79// ------------------------------------------------------- 80 81// Templates used to compare search results in the "versus" tests. 82var testVersusSearchTemplates = []string{ 83 `{ 84 "about": "expected to return zero hits", 85 "query": { 86 "query": "title:notARealTitle" 87 } 88 }`, 89 `{ 90 "about": "try straight word()'s", 91 "query": { 92 "query": "body:{{word}}" 93 } 94 }`, 95 `{ 96 "about": "conjuncts on same term", 97 "query": { 98 "conjuncts": [ 99 { "field": "body", "term": "{{word}}", "boost": 1.0 }, 100 { "field": "body", "term": "{{word}}", "boost": 1.0 } 101 ] 102 } 103 }`, 104 `{ 105 "about": "disjuncts on same term", 106 "query": { 107 "disjuncts": [ 108 { "field": "body", "term": "{{word}}", "boost": 1.0 }, 109 { "field": "body", "term": "{{word}}", "boost": 1.0 } 110 ] 111 } 112 }`, 113 `{ 114 "about": "never-matching-title-conjuncts", 115 "query": { 116 "conjuncts": [ 117 {"field": "body", "match": "{{word}}"}, 118 {"field": "body", "match": "{{word}}"}, 119 {"field": "title", "match": "notAnActualTitle"} 120 ] 121 } 122 }`, 123 `{ 124 "about": "never-matching-title-disjuncts", 125 "query": { 126 "disjuncts": [ 127 {"field": "body", "match": "{{word}}"}, 128 {"field": "body", "match": "{{word}}"}, 129 {"field": "title", "match": "notAnActualTitle"} 130 ] 131 } 132 }`, 133 `{ 134 "about": "must-not-never-matches", 135 "query": { 136 "must_not": {"disjuncts": [ 137 {"field": "title", "match": "notAnActualTitle"} 138 ]}, 139 "should": {"disjuncts": [ 140 {"field": "body", "match": "{{word}}"} 141 ]} 142 } 143 }`, 144 `{ 145 "about": "must-not-only", 146 "query": { 147 "must_not": {"disjuncts": [ 148 {"field": "body", "term": "{{word}}"} 149 ]} 150 } 151 }`, 152 `{ 153 "about": "must-not-same-as-must -- see: MB-27291", 154 "query": { 155 "must_not": {"disjuncts": [ 156 {"field": "body", "match": "{{word}}"} 157 ]}, 158 "must": {"conjuncts": [ 159 {"field": "body", "match": "{{word}}"} 160 ]} 161 } 162 }`, 163 `{ 164 "about": "must-not-same-as-should", 165 "query": { 166 "must_not": {"disjuncts": [ 167 {"field": "body", "match": "{{word}}"} 168 ]}, 169 "should": {"disjuncts": [ 170 {"field": "body", "match": "{{word}}"} 171 ]} 172 } 173 }`, 174 `{ 175 "about": "inspired by testrunner RQG issue -- see: MB-27291", 176 "query": { 177 "must_not": {"disjuncts": [ 178 {"field": "title", "match": "Trista Allen"}, 179 {"field": "body", "match": "{{word}}"} 180 ]}, 181 "should": {"disjuncts": [ 182 {"field": "title", "match": "Kallie Safiya Amara"}, 183 {"field": "body", "match": "{{word}}"} 184 ]} 185 } 186 }`, 187 `{ 188 "about": "conjuncts-match-phrase-1-1 inspired by testrunner RQG issue -- see: MB-27291", 189 "query": { 190 "conjuncts": [ 191 {"field": "body", "match": "{{bodyWord 0}}"}, 192 {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 1}}"} 193 ] 194 } 195 }`, 196 `{ 197 "about": "conjuncts-match-phrase-1-2 inspired by testrunner RQG issue -- see: MB-27291 -- FAILS!!", 198 "query": { 199 "conjuncts": [ 200 {"field": "body", "match": "{{bodyWord 0}}"}, 201 {"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 2}}"} 202 ] 203 } 204 }`, 205} 206 207// ------------------------------------------------------- 208 209type VersusTest struct { 210 t *testing.T 211 212 // Use environment variable VERBOSE=<integer> that's > 0 for more 213 // verbose output. 214 Verbose int 215 216 // Allow user to focus on particular search templates, where 217 // where the search template must contain the Focus string. 218 Focus string 219 220 NumDocs int // Number of docs to insert. 221 MaxWordsPerDoc int // Max number words in each doc's Body field. 222 NumWords int // Total number of words in the dictionary. 223 BatchSize int // Batch size when inserting docs. 224 NumAttemptsPerSearch int // For each search template, number of searches to try. 225 226 // The Bodies is an array with length NumDocs, where each entry 227 // is the words in a doc's Body field. 228 Bodies [][]string 229 230 CurAttempt int 231 TotAttempts int 232} 233 234// ------------------------------------------------------- 235 236func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB bleve.Index) { 237 t := vt.t 238 239 funcMap := template.FuncMap{ 240 // Returns a word. The word may or may not be in any 241 // document's body. 242 "word": func() string { 243 return vt.genWord(vt.CurAttempt % vt.NumWords) 244 }, 245 // Picks a document and returns the i'th word in that 246 // document's body. You can use this in searches to 247 // definitely find at least one document. 248 "bodyWord": func(i int) string { 249 body := vt.Bodies[vt.CurAttempt%len(vt.Bodies)] 250 if len(body) <= 0 { 251 return "" 252 } 253 return body[i%len(body)] 254 }, 255 } 256 257 // Optionally allow call to focus on a particular search templates, 258 // where the search template must contain the vt.Focus string. 259 if vt.Focus == "" { 260 vt.Focus = os.Getenv("FOCUS") 261 } 262 263 for i, searchTemplate := range searchTemplates { 264 if vt.Focus != "" && !strings.Contains(searchTemplate, vt.Focus) { 265 continue 266 } 267 268 tmpl, err := template.New("search").Funcs(funcMap).Parse(searchTemplate) 269 if err != nil { 270 t.Fatalf("could not parse search template: %s, err: %v", searchTemplate, err) 271 } 272 273 for j := 0; j < vt.NumAttemptsPerSearch; j++ { 274 vt.CurAttempt = j 275 276 var buf bytes.Buffer 277 err = tmpl.Execute(&buf, vt) 278 if err != nil { 279 t.Fatalf("could not execute search template: %s, err: %v", searchTemplate, err) 280 } 281 282 bufBytes := buf.Bytes() 283 284 if vt.Verbose > 0 { 285 fmt.Printf(" %s\n", bufBytes) 286 } 287 288 var search bleve.SearchRequest 289 err = json.Unmarshal(bufBytes, &search) 290 if err != nil { 291 t.Fatalf("could not unmarshal search: %s, err: %v", bufBytes, err) 292 } 293 294 search.Size = vt.NumDocs * 10 // Crank up limit to get all results. 295 296 searchA := search 297 searchB := search 298 299 resA, errA := idxA.Search(&searchA) 300 resB, errB := idxB.Search(&searchB) 301 if errA != errB { 302 t.Errorf("search: (%d) %s,\n err mismatch, errA: %v, errB: %v", 303 i, bufBytes, errA, errB) 304 } 305 306 // Scores might have float64 vs float32 wobbles, so truncate precision. 307 resA.MaxScore = math.Trunc(resA.MaxScore*1000.0) / 1000.0 308 resB.MaxScore = math.Trunc(resB.MaxScore*1000.0) / 1000.0 309 310 // Timings may be different between A & B, so force equality. 311 resA.Took = resB.Took 312 313 // Hits might have different ordering since some indexers 314 // (like upsidedown) have a natural secondary sort on id 315 // while others (like scorch) don't. So, we compare by 316 // putting the hits from A & B into maps. 317 hitsA := hitsById(resA) 318 hitsB := hitsById(resB) 319 if !reflect.DeepEqual(hitsA, hitsB) { 320 t.Errorf("=========\nsearch: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d", 321 i, bufBytes, len(hitsA), len(hitsB)) 322 t.Errorf("\n hitsA: %#v,\n hitsB: %#v", 323 hitsA, hitsB) 324 for id, hitA := range hitsA { 325 hitB := hitsB[id] 326 if !reflect.DeepEqual(hitA, hitB) { 327 t.Errorf("\n driving from hitsA\n hitA: %#v,\n hitB: %#v", hitA, hitB) 328 idx, _ := strconv.Atoi(id) 329 t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) 330 } 331 } 332 for id, hitB := range hitsB { 333 hitA := hitsA[id] 334 if !reflect.DeepEqual(hitA, hitB) { 335 t.Errorf("\n driving from hitsB\n hitA: %#v,\n hitB: %#v", hitA, hitB) 336 idx, _ := strconv.Atoi(id) 337 t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " ")) 338 } 339 } 340 } 341 342 resA.Hits = nil 343 resB.Hits = nil 344 345 if !reflect.DeepEqual(resA, resB) { 346 resAj, _ := json.Marshal(resA) 347 resBj, _ := json.Marshal(resB) 348 t.Errorf("search: (%d) %s,\n res mismatch,\n resA: %s,\n resB: %s", 349 i, bufBytes, resAj, resBj) 350 } 351 352 if vt.Verbose > 0 { 353 fmt.Printf(" Total: (%t) %d\n", resA.Total == resB.Total, resA.Total) 354 } 355 356 vt.TotAttempts++ 357 } 358 } 359} 360 361// Organizes the hits into a map keyed by id. 362func hitsById(res *bleve.SearchResult) map[string]*search.DocumentMatch { 363 rv := make(map[string]*search.DocumentMatch, len(res.Hits)) 364 365 for _, hit := range res.Hits { 366 // Clear out or truncate precision of hit fields that might be 367 // different across different indexer implementations. 368 hit.Index = "" 369 hit.Score = math.Trunc(hit.Score*1000.0) / 1000.0 370 hit.IndexInternalID = nil 371 hit.HitNumber = 0 372 373 rv[hit.ID] = hit 374 } 375 376 return rv 377} 378 379// ------------------------------------------------------- 380 381func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string, 382 cb func(versusTest *VersusTest, searchTemplates []string, idxA, idxB bleve.Index), 383 searchTemplates []string) { 384 if cb == nil { 385 cb = testVersusSearches 386 } 387 388 if searchTemplates == nil { 389 searchTemplates = testVersusSearchTemplates 390 } 391 392 if vt.Verbose <= 0 { 393 vt.Verbose, _ = strconv.Atoi(os.Getenv("VERBOSE")) 394 } 395 396 dirA := "/tmp/bleve-versus-test-a" 397 dirB := "/tmp/bleve-versus-test-b" 398 399 defer func() { 400 _ = os.RemoveAll(dirA) 401 _ = os.RemoveAll(dirB) 402 }() 403 404 _ = os.RemoveAll(dirA) 405 _ = os.RemoveAll(dirB) 406 407 imA := vt.makeIndexMapping() 408 imB := vt.makeIndexMapping() 409 410 kvConfigA := map[string]interface{}{} 411 kvConfigB := map[string]interface{}{} 412 413 idxA, err := bleve.NewUsing(dirA, imA, indexTypeA, kvStoreA, kvConfigA) 414 if err != nil || idxA == nil { 415 vt.t.Fatalf("new using err: %v", err) 416 } 417 defer func() { _ = idxA.Close() }() 418 419 idxB, err := bleve.NewUsing(dirB, imB, indexTypeB, kvStoreB, kvConfigB) 420 if err != nil || idxB == nil { 421 vt.t.Fatalf("new using err: %v", err) 422 } 423 defer func() { _ = idxB.Close() }() 424 425 rand.Seed(0) 426 427 if vt.Bodies == nil { 428 vt.Bodies = vt.genBodies() 429 } 430 431 vt.insertBodies(idxA) 432 vt.insertBodies(idxB) 433 434 cb(vt, searchTemplates, idxA, idxB) 435} 436 437// ------------------------------------------------------- 438 439func (vt *VersusTest) makeIndexMapping() mapping.IndexMapping { 440 standardFM := bleve.NewTextFieldMapping() 441 standardFM.Store = false 442 standardFM.IncludeInAll = false 443 standardFM.IncludeTermVectors = true 444 standardFM.Analyzer = "standard" 445 446 dm := bleve.NewDocumentMapping() 447 dm.AddFieldMappingsAt("title", standardFM) 448 dm.AddFieldMappingsAt("body", standardFM) 449 450 im := bleve.NewIndexMapping() 451 im.DefaultMapping = dm 452 im.DefaultAnalyzer = "standard" 453 454 return im 455} 456 457func (vt *VersusTest) insertBodies(idx bleve.Index) { 458 batch := idx.NewBatch() 459 for i, bodyWords := range vt.Bodies { 460 title := fmt.Sprintf("%d", i) 461 body := strings.Join(bodyWords, " ") 462 err := batch.Index(title, map[string]interface{}{"title": title, "body": body}) 463 if err != nil { 464 vt.t.Fatalf("batch.Index err: %v", err) 465 } 466 if i%vt.BatchSize == 0 { 467 err = idx.Batch(batch) 468 if err != nil { 469 vt.t.Fatalf("batch err: %v", err) 470 } 471 batch.Reset() 472 } 473 } 474 err := idx.Batch(batch) 475 if err != nil { 476 vt.t.Fatalf("last batch err: %v", err) 477 } 478} 479 480func (vt *VersusTest) genBodies() (rv [][]string) { 481 for i := 0; i < vt.NumDocs; i++ { 482 rv = append(rv, vt.genBody()) 483 } 484 return rv 485} 486 487func (vt *VersusTest) genBody() (rv []string) { 488 m := rand.Intn(vt.MaxWordsPerDoc) 489 for j := 0; j < m; j++ { 490 rv = append(rv, vt.genWord(rand.Intn(vt.NumWords))) 491 } 492 return rv 493} 494 495func (vt *VersusTest) genWord(i int) string { 496 return fmt.Sprintf("%x", i) 497} 498