1// Copyright (c) 2013 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package searcher 16 17import ( 18 "reflect" 19 "testing" 20 21 "github.com/blevesearch/bleve/index" 22 "github.com/blevesearch/bleve/search" 23) 24 25func TestPhraseSearch(t *testing.T) { 26 27 twoDocIndexReader, err := twoDocIndex.Reader() 28 if err != nil { 29 t.Error(err) 30 } 31 defer func() { 32 err := twoDocIndexReader.Close() 33 if err != nil { 34 t.Fatal(err) 35 } 36 }() 37 38 soptions := search.SearcherOptions{Explain: true, IncludeTermVectors: true} 39 phraseSearcher, err := NewPhraseSearcher(twoDocIndexReader, []string{"angst", "beer"}, "desc", soptions) 40 if err != nil { 41 t.Fatal(err) 42 } 43 44 tests := []struct { 45 searcher search.Searcher 46 results []*search.DocumentMatch 47 locations map[string]map[string][]search.Location 48 fieldterms [][2]string 49 }{ 50 { 51 searcher: phraseSearcher, 52 results: []*search.DocumentMatch{ 53 { 54 IndexInternalID: index.IndexInternalID("2"), 55 Score: 1.0807601687084403, 56 }, 57 }, 58 locations: map[string]map[string][]search.Location{"desc": map[string][]search.Location{"beer": []search.Location{search.Location{Pos: 2, Start: 6, End: 10}}, "angst": []search.Location{search.Location{Pos: 1, Start: 0, End: 5}}}}, 59 fieldterms: [][2]string{[2]string{"desc", "beer"}, [2]string{"desc", "angst"}}, 60 }, 61 } 62 63 for testIndex, test := range tests { 64 defer func() { 65 err := test.searcher.Close() 66 if err != nil { 67 t.Fatal(err) 68 } 69 }() 70 71 ctx := &search.SearchContext{ 72 DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0), 73 } 74 next, err := test.searcher.Next(ctx) 75 i := 0 76 for err == nil && next != nil { 77 if i < len(test.results) { 78 if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) { 79 t.Errorf("expected result %d to have id %s got %s for test %d\n", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex) 80 } 81 if next.Score != test.results[i].Score { 82 t.Errorf("expected result %d to have score %v got %v for test %d\n", i, test.results[i].Score, next.Score, testIndex) 83 t.Logf("scoring explanation: %s\n", next.Expl) 84 } 85 for _, ft := range test.fieldterms { 86 locs := next.Locations[ft[0]][ft[1]] 87 explocs := test.locations[ft[0]][ft[1]] 88 if len(explocs) != len(locs) { 89 t.Fatalf("expected result %d to have %d Locations (%#v) but got %d (%#v) for test %d with field %q and term %q\n", i, len(explocs), explocs, len(locs), locs, testIndex, ft[0], ft[1]) 90 } 91 for ind, exploc := range explocs { 92 if !reflect.DeepEqual(*locs[ind], exploc) { 93 t.Errorf("expected result %d to have Location %v got %v for test %d\n", i, exploc, locs[ind], testIndex) 94 } 95 } 96 } 97 } 98 99 ctx.DocumentMatchPool.Put(next) 100 next, err = test.searcher.Next(ctx) 101 i++ 102 } 103 if err != nil { 104 t.Fatalf("error iterating searcher: %v for test %d", err, testIndex) 105 } 106 if len(test.results) != i { 107 t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex) 108 } 109 } 110} 111 112func TestMultiPhraseSearch(t *testing.T) { 113 114 soptions := search.SearcherOptions{Explain: true, IncludeTermVectors: true} 115 116 tests := []struct { 117 phrase [][]string 118 docids [][]byte 119 }{ 120 { 121 phrase: [][]string{[]string{"angst", "what"}, []string{"beer"}}, 122 docids: [][]byte{[]byte("2")}, 123 }, 124 } 125 126 for i, test := range tests { 127 128 reader, err := twoDocIndex.Reader() 129 if err != nil { 130 t.Error(err) 131 } 132 searcher, err := NewMultiPhraseSearcher(reader, test.phrase, "desc", soptions) 133 if err != nil { 134 t.Error(err) 135 } 136 ctx := &search.SearchContext{ 137 DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0), 138 } 139 next, err := searcher.Next(ctx) 140 var actualIds [][]byte 141 for err == nil && next != nil { 142 actualIds = append(actualIds, next.IndexInternalID) 143 ctx.DocumentMatchPool.Put(next) 144 next, err = searcher.Next(ctx) 145 } 146 if err != nil { 147 t.Fatalf("error iterating searcher: %v for test %d", err, i) 148 } 149 if !reflect.DeepEqual(test.docids, actualIds) { 150 t.Fatalf("expected ids: %v, got %v", test.docids, actualIds) 151 } 152 153 err = searcher.Close() 154 if err != nil { 155 t.Error(err) 156 } 157 158 err = reader.Close() 159 if err != nil { 160 t.Error(err) 161 } 162 } 163} 164 165func TestFindPhrasePaths(t *testing.T) { 166 tests := []struct { 167 phrase [][]string 168 tlm search.TermLocationMap 169 paths []phrasePath 170 }{ 171 // simplest matching case 172 { 173 phrase: [][]string{[]string{"cat"}, []string{"dog"}}, 174 tlm: search.TermLocationMap{ 175 "cat": search.Locations{ 176 &search.Location{ 177 Pos: 1, 178 }, 179 }, 180 "dog": search.Locations{ 181 &search.Location{ 182 Pos: 2, 183 }, 184 }, 185 }, 186 paths: []phrasePath{ 187 phrasePath{ 188 &phrasePart{"cat", &search.Location{Pos: 1}}, 189 &phrasePart{"dog", &search.Location{Pos: 2}}, 190 }, 191 }, 192 }, 193 // second term missing, no match 194 { 195 phrase: [][]string{[]string{"cat"}, []string{"dog"}}, 196 tlm: search.TermLocationMap{ 197 "cat": search.Locations{ 198 &search.Location{ 199 Pos: 1, 200 }, 201 }, 202 }, 203 paths: nil, 204 }, 205 // second term exists but in wrong position 206 { 207 phrase: [][]string{[]string{"cat"}, []string{"dog"}}, 208 tlm: search.TermLocationMap{ 209 "cat": search.Locations{ 210 &search.Location{ 211 Pos: 1, 212 }, 213 }, 214 "dog": search.Locations{ 215 &search.Location{ 216 Pos: 3, 217 }, 218 }, 219 }, 220 paths: nil, 221 }, 222 // matches multiple times 223 { 224 phrase: [][]string{[]string{"cat"}, []string{"dog"}}, 225 tlm: search.TermLocationMap{ 226 "cat": search.Locations{ 227 &search.Location{ 228 Pos: 1, 229 }, 230 &search.Location{ 231 Pos: 8, 232 }, 233 }, 234 "dog": search.Locations{ 235 &search.Location{ 236 Pos: 2, 237 }, 238 &search.Location{ 239 Pos: 9, 240 }, 241 }, 242 }, 243 paths: []phrasePath{ 244 phrasePath{ 245 &phrasePart{"cat", &search.Location{Pos: 1}}, 246 &phrasePart{"dog", &search.Location{Pos: 2}}, 247 }, 248 phrasePath{ 249 &phrasePart{"cat", &search.Location{Pos: 8}}, 250 &phrasePart{"dog", &search.Location{Pos: 9}}, 251 }, 252 }, 253 }, 254 // match over gaps 255 { 256 phrase: [][]string{[]string{"cat"}, []string{""}, []string{"dog"}}, 257 tlm: search.TermLocationMap{ 258 "cat": search.Locations{ 259 &search.Location{ 260 Pos: 1, 261 }, 262 }, 263 "dog": search.Locations{ 264 &search.Location{ 265 Pos: 3, 266 }, 267 }, 268 }, 269 paths: []phrasePath{ 270 phrasePath{ 271 &phrasePart{"cat", &search.Location{Pos: 1}}, 272 &phrasePart{"dog", &search.Location{Pos: 3}}, 273 }, 274 }, 275 }, 276 // match with leading "" 277 { 278 phrase: [][]string{[]string{""}, []string{"cat"}, []string{"dog"}}, 279 tlm: search.TermLocationMap{ 280 "cat": search.Locations{ 281 &search.Location{ 282 Pos: 2, 283 }, 284 }, 285 "dog": search.Locations{ 286 &search.Location{ 287 Pos: 3, 288 }, 289 }, 290 }, 291 paths: []phrasePath{ 292 phrasePath{ 293 &phrasePart{"cat", &search.Location{Pos: 2}}, 294 &phrasePart{"dog", &search.Location{Pos: 3}}, 295 }, 296 }, 297 }, 298 // match with trailing "" 299 { 300 phrase: [][]string{[]string{"cat"}, []string{"dog"}, []string{""}}, 301 tlm: search.TermLocationMap{ 302 "cat": search.Locations{ 303 &search.Location{ 304 Pos: 2, 305 }, 306 }, 307 "dog": search.Locations{ 308 &search.Location{ 309 Pos: 3, 310 }, 311 }, 312 }, 313 paths: []phrasePath{ 314 phrasePath{ 315 &phrasePart{"cat", &search.Location{Pos: 2}}, 316 &phrasePart{"dog", &search.Location{Pos: 3}}, 317 }, 318 }, 319 }, 320 } 321 322 for i, test := range tests { 323 actualPaths := findPhrasePaths(0, nil, test.phrase, test.tlm, nil, 0) 324 if !reflect.DeepEqual(actualPaths, test.paths) { 325 t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) 326 } 327 } 328} 329 330func TestFindPhrasePathsSloppy(t *testing.T) { 331 tlm := search.TermLocationMap{ 332 "one": search.Locations{ 333 &search.Location{ 334 Pos: 1, 335 }, 336 }, 337 "two": search.Locations{ 338 &search.Location{ 339 Pos: 2, 340 }, 341 }, 342 "three": search.Locations{ 343 &search.Location{ 344 Pos: 3, 345 }, 346 }, 347 "four": search.Locations{ 348 &search.Location{ 349 Pos: 4, 350 }, 351 }, 352 "five": search.Locations{ 353 &search.Location{ 354 Pos: 5, 355 }, 356 }, 357 } 358 359 tests := []struct { 360 phrase [][]string 361 paths []phrasePath 362 slop int 363 }{ 364 // no match 365 { 366 phrase: [][]string{[]string{"one"}, []string{"five"}}, 367 slop: 2, 368 }, 369 // should match 370 { 371 phrase: [][]string{[]string{"one"}, []string{"five"}}, 372 slop: 3, 373 paths: []phrasePath{ 374 phrasePath{ 375 &phrasePart{"one", &search.Location{Pos: 1}}, 376 &phrasePart{"five", &search.Location{Pos: 5}}, 377 }, 378 }, 379 }, 380 // slop 0 finds exact match 381 { 382 phrase: [][]string{[]string{"four"}, []string{"five"}}, 383 slop: 0, 384 paths: []phrasePath{ 385 phrasePath{ 386 &phrasePart{"four", &search.Location{Pos: 4}}, 387 &phrasePart{"five", &search.Location{Pos: 5}}, 388 }, 389 }, 390 }, 391 // slop 0 does not find exact match (reversed) 392 { 393 phrase: [][]string{[]string{"two"}, []string{"one"}}, 394 slop: 0, 395 }, 396 // slop 1 finds exact match 397 { 398 phrase: [][]string{[]string{"one"}, []string{"two"}}, 399 slop: 1, 400 paths: []phrasePath{ 401 phrasePath{ 402 &phrasePart{"one", &search.Location{Pos: 1}}, 403 &phrasePart{"two", &search.Location{Pos: 2}}, 404 }, 405 }, 406 }, 407 // slop 1 *still* does not find exact match (reversed) requires at least 2 408 { 409 phrase: [][]string{[]string{"two"}, []string{"one"}}, 410 slop: 1, 411 }, 412 // slop 2 does finds exact match reversed 413 { 414 phrase: [][]string{[]string{"two"}, []string{"one"}}, 415 slop: 2, 416 paths: []phrasePath{ 417 phrasePath{ 418 &phrasePart{"two", &search.Location{Pos: 2}}, 419 &phrasePart{"one", &search.Location{Pos: 1}}, 420 }, 421 }, 422 }, 423 // slop 2 not enough for this 424 { 425 phrase: [][]string{[]string{"three"}, []string{"one"}}, 426 slop: 2, 427 }, 428 // slop should be cumulative 429 { 430 phrase: [][]string{[]string{"one"}, []string{"three"}, []string{"five"}}, 431 slop: 2, 432 paths: []phrasePath{ 433 phrasePath{ 434 &phrasePart{"one", &search.Location{Pos: 1}}, 435 &phrasePart{"three", &search.Location{Pos: 3}}, 436 &phrasePart{"five", &search.Location{Pos: 5}}, 437 }, 438 }, 439 }, 440 // should require 6 441 { 442 phrase: [][]string{[]string{"five"}, []string{"three"}, []string{"one"}}, 443 slop: 5, 444 }, 445 // so lets try 6 446 { 447 phrase: [][]string{[]string{"five"}, []string{"three"}, []string{"one"}}, 448 slop: 6, 449 paths: []phrasePath{ 450 phrasePath{ 451 &phrasePart{"five", &search.Location{Pos: 5}}, 452 &phrasePart{"three", &search.Location{Pos: 3}}, 453 &phrasePart{"one", &search.Location{Pos: 1}}, 454 }, 455 }, 456 }, 457 } 458 459 for i, test := range tests { 460 actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, test.slop) 461 if !reflect.DeepEqual(actualPaths, test.paths) { 462 t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) 463 } 464 } 465} 466 467func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) { 468 tlm := search.TermLocationMap{ 469 "one": search.Locations{ 470 &search.Location{ 471 Pos: 1, 472 }, 473 &search.Location{ 474 Pos: 5, 475 }, 476 }, 477 "two": search.Locations{ 478 &search.Location{ 479 Pos: 2, 480 }, 481 &search.Location{ 482 Pos: 4, 483 }, 484 }, 485 "three": search.Locations{ 486 &search.Location{ 487 Pos: 3, 488 }, 489 }, 490 } 491 492 tests := []struct { 493 phrase [][]string 494 paths []phrasePath 495 slop int 496 }{ 497 // search non palyndrone, exact match 498 { 499 phrase: [][]string{[]string{"two"}, []string{"three"}}, 500 slop: 0, 501 paths: []phrasePath{ 502 phrasePath{ 503 &phrasePart{"two", &search.Location{Pos: 2}}, 504 &phrasePart{"three", &search.Location{Pos: 3}}, 505 }, 506 }, 507 }, 508 // same with slop 2 (not required) (find it twice) 509 { 510 phrase: [][]string{[]string{"two"}, []string{"three"}}, 511 slop: 2, 512 paths: []phrasePath{ 513 phrasePath{ 514 &phrasePart{"two", &search.Location{Pos: 2}}, 515 &phrasePart{"three", &search.Location{Pos: 3}}, 516 }, 517 phrasePath{ 518 &phrasePart{"two", &search.Location{Pos: 4}}, 519 &phrasePart{"three", &search.Location{Pos: 3}}, 520 }, 521 }, 522 }, 523 // palyndrone reversed 524 { 525 phrase: [][]string{[]string{"three"}, []string{"two"}}, 526 slop: 2, 527 paths: []phrasePath{ 528 phrasePath{ 529 &phrasePart{"three", &search.Location{Pos: 3}}, 530 &phrasePart{"two", &search.Location{Pos: 2}}, 531 }, 532 phrasePath{ 533 &phrasePart{"three", &search.Location{Pos: 3}}, 534 &phrasePart{"two", &search.Location{Pos: 4}}, 535 }, 536 }, 537 }, 538 } 539 540 for i, test := range tests { 541 actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, test.slop) 542 if !reflect.DeepEqual(actualPaths, test.paths) { 543 t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) 544 } 545 } 546} 547 548func TestFindMultiPhrasePaths(t *testing.T) { 549 550 tlm := search.TermLocationMap{ 551 "cat": search.Locations{ 552 &search.Location{ 553 Pos: 1, 554 }, 555 }, 556 "dog": search.Locations{ 557 &search.Location{ 558 Pos: 2, 559 }, 560 }, 561 "frog": search.Locations{ 562 &search.Location{ 563 Pos: 3, 564 }, 565 }, 566 } 567 568 tests := []struct { 569 phrase [][]string 570 paths []phrasePath 571 }{ 572 // simplest, one of two possible terms matches 573 { 574 phrase: [][]string{[]string{"cat", "rat"}, []string{"dog"}}, 575 paths: []phrasePath{ 576 phrasePath{ 577 &phrasePart{"cat", &search.Location{Pos: 1}}, 578 &phrasePart{"dog", &search.Location{Pos: 2}}, 579 }, 580 }, 581 }, 582 // two possible terms, neither work 583 { 584 phrase: [][]string{[]string{"cat", "rat"}, []string{"chicken"}}, 585 }, 586 // two possible terms, one works, but out of position with next 587 { 588 phrase: [][]string{[]string{"cat", "rat"}, []string{"frog"}}, 589 }, 590 // matches multiple times, with different pairing 591 { 592 phrase: [][]string{[]string{"cat", "dog"}, []string{"dog", "frog"}}, 593 paths: []phrasePath{ 594 phrasePath{ 595 &phrasePart{"cat", &search.Location{Pos: 1}}, 596 &phrasePart{"dog", &search.Location{Pos: 2}}, 597 }, 598 phrasePath{ 599 &phrasePart{"dog", &search.Location{Pos: 2}}, 600 &phrasePart{"frog", &search.Location{Pos: 3}}, 601 }, 602 }, 603 }, 604 // multi-match over a gap 605 { 606 phrase: [][]string{[]string{"cat", "rat"}, []string{""}, []string{"frog"}}, 607 paths: []phrasePath{ 608 phrasePath{ 609 &phrasePart{"cat", &search.Location{Pos: 1}}, 610 &phrasePart{"frog", &search.Location{Pos: 3}}, 611 }, 612 }, 613 }, 614 // multi-match over a gap (same as before, but with empty term list) 615 { 616 phrase: [][]string{[]string{"cat", "rat"}, []string{}, []string{"frog"}}, 617 paths: []phrasePath{ 618 phrasePath{ 619 &phrasePart{"cat", &search.Location{Pos: 1}}, 620 &phrasePart{"frog", &search.Location{Pos: 3}}, 621 }, 622 }, 623 }, 624 // multi-match over a gap (same once again, but nil term list) 625 { 626 phrase: [][]string{[]string{"cat", "rat"}, nil, []string{"frog"}}, 627 paths: []phrasePath{ 628 phrasePath{ 629 &phrasePart{"cat", &search.Location{Pos: 1}}, 630 &phrasePart{"frog", &search.Location{Pos: 3}}, 631 }, 632 }, 633 }, 634 } 635 636 for i, test := range tests { 637 actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, 0) 638 if !reflect.DeepEqual(actualPaths, test.paths) { 639 t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i) 640 } 641 } 642} 643