1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package scorch 16 17import ( 18 "reflect" 19 "testing" 20 21 "github.com/blevesearch/bleve/document" 22 "github.com/blevesearch/bleve/index" 23) 24 25func TestIndexReader(t *testing.T) { 26 defer func() { 27 err := DestroyTest() 28 if err != nil { 29 t.Fatal(err) 30 } 31 }() 32 33 analysisQueue := index.NewAnalysisQueue(1) 34 idx, err := NewScorch(Name, testConfig, analysisQueue) 35 if err != nil { 36 t.Fatal(err) 37 } 38 err = idx.Open() 39 if err != nil { 40 t.Fatalf("error opening index: %v", err) 41 } 42 defer func() { 43 err := idx.Close() 44 if err != nil { 45 t.Fatal(err) 46 } 47 }() 48 49 var expectedCount uint64 50 doc := document.NewDocument("1") 51 doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) 52 err = idx.Update(doc) 53 if err != nil { 54 t.Errorf("Error updating index: %v", err) 55 } 56 expectedCount++ 57 58 doc = document.NewDocument("2") 59 doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer)) 60 doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors, testAnalyzer)) 61 err = idx.Update(doc) 62 if err != nil { 63 t.Errorf("Error updating index: %v", err) 64 } 65 expectedCount++ 66 67 indexReader, err := idx.Reader() 68 if err != nil { 69 t.Error(err) 70 } 71 defer func() { 72 err := indexReader.Close() 73 if err != nil { 74 t.Fatal(err) 75 } 76 }() 77 78 // first look for a term that doesn't exist 79 reader, err := indexReader.TermFieldReader([]byte("nope"), "name", true, true, true) 80 if err != nil { 81 t.Errorf("Error accessing term field reader: %v", err) 82 } 83 count := reader.Count() 84 if count != 0 { 85 t.Errorf("Expected doc count to be: %d got: %d", 0, count) 86 } 87 err = reader.Close() 88 if err != nil { 89 t.Fatal(err) 90 } 91 92 reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true) 93 if err != nil { 94 t.Errorf("Error accessing term field reader: %v", err) 95 } 96 97 expectedCount = 2 98 count = reader.Count() 99 if count != expectedCount { 100 t.Errorf("Exptected doc count to be: %d got: %d", expectedCount, count) 101 } 102 103 var match *index.TermFieldDoc 104 var actualCount uint64 105 match, err = reader.Next(nil) 106 for err == nil && match != nil { 107 match, err = reader.Next(nil) 108 if err != nil { 109 t.Errorf("unexpected error reading next") 110 } 111 actualCount++ 112 } 113 if actualCount != count { 114 t.Errorf("count was 2, but only saw %d", actualCount) 115 } 116 117 internalIDBogus, err := indexReader.InternalID("a-bogus-docId") 118 if err != nil { 119 t.Fatal(err) 120 } 121 if internalIDBogus != nil { 122 t.Errorf("expected bogus docId to have nil InternalID") 123 } 124 125 internalID2, err := indexReader.InternalID("2") 126 if err != nil { 127 t.Fatal(err) 128 } 129 expectedMatch := &index.TermFieldDoc{ 130 ID: internalID2, 131 Freq: 1, 132 Norm: 0.5773502588272095, 133 Vectors: []*index.TermFieldVector{ 134 { 135 Field: "desc", 136 Pos: 3, 137 Start: 9, 138 End: 13, 139 }, 140 }, 141 } 142 tfr, err := indexReader.TermFieldReader([]byte("rice"), "desc", true, true, true) 143 if err != nil { 144 t.Errorf("unexpected error: %v", err) 145 } 146 match, err = tfr.Next(nil) 147 if err != nil { 148 t.Errorf("unexpected error: %v", err) 149 } 150 if !reflect.DeepEqual(expectedMatch, match) { 151 t.Errorf("got %#v, expected %#v", match, expectedMatch) 152 } 153 err = reader.Close() 154 if err != nil { 155 t.Fatal(err) 156 } 157 158 // now test usage of advance 159 reader, err = indexReader.TermFieldReader([]byte("test"), "name", true, true, true) 160 if err != nil { 161 t.Errorf("Error accessing term field reader: %v", err) 162 } 163 164 match, err = reader.Advance(internalID2, nil) 165 if err != nil { 166 t.Errorf("unexpected error: %v", err) 167 } 168 if match == nil { 169 t.Fatalf("Expected match, got nil") 170 } 171 if !match.ID.Equals(internalID2) { 172 t.Errorf("Expected ID '2', got '%s'", match.ID) 173 } 174 // NOTE: no point in changing this to internal id 3, there is no id 3 175 // the test is looking for something that doens't exist and this doesn't 176 match, err = reader.Advance(index.IndexInternalID("3"), nil) 177 if err != nil { 178 t.Errorf("unexpected error: %v", err) 179 } 180 if match != nil { 181 t.Errorf("expected nil, got %v", match) 182 } 183 err = reader.Close() 184 if err != nil { 185 t.Fatal(err) 186 } 187 188 // now test creating a reader for a field that doesn't exist 189 reader, err = indexReader.TermFieldReader([]byte("water"), "doesnotexist", true, true, true) 190 if err != nil { 191 t.Errorf("Error accessing term field reader: %v", err) 192 } 193 count = reader.Count() 194 if count != 0 { 195 t.Errorf("expected count 0 for reader of non-existent field") 196 } 197 match, err = reader.Next(nil) 198 if err != nil { 199 t.Errorf("unexpected error: %v", err) 200 } 201 if match != nil { 202 t.Errorf("expected nil, got %v", match) 203 } 204 match, err = reader.Advance(index.IndexInternalID("anywhere"), nil) 205 if err != nil { 206 t.Errorf("unexpected error: %v", err) 207 } 208 if match != nil { 209 t.Errorf("expected nil, got %v", match) 210 } 211 212} 213 214func TestIndexDocIdReader(t *testing.T) { 215 defer func() { 216 err := DestroyTest() 217 if err != nil { 218 t.Fatal(err) 219 } 220 }() 221 222 analysisQueue := index.NewAnalysisQueue(1) 223 idx, err := NewScorch(Name, testConfig, analysisQueue) 224 if err != nil { 225 t.Fatal(err) 226 } 227 err = idx.Open() 228 if err != nil { 229 t.Fatalf("error opening index: %v", err) 230 } 231 defer func() { 232 err := idx.Close() 233 if err != nil { 234 t.Fatal(err) 235 } 236 }() 237 238 var expectedCount uint64 239 doc := document.NewDocument("1") 240 doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) 241 err = idx.Update(doc) 242 if err != nil { 243 t.Errorf("Error updating index: %v", err) 244 } 245 expectedCount++ 246 247 doc = document.NewDocument("2") 248 doc.AddField(document.NewTextField("name", []uint64{}, []byte("test test test"))) 249 doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors)) 250 err = idx.Update(doc) 251 if err != nil { 252 t.Errorf("Error updating index: %v", err) 253 } 254 expectedCount++ 255 256 indexReader, err := idx.Reader() 257 if err != nil { 258 t.Error(err) 259 } 260 defer func() { 261 err := indexReader.Close() 262 if err != nil { 263 t.Error(err) 264 } 265 }() 266 267 // first get all doc ids 268 reader, err := indexReader.DocIDReaderAll() 269 if err != nil { 270 t.Errorf("Error accessing doc id reader: %v", err) 271 } 272 defer func() { 273 err := reader.Close() 274 if err != nil { 275 t.Fatal(err) 276 } 277 }() 278 279 id, err := reader.Next() 280 count := uint64(0) 281 for id != nil { 282 count++ 283 id, err = reader.Next() 284 } 285 if count != expectedCount { 286 t.Errorf("expected %d, got %d", expectedCount, count) 287 } 288 289 // try it again, but jump to the second doc this time 290 reader2, err := indexReader.DocIDReaderAll() 291 if err != nil { 292 t.Errorf("Error accessing doc id reader: %v", err) 293 } 294 defer func() { 295 err := reader2.Close() 296 if err != nil { 297 t.Error(err) 298 } 299 }() 300 301 internalID2, err := indexReader.InternalID("2") 302 if err != nil { 303 t.Fatal(err) 304 } 305 306 id, err = reader2.Advance(internalID2) 307 if err != nil { 308 t.Error(err) 309 } 310 if !id.Equals(internalID2) { 311 t.Errorf("expected to find id '2', got '%s'", id) 312 } 313 314 // again 3 doesn't exist cannot use internal id for 3 as there is none 315 // the important aspect is that this id doesn't exist, so its ok 316 id, err = reader2.Advance(index.IndexInternalID("3")) 317 if err != nil { 318 t.Error(err) 319 } 320 if id != nil { 321 t.Errorf("expected to find id '', got '%s'", id) 322 } 323} 324 325func TestIndexDocIdOnlyReader(t *testing.T) { 326 defer func() { 327 err := DestroyTest() 328 if err != nil { 329 t.Fatal(err) 330 } 331 }() 332 333 analysisQueue := index.NewAnalysisQueue(1) 334 idx, err := NewScorch(Name, testConfig, analysisQueue) 335 if err != nil { 336 t.Fatal(err) 337 } 338 err = idx.Open() 339 if err != nil { 340 t.Fatalf("error opening index: %v", err) 341 } 342 defer func() { 343 err := idx.Close() 344 if err != nil { 345 t.Fatal(err) 346 } 347 }() 348 349 doc := document.NewDocument("1") 350 err = idx.Update(doc) 351 if err != nil { 352 t.Errorf("Error updating index: %v", err) 353 } 354 355 doc = document.NewDocument("3") 356 err = idx.Update(doc) 357 if err != nil { 358 t.Errorf("Error updating index: %v", err) 359 } 360 361 doc = document.NewDocument("5") 362 err = idx.Update(doc) 363 if err != nil { 364 t.Errorf("Error updating index: %v", err) 365 } 366 367 doc = document.NewDocument("7") 368 err = idx.Update(doc) 369 if err != nil { 370 t.Errorf("Error updating index: %v", err) 371 } 372 373 doc = document.NewDocument("9") 374 err = idx.Update(doc) 375 if err != nil { 376 t.Errorf("Error updating index: %v", err) 377 } 378 379 indexReader, err := idx.Reader() 380 if err != nil { 381 t.Error(err) 382 } 383 defer func() { 384 err := indexReader.Close() 385 if err != nil { 386 t.Error(err) 387 } 388 }() 389 390 onlyIds := []string{"1", "5", "9"} 391 reader, err := indexReader.DocIDReaderOnly(onlyIds) 392 if err != nil { 393 t.Errorf("Error accessing doc id reader: %v", err) 394 } 395 defer func() { 396 err := reader.Close() 397 if err != nil { 398 t.Fatal(err) 399 } 400 }() 401 402 id, err := reader.Next() 403 count := uint64(0) 404 for id != nil { 405 count++ 406 id, err = reader.Next() 407 if err != nil { 408 t.Fatal(err) 409 } 410 } 411 if count != 3 { 412 t.Errorf("expected 3, got %d", count) 413 } 414 415 // commented out because advance works with internal ids 416 // this test presumes we see items in external doc id order 417 // which is no longer the case, so simply converting external ids 418 // to internal ones is not logically correct 419 // not removing though because we need some way to test Advance() 420 421 // // try it again, but jump 422 // reader2, err := indexReader.DocIDReaderOnly(onlyIds) 423 // if err != nil { 424 // t.Errorf("Error accessing doc id reader: %v", err) 425 // } 426 // defer func() { 427 // err := reader2.Close() 428 // if err != nil { 429 // t.Error(err) 430 // } 431 // }() 432 // 433 // id, err = reader2.Advance(index.IndexInternalID("5")) 434 // if err != nil { 435 // t.Error(err) 436 // } 437 // if !id.Equals(index.IndexInternalID("5")) { 438 // t.Errorf("expected to find id '5', got '%s'", id) 439 // } 440 // 441 // id, err = reader2.Advance(index.IndexInternalID("a")) 442 // if err != nil { 443 // t.Error(err) 444 // } 445 // if id != nil { 446 // t.Errorf("expected to find id '', got '%s'", id) 447 // } 448 449 // some keys aren't actually there 450 onlyIds = []string{"0", "2", "4", "5", "6", "8", "a"} 451 reader3, err := indexReader.DocIDReaderOnly(onlyIds) 452 if err != nil { 453 t.Errorf("Error accessing doc id reader: %v", err) 454 } 455 defer func() { 456 err := reader3.Close() 457 if err != nil { 458 t.Error(err) 459 } 460 }() 461 462 id, err = reader3.Next() 463 count = uint64(0) 464 for id != nil { 465 count++ 466 id, err = reader3.Next() 467 } 468 if count != 1 { 469 t.Errorf("expected 1, got %d", count) 470 } 471 472 // commented out because advance works with internal ids 473 // this test presumes we see items in external doc id order 474 // which is no longer the case, so simply converting external ids 475 // to internal ones is not logically correct 476 // not removing though because we need some way to test Advance() 477 478 // // mix advance and next 479 // onlyIds = []string{"0", "1", "3", "5", "6", "9"} 480 // reader4, err := indexReader.DocIDReaderOnly(onlyIds) 481 // if err != nil { 482 // t.Errorf("Error accessing doc id reader: %v", err) 483 // } 484 // defer func() { 485 // err := reader4.Close() 486 // if err != nil { 487 // t.Error(err) 488 // } 489 // }() 490 // 491 // // first key is "1" 492 // id, err = reader4.Next() 493 // if err != nil { 494 // t.Error(err) 495 // } 496 // if !id.Equals(index.IndexInternalID("1")) { 497 // t.Errorf("expected to find id '1', got '%s'", id) 498 // } 499 // 500 // // advancing to key we dont have gives next 501 // id, err = reader4.Advance(index.IndexInternalID("2")) 502 // if err != nil { 503 // t.Error(err) 504 // } 505 // if !id.Equals(index.IndexInternalID("3")) { 506 // t.Errorf("expected to find id '3', got '%s'", id) 507 // } 508 // 509 // // next after advance works 510 // id, err = reader4.Next() 511 // if err != nil { 512 // t.Error(err) 513 // } 514 // if !id.Equals(index.IndexInternalID("5")) { 515 // t.Errorf("expected to find id '5', got '%s'", id) 516 // } 517 // 518 // // advancing to key we do have works 519 // id, err = reader4.Advance(index.IndexInternalID("9")) 520 // if err != nil { 521 // t.Error(err) 522 // } 523 // if !id.Equals(index.IndexInternalID("9")) { 524 // t.Errorf("expected to find id '9', got '%s'", id) 525 // } 526 // 527 // // advance backwards at end 528 // id, err = reader4.Advance(index.IndexInternalID("4")) 529 // if err != nil { 530 // t.Error(err) 531 // } 532 // if !id.Equals(index.IndexInternalID("5")) { 533 // t.Errorf("expected to find id '5', got '%s'", id) 534 // } 535 // 536 // // next after advance works 537 // id, err = reader4.Next() 538 // if err != nil { 539 // t.Error(err) 540 // } 541 // if !id.Equals(index.IndexInternalID("9")) { 542 // t.Errorf("expected to find id '9', got '%s'", id) 543 // } 544 // 545 // // advance backwards to key that exists, but not in only set 546 // id, err = reader4.Advance(index.IndexInternalID("7")) 547 // if err != nil { 548 // t.Error(err) 549 // } 550 // if !id.Equals(index.IndexInternalID("9")) { 551 // t.Errorf("expected to find id '9', got '%s'", id) 552 // } 553 554} 555 556func TestSegmentIndexAndLocalDocNumFromGlobal(t *testing.T) { 557 tests := []struct { 558 offsets []uint64 559 globalDocNum uint64 560 segmentIndex int 561 localDocNum uint64 562 }{ 563 // just 1 segment 564 { 565 offsets: []uint64{0}, 566 globalDocNum: 0, 567 segmentIndex: 0, 568 localDocNum: 0, 569 }, 570 { 571 offsets: []uint64{0}, 572 globalDocNum: 1, 573 segmentIndex: 0, 574 localDocNum: 1, 575 }, 576 { 577 offsets: []uint64{0}, 578 globalDocNum: 25, 579 segmentIndex: 0, 580 localDocNum: 25, 581 }, 582 // now 2 segments, 30 docs in first 583 { 584 offsets: []uint64{0, 30}, 585 globalDocNum: 0, 586 segmentIndex: 0, 587 localDocNum: 0, 588 }, 589 { 590 offsets: []uint64{0, 30}, 591 globalDocNum: 1, 592 segmentIndex: 0, 593 localDocNum: 1, 594 }, 595 { 596 offsets: []uint64{0, 30}, 597 globalDocNum: 25, 598 segmentIndex: 0, 599 localDocNum: 25, 600 }, 601 { 602 offsets: []uint64{0, 30}, 603 globalDocNum: 30, 604 segmentIndex: 1, 605 localDocNum: 0, 606 }, 607 { 608 offsets: []uint64{0, 30}, 609 globalDocNum: 35, 610 segmentIndex: 1, 611 localDocNum: 5, 612 }, 613 // lots of segments 614 { 615 offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000}, 616 globalDocNum: 0, 617 segmentIndex: 0, 618 localDocNum: 0, 619 }, 620 { 621 offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000}, 622 globalDocNum: 25, 623 segmentIndex: 0, 624 localDocNum: 25, 625 }, 626 { 627 offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000}, 628 globalDocNum: 35, 629 segmentIndex: 1, 630 localDocNum: 5, 631 }, 632 { 633 offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000}, 634 globalDocNum: 100, 635 segmentIndex: 4, 636 localDocNum: 1, 637 }, 638 { 639 offsets: []uint64{0, 30, 40, 70, 99, 172, 800, 25000}, 640 globalDocNum: 825, 641 segmentIndex: 6, 642 localDocNum: 25, 643 }, 644 } 645 646 for _, test := range tests { 647 i := &IndexSnapshot{ 648 offsets: test.offsets, 649 refs: 1, 650 } 651 gotSegmentIndex, gotLocalDocNum := i.segmentIndexAndLocalDocNumFromGlobal(test.globalDocNum) 652 if gotSegmentIndex != test.segmentIndex { 653 t.Errorf("got segment index %d expected %d for offsets %v globalDocNum %d", gotSegmentIndex, test.segmentIndex, test.offsets, test.globalDocNum) 654 } 655 if gotLocalDocNum != test.localDocNum { 656 t.Errorf("got localDocNum %d expected %d for offsets %v globalDocNum %d", gotLocalDocNum, test.localDocNum, test.offsets, test.globalDocNum) 657 } 658 err := i.DecRef() 659 if err != nil { 660 t.Errorf("expected no err, got: %v", err) 661 } 662 } 663} 664