1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package vellum 16 17import ( 18 "bytes" 19 "reflect" 20 "testing" 21 22 "github.com/couchbase/vellum/levenshtein" 23 "github.com/couchbase/vellum/regexp" 24) 25 26func TestIterator(t *testing.T) { 27 var buf bytes.Buffer 28 b, err := New(&buf, nil) 29 if err != nil { 30 t.Fatalf("error creating builder: %v", err) 31 } 32 33 err = insertStringMap(b, smallSample) 34 if err != nil { 35 t.Fatalf("error building: %v", err) 36 } 37 38 err = b.Close() 39 if err != nil { 40 t.Fatalf("error closing: %v", err) 41 } 42 43 fst, err := Load(buf.Bytes()) 44 if err != nil { 45 t.Fatalf("error loading set: %v", err) 46 } 47 48 got := map[string]uint64{} 49 itr, err := fst.Iterator(nil, nil) 50 for err == nil { 51 key, val := itr.Current() 52 got[string(key)] = val 53 err = itr.Next() 54 } 55 if err != ErrIteratorDone { 56 t.Errorf("iterator error: %v", err) 57 } 58 if !reflect.DeepEqual(smallSample, got) { 59 t.Errorf("expected %v, got: %v", smallSample, got) 60 } 61} 62 63func TestIteratorReset(t *testing.T) { 64 var buf bytes.Buffer 65 b, err := New(&buf, nil) 66 if err != nil { 67 t.Fatalf("error creating builder: %v", err) 68 } 69 70 err = insertStringMap(b, smallSample) 71 if err != nil { 72 t.Fatalf("error building: %v", err) 73 } 74 75 err = b.Close() 76 if err != nil { 77 t.Fatalf("error closing: %v", err) 78 } 79 80 fst, err := Load(buf.Bytes()) 81 if err != nil { 82 t.Fatalf("error loading set: %v", err) 83 } 84 85 itr, err := fst.Iterator(nil, nil) 86 if err != nil { 87 t.Fatalf("error creating an iterator: %v", err) 88 } 89 90 buf.Reset() 91 b, err = New(&buf, nil) 92 if err != nil { 93 t.Fatalf("error creating builder: %v", err) 94 } 95 96 smallSample2 := map[string]uint64{ 97 "bold": 25, 98 "last": 1, 99 "next": 500, 100 "tank": 0, 101 } 102 err = insertStringMap(b, smallSample2) 103 if err != nil { 104 t.Fatalf("error building: %v", err) 105 } 106 107 err = b.Close() 108 if err != nil { 109 t.Fatalf("error closing: %v", err) 110 } 111 112 fst, err = Load(buf.Bytes()) 113 if err != nil { 114 t.Fatalf("error loading set: %v", err) 115 } 116 117 got := map[string]uint64{} 118 err = itr.Reset(fst, nil, nil, nil) 119 for err == nil { 120 key, val := itr.Current() 121 got[string(key)] = val 122 err = itr.Next() 123 } 124 if err != ErrIteratorDone { 125 t.Errorf("iterator error: %v", err) 126 } 127 if !reflect.DeepEqual(smallSample2, got) { 128 t.Errorf("expected %v, got: %v", smallSample2, got) 129 } 130 131} 132 133func TestIteratorStartKey(t *testing.T) { 134 var buf bytes.Buffer 135 b, err := New(&buf, nil) 136 if err != nil { 137 t.Fatalf("error creating builder: %v", err) 138 } 139 140 err = insertStringMap(b, smallSample) 141 if err != nil { 142 t.Fatalf("error building: %v", err) 143 } 144 145 err = b.Close() 146 if err != nil { 147 t.Fatalf("error closing: %v", err) 148 } 149 150 fst, err := Load(buf.Bytes()) 151 if err != nil { 152 t.Fatalf("error loading set: %v", err) 153 } 154 155 // with start key < "mon", we should still get it 156 got := map[string]uint64{} 157 itr, err := fst.Iterator([]byte("a"), nil) 158 for err == nil { 159 key, val := itr.Current() 160 got[string(key)] = val 161 err = itr.Next() 162 } 163 if err != ErrIteratorDone { 164 t.Errorf("iterator error: %v", err) 165 } 166 if !reflect.DeepEqual(smallSample, got) { 167 t.Errorf("expected %v, got: %v", smallSample, got) 168 } 169 170 // with start key = "mon", we should still get it 171 got = map[string]uint64{} 172 itr, err = fst.Iterator([]byte("mon"), nil) 173 for err == nil { 174 key, val := itr.Current() 175 got[string(key)] = val 176 err = itr.Next() 177 } 178 if err != ErrIteratorDone { 179 t.Errorf("iterator error: %v", err) 180 } 181 if !reflect.DeepEqual(smallSample, got) { 182 t.Errorf("expected %v, got: %v", smallSample, got) 183 } 184 185 // with start key > "mon", we don't expect to get it 186 expect := map[string]uint64{ 187 "tues": smallSample["tues"], 188 "thurs": smallSample["thurs"], 189 "tye": smallSample["tye"], 190 } 191 got = map[string]uint64{} 192 itr, err = fst.Iterator([]byte("mona"), nil) 193 for err == nil { 194 key, val := itr.Current() 195 got[string(key)] = val 196 err = itr.Next() 197 } 198 if err != ErrIteratorDone { 199 t.Errorf("iterator error: %v", err) 200 } 201 if !reflect.DeepEqual(expect, got) { 202 t.Errorf("expected %v, got: %v", expect, got) 203 } 204 205 // with start key > "mon", we don't expect to get it 206 expect = map[string]uint64{ 207 "tues": smallSample["tues"], 208 "thurs": smallSample["thurs"], 209 "tye": smallSample["tye"], 210 } 211 got = map[string]uint64{} 212 itr, err = fst.Iterator([]byte("my"), nil) 213 for err == nil { 214 key, val := itr.Current() 215 got[string(key)] = val 216 err = itr.Next() 217 } 218 if err != ErrIteratorDone { 219 t.Errorf("iterator error: %v", err) 220 } 221 if !reflect.DeepEqual(expect, got) { 222 t.Errorf("expected %v, got: %v", expect, got) 223 } 224} 225 226func TestIteratorEndKey(t *testing.T) { 227 var buf bytes.Buffer 228 b, err := New(&buf, nil) 229 if err != nil { 230 t.Fatalf("error creating builder: %v", err) 231 } 232 233 err = insertStringMap(b, smallSample) 234 if err != nil { 235 t.Fatalf("error building: %v", err) 236 } 237 238 err = b.Close() 239 if err != nil { 240 t.Fatalf("error closing: %v", err) 241 } 242 243 fst, err := Load(buf.Bytes()) 244 if err != nil { 245 t.Fatalf("error loading set: %v", err) 246 } 247 248 // with end key > "tye", we should still get it 249 got := map[string]uint64{} 250 itr, err := fst.Iterator(nil, []byte("zeus")) 251 for err == nil { 252 key, val := itr.Current() 253 got[string(key)] = val 254 err = itr.Next() 255 } 256 if err != ErrIteratorDone { 257 t.Errorf("iterator error: %v", err) 258 } 259 if !reflect.DeepEqual(smallSample, got) { 260 t.Errorf("expected %v, got: %v", smallSample, got) 261 } 262 263 // with end key = "tye", we should NOT get it (end key exclusive) 264 expect := map[string]uint64{ 265 "mon": smallSample["mon"], 266 "tues": smallSample["tues"], 267 "thurs": smallSample["thurs"], 268 } 269 got = map[string]uint64{} 270 itr, err = fst.Iterator(nil, []byte("tye")) 271 for err == nil { 272 key, val := itr.Current() 273 got[string(key)] = val 274 err = itr.Next() 275 } 276 if err != ErrIteratorDone { 277 t.Errorf("iterator error: %v", err) 278 } 279 if !reflect.DeepEqual(expect, got) { 280 t.Errorf("expected %v, got: %v", expect, got) 281 } 282 283 // with start key < "tye", we don't expect to get it 284 got = map[string]uint64{} 285 itr, err = fst.Iterator(nil, []byte("tv")) 286 for err == nil { 287 key, val := itr.Current() 288 got[string(key)] = val 289 err = itr.Next() 290 } 291 if err != ErrIteratorDone { 292 t.Errorf("iterator error: %v", err) 293 } 294 if !reflect.DeepEqual(expect, got) { 295 t.Errorf("expected %v, got: %v", expect, got) 296 } 297} 298 299func TestIteratorSeek(t *testing.T) { 300 var buf bytes.Buffer 301 b, err := New(&buf, nil) 302 if err != nil { 303 t.Fatalf("error creating builder: %v", err) 304 } 305 306 err = insertStringMap(b, smallSample) 307 if err != nil { 308 t.Fatalf("error building: %v", err) 309 } 310 311 err = b.Close() 312 if err != nil { 313 t.Fatalf("error closing: %v", err) 314 } 315 316 fst, err := Load(buf.Bytes()) 317 if err != nil { 318 t.Fatalf("error loading set: %v", err) 319 } 320 321 // seek past thurs (exactly to tues) 322 expect := map[string]uint64{ 323 "mon": smallSample["mon"], 324 "tues": smallSample["tues"], 325 "tye": smallSample["tye"], 326 } 327 got := map[string]uint64{} 328 itr, err := fst.Iterator(nil, nil) 329 for err == nil { 330 key, val := itr.Current() 331 got[string(key)] = val 332 333 if string(key) == "mon" { 334 err = itr.Seek([]byte("tue")) 335 } else { 336 err = itr.Next() 337 } 338 } 339 if err != ErrIteratorDone { 340 t.Errorf("iterator error: %v", err) 341 } 342 if !reflect.DeepEqual(expect, got) { 343 t.Errorf("expected %v, got: %v", expect, got) 344 } 345 346 // similar but seek to something after thurs before tues 347 got = map[string]uint64{} 348 itr, err = fst.Iterator(nil, nil) 349 for err == nil { 350 key, val := itr.Current() 351 got[string(key)] = val 352 353 if string(key) == "mon" { 354 err = itr.Seek([]byte("thv")) 355 } else { 356 err = itr.Next() 357 } 358 } 359 if err != ErrIteratorDone { 360 t.Errorf("iterator error: %v", err) 361 } 362 if !reflect.DeepEqual(expect, got) { 363 t.Errorf("expected %v, got: %v", expect, got) 364 } 365 366 // similar but seek to thurs+suffix 367 got = map[string]uint64{} 368 itr, err = fst.Iterator(nil, nil) 369 for err == nil { 370 key, val := itr.Current() 371 got[string(key)] = val 372 373 if string(key) == "mon" { 374 err = itr.Seek([]byte("thursday")) 375 } else { 376 err = itr.Next() 377 } 378 } 379 if err != ErrIteratorDone { 380 t.Errorf("iterator error: %v", err) 381 } 382 if !reflect.DeepEqual(expect, got) { 383 t.Errorf("expected %v, got: %v", expect, got) 384 } 385 386 // seek past last key (still inside iterator boundaries) 387 expect = map[string]uint64{ 388 "mon": smallSample["mon"], 389 } 390 got = map[string]uint64{} 391 itr, err = fst.Iterator(nil, nil) 392 for err == nil { 393 key, val := itr.Current() 394 got[string(key)] = val 395 396 if string(key) == "mon" { 397 err = itr.Seek([]byte("zzz")) 398 } else { 399 err = itr.Next() 400 } 401 } 402 if err != ErrIteratorDone { 403 t.Errorf("iterator error: %v", err) 404 } 405 if !reflect.DeepEqual(expect, got) { 406 t.Errorf("expected %v, got: %v", expect, got) 407 } 408} 409 410func TestIteratorSeekOutsideBoundaries(t *testing.T) { 411 var buf bytes.Buffer 412 b, err := New(&buf, nil) 413 if err != nil { 414 t.Fatalf("error creating builder: %v", err) 415 } 416 417 err = insertStringMap(b, smallSample) 418 if err != nil { 419 t.Fatalf("error building: %v", err) 420 } 421 422 err = b.Close() 423 if err != nil { 424 t.Fatalf("error closing: %v", err) 425 } 426 427 fst, err := Load(buf.Bytes()) 428 if err != nil { 429 t.Fatalf("error loading set: %v", err) 430 } 431 432 // first test with boundaries should just see thurs/tues 433 expect := map[string]uint64{ 434 "thurs": smallSample["thurs"], 435 "tues": smallSample["tues"], 436 } 437 got := map[string]uint64{} 438 itr, err := fst.Iterator([]byte("th"), []byte("tuesd")) 439 for err == nil { 440 key, val := itr.Current() 441 got[string(key)] = val 442 err = itr.Next() 443 } 444 if err != ErrIteratorDone { 445 t.Errorf("iterator error: %v", err) 446 } 447 if !reflect.DeepEqual(expect, got) { 448 t.Errorf("expected %v, got: %v", expect, got) 449 } 450 451 // this time try to seek before the start, 452 // still shouldn't see mon 453 got = map[string]uint64{} 454 itr, err = fst.Iterator([]byte("th"), []byte("tuesd")) 455 if err != nil { 456 t.Fatalf("error before seeking: %v", err) 457 } 458 err = itr.Seek([]byte("cat")) 459 for err == nil { 460 key, val := itr.Current() 461 got[string(key)] = val 462 err = itr.Next() 463 } 464 if err != ErrIteratorDone { 465 t.Errorf("iterator error: %v", err) 466 } 467 if !reflect.DeepEqual(expect, got) { 468 t.Errorf("expected %v, got: %v", expect, got) 469 } 470 471 // this time try to seek past the end 472 // should see nothing 473 474 itr, err = fst.Iterator([]byte("th"), []byte("tuesd")) 475 if err != nil { 476 t.Fatalf("error before seeking: %v", err) 477 } 478 err = itr.Seek([]byte("ty")) 479 if err != ErrIteratorDone { 480 t.Fatalf("expected ErrIteratorDone, got %v", err) 481 } 482} 483 484var key []byte 485var val uint64 486 487func BenchmarkFSTIteratorAllInMem(b *testing.B) { 488 // first build the FST once 489 dataset := thousandTestWords 490 randomThousandVals := randomValues(dataset) 491 var buf bytes.Buffer 492 builder, err := New(&buf, nil) 493 if err != nil { 494 b.Fatalf("error creating builder: %v", err) 495 } 496 err = insertStrings(builder, dataset, randomThousandVals) 497 if err != nil { 498 b.Fatalf("error inserting thousand words: %v", err) 499 } 500 err = builder.Close() 501 if err != nil { 502 b.Fatalf("error closing builder: %v", err) 503 } 504 505 b.ResetTimer() 506 507 for i := 0; i < b.N; i++ { 508 509 fst, err := Load(buf.Bytes()) 510 if err != nil { 511 b.Fatalf("error loading FST: %v", err) 512 } 513 514 itr, err := fst.Iterator(nil, nil) 515 for err == nil { 516 key, val = itr.Current() 517 err = itr.Next() 518 } 519 if err != ErrIteratorDone { 520 b.Fatalf("iterator error: %v", err) 521 } 522 523 err = fst.Close() 524 if err != nil { 525 b.Fatalf("error closing FST: %v", err) 526 } 527 528 } 529} 530 531func TestFuzzySearch(t *testing.T) { 532 var buf bytes.Buffer 533 b, err := New(&buf, nil) 534 if err != nil { 535 t.Fatalf("error creating builder: %v", err) 536 } 537 538 err = insertStringMap(b, smallSample) 539 if err != nil { 540 t.Fatalf("error building: %v", err) 541 } 542 543 err = b.Close() 544 if err != nil { 545 t.Fatalf("error closing: %v", err) 546 } 547 548 fst, err := Load(buf.Bytes()) 549 if err != nil { 550 t.Fatalf("error loading set: %v", err) 551 } 552 553 fuzzy, err := levenshtein.New("tue", 1) 554 if err != nil { 555 t.Fatalf("error building levenshtein automaton: %v", err) 556 } 557 558 want := map[string]uint64{ 559 "tues": 3, 560 "tye": 99, 561 } 562 got := map[string]uint64{} 563 itr, err := fst.Search(fuzzy, nil, nil) 564 for err == nil { 565 key, val := itr.Current() 566 got[string(key)] = val 567 err = itr.Next() 568 } 569 if err != ErrIteratorDone { 570 t.Errorf("iterator error: %v", err) 571 } 572 if !reflect.DeepEqual(want, got) { 573 t.Errorf("expected %v, got: %v", want, got) 574 } 575} 576 577func TestRegexpSearch(t *testing.T) { 578 var buf bytes.Buffer 579 b, err := New(&buf, nil) 580 if err != nil { 581 t.Fatalf("error creating builder: %v", err) 582 } 583 584 err = insertStringMap(b, smallSample) 585 if err != nil { 586 t.Fatalf("error building: %v", err) 587 } 588 589 err = b.Close() 590 if err != nil { 591 t.Fatalf("error closing: %v", err) 592 } 593 594 fst, err := Load(buf.Bytes()) 595 if err != nil { 596 t.Fatalf("error loading set: %v", err) 597 } 598 599 r, err := regexp.New(`t.*s`) 600 if err != nil { 601 t.Fatalf("error building regexp automaton: %v", err) 602 } 603 604 want := map[string]uint64{ 605 "thurs": 5, 606 "tues": 3, 607 } 608 609 got := map[string]uint64{} 610 itr, err := fst.Search(r, nil, nil) 611 for err == nil { 612 key, val := itr.Current() 613 got[string(key)] = val 614 err = itr.Next() 615 } 616 if err != ErrIteratorDone { 617 t.Errorf("iterator error: %v", err) 618 } 619 if !reflect.DeepEqual(want, got) { 620 t.Errorf("expected %v, got: %v", want, got) 621 } 622 623 got = map[string]uint64{} 624 itr, err = fst.Search(r, []byte("t"), nil) 625 for err == nil { 626 key, val := itr.Current() 627 got[string(key)] = val 628 err = itr.Next() 629 } 630 if err != ErrIteratorDone { 631 t.Errorf("iterator error: %v", err) 632 } 633 if !reflect.DeepEqual(want, got) { 634 t.Errorf("with start key t, expected %v, got: %v", want, got) 635 } 636 637 got = map[string]uint64{} 638 itr, err = fst.Search(r, nil, []byte("u")) 639 for err == nil { 640 key, val := itr.Current() 641 got[string(key)] = val 642 err = itr.Next() 643 } 644 if err != ErrIteratorDone { 645 t.Errorf("iterator error: %v", err) 646 } 647 if !reflect.DeepEqual(want, got) { 648 t.Errorf("with end key u, expected %v, got: %v", want, got) 649 } 650 651 got = map[string]uint64{} 652 itr, err = fst.Search(r, []byte("t"), []byte("u")) 653 for err == nil { 654 key, val := itr.Current() 655 got[string(key)] = val 656 err = itr.Next() 657 } 658 if err != ErrIteratorDone { 659 t.Errorf("iterator error: %v", err) 660 } 661 if !reflect.DeepEqual(want, got) { 662 t.Errorf("with start key t, end key u, expected %v, got: %v", want, got) 663 } 664} 665