1 // ========================================================================== 2 // SeqAn - The Library for Sequence Analysis 3 // ========================================================================== 4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin 5 // All rights reserved. 6 // 7 // Redistribution and use in source and binary forms, with or without 8 // modification, are permitted provided that the following conditions are met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above copyright 13 // notice, this list of conditions and the following disclaimer in the 14 // documentation and/or other materials provided with the distribution. 15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of 16 // its contributors may be used to endorse or promote products derived 17 // from this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 29 // DAMAGE. 30 // 31 // ========================================================================== 32 // Author: Enrico Siragusa <enrico.siragusa@fu-berlin.de> 33 // ========================================================================== 34 // Approximate string matching via backtracking on VSTrees 35 // ========================================================================== 36 37 #ifndef SEQAN_FIND_BACKTRACKING_H_ 38 #define SEQAN_FIND_BACKTRACKING_H_ 39 40 //#define SEQAN_DEBUG 41 42 namespace seqan { 43 44 // ============================================================================ 45 // Forwards 46 // ============================================================================ 47 48 template <typename TDistance, typename TSpec> 49 struct Backtracking; 50 51 // ============================================================================ 52 // Tags, Classes, Enums 53 // ============================================================================ 54 55 template <typename TPrefix, typename TDistance> 56 struct BacktrackingState_ {}; 57 58 // ============================================================================ 59 60 template <typename TSuffix, typename TDistance> 61 struct SuffixAligner_ 62 {}; 63 64 template <typename TSuffix> 65 struct SuffixAligner_<TSuffix, HammingDistance> 66 { 67 typedef typename Size<TSuffix>::Type TSize; 68 69 TSize suffix_length; 70 TSize suffix_position; 71 72 SuffixAligner_() : 73 suffix_length(0), 74 suffix_position(0) 75 {} 76 }; 77 78 // ============================================================================ 79 80 template <typename TPrefix, typename TDistance> 81 struct PrefixAligner_ 82 {}; 83 84 template <typename TPrefix> 85 struct PrefixAligner_<TPrefix, HammingDistance> 86 { 87 typedef typename Size<TPrefix>::Type TSize; 88 89 TSize prefix_length; 90 TSize prefix_position; 91 TSize global_position; 92 unsigned errors; 93 94 PrefixAligner_() : 95 prefix_length(0), 96 prefix_position(0), 97 global_position(0), 98 errors(0) 99 {} 100 }; 101 102 // ============================================================================ 103 104 template <typename TText, typename TSpec, typename TDistance, typename TBacktrackingSpec> 105 class Finder<Index<TText, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > 106 { 107 protected: 108 typedef Index<TText, TSpec> TIndex; 109 typedef typename Iterator<TIndex, TopDown<> >::Type TIndexIterator; 110 typedef String<TIndexIterator> TParentStack; 111 112 typedef typename Fibre<TIndex, FibreSA>::Type TSA; 113 typedef typename Iterator<TSA const, Standard>::Type TIterator; 114 typedef typename Size<TIndex>::Type TSize; 115 116 typedef typename EdgeLabel<TIndexIterator>::Type TSuffix; 117 typedef SuffixAligner_<TSuffix, TDistance> TSuffixAligner; 118 119 public: 120 bool index_iterator_at_root; 121 122 Holder<TIndex> index; 123 TIndexIterator index_iterator; 124 TParentStack index_parents; 125 Pair<TSize> index_range; 126 127 TIterator data_iterator; 128 TSize data_length; 129 Pair<TIterator> range; 130 131 TSuffixAligner suffix_aligner; 132 133 Finder() {} 134 135 Finder(TIndex & index) : 136 index(index), 137 index_iterator(index) 138 { 139 clear(*this); 140 } 141 142 Finder(TIndex const & index) : 143 index(index), 144 index_iterator(index) 145 { 146 clear(*this); 147 } 148 149 ~Finder() 150 { 151 #ifdef SEQAN_DEBUG 152 std::cout << "Finder Parents Height: " << length(this->index_parents) << std::endl; 153 #endif 154 } 155 156 }; 157 158 // ============================================================================ 159 160 template <typename TNeedle, typename TDistance, typename TBacktrackingSpec> 161 class Pattern<TNeedle, Backtracking<TDistance, TBacktrackingSpec> > 162 { 163 protected: 164 typedef TNeedle TPrefix; 165 typedef PrefixAligner_<TPrefix, TDistance> TPrefixAligner; 166 167 typedef typename BacktrackingState_<TPrefix, TDistance>::Type TState; 168 typedef String<TState> TStateStack; 169 170 public: 171 Holder<TNeedle> data_host; 172 TPrefixAligner prefix_aligner; 173 TStateStack state; 174 175 Pattern() {} 176 177 Pattern(TNeedle && needle, 178 SEQAN_CTOR_DISABLE_IF(IsSameType<typename std::remove_reference<TNeedle>::type const &, Pattern const &>)) 179 { 180 ignoreUnusedVariableWarning(dummy); 181 setHost(*this, std::forward<TNeedle>(needle)); 182 } 183 184 ~Pattern() 185 { 186 // Empty stack 187 clear(this->state); 188 } 189 190 }; 191 192 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 193 class Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > 194 { 195 protected: 196 typedef Index<TNeedle, TSpec> TIndex; 197 typedef typename Iterator<TIndex, TopDown<> >::Type TIndexIterator; 198 typedef String<TIndexIterator> TParentStack; 199 200 typedef typename Fibre<TIndex, FibreSA>::Type TSA; 201 typedef typename Iterator<TSA const, Standard>::Type TIterator; 202 typedef typename Size<TIndex>::Type TSize; 203 204 typedef typename EdgeLabel<TIndexIterator>::Type TPrefix; 205 typedef PrefixAligner_<TPrefix, TDistance> TPrefixAligner; 206 207 typedef typename BacktrackingState_<TPrefix, TDistance>::Type TState; 208 typedef String<TState> TStateStack; 209 210 typedef String<bool> TEndStack; 211 212 public: 213 bool index_iterator_at_root; 214 215 Holder<TIndex> data_host; 216 TIndexIterator index_iterator; 217 TParentStack index_parents; 218 Pair<TSize> index_range; 219 220 TIterator data_iterator; 221 TSize data_length; 222 Pair<TIterator> range; 223 224 TPrefixAligner prefix_aligner; 225 TStateStack state; 226 TEndStack atEnd; 227 228 unsigned exact; 229 bool search; 230 231 // TODO(esiragusa): Remove depth, isLeaf(it) should return true when repLength(it) >= depth 232 TSize depth; 233 234 Pattern() {} 235 236 237 Pattern(TIndex && index, TSize depth) : 238 data_host(std::forward<TIndex>(index)), 239 index_iterator(index), 240 depth(depth) 241 { 242 setHost(*this, std::forward<TIndex>(index)); 243 } 244 245 ~Pattern() 246 { 247 #ifdef SEQAN_DEBUG 248 std::cout << "BacktrackingState_ Height: " << length(this->state) << std::endl; 249 std::cout << "atEnd Height: " << length(this->atEnd) << std::endl; 250 std::cout << "Pattern Parents Height: " << length(this->index_parents) << std::endl; 251 #endif 252 253 // Empty stacks 254 clear(this->index_parents); 255 clear(this->state); 256 clear(this->atEnd); 257 } 258 259 }; 260 261 // ============================================================================ 262 // Metafunctions 263 // ============================================================================ 264 265 template <typename TPrefix> 266 struct BacktrackingState_<TPrefix, HammingDistance> 267 { 268 typedef typename Size<TPrefix>::Type TSize; 269 typedef Pair<TSize, unsigned> Type; 270 }; 271 272 // ============================================================================ 273 274 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 275 struct Position<Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > >: 276 SAValue<Index<TNeedle, TSpec> > 277 {}; 278 279 // ============================================================================ 280 // Functions 281 // ============================================================================ 282 283 template <typename TPrefix> 284 inline unsigned 285 getScore(PrefixAligner_<TPrefix, HammingDistance> const & me, bool atEnd) 286 { 287 return atEnd ? me.errors : std::numeric_limits<unsigned>::max(); 288 } 289 290 template <typename TPrefix> 291 inline unsigned 292 getMinScore(PrefixAligner_<TPrefix, HammingDistance> const & me) 293 { 294 return me.errors; 295 } 296 297 template <typename TPrefix> 298 inline typename BacktrackingState_<TPrefix, HammingDistance>::Type 299 getInitialState(PrefixAligner_<TPrefix, HammingDistance> &) 300 { 301 typedef typename BacktrackingState_<TPrefix, HammingDistance>::Type TState; 302 return TState(0, 0); 303 } 304 305 template <typename TPrefix> 306 inline typename BacktrackingState_<TPrefix, HammingDistance>::Type 307 getState(PrefixAligner_<TPrefix, HammingDistance> & me) 308 { 309 typedef typename BacktrackingState_<TPrefix, HammingDistance>::Type TState; 310 return TState(me.global_position, me.errors); 311 } 312 313 template <typename TPrefix, typename TState> 314 inline void 315 setState(PrefixAligner_<TPrefix, HammingDistance> & me, TState const & state) 316 { 317 me.global_position = state.i1; 318 me.errors = state.i2; 319 #ifdef SEQAN_DEBUG 320 std::cout << "Old BacktrackingState_: " << "(" << state.i1 << ", " << state.i2 << ")" << std::endl; 321 #endif 322 } 323 324 template <typename TSuffix, typename TState> 325 inline void 326 setState(SuffixAligner_<TSuffix, HammingDistance> &, TState const &) 327 { 328 // me.global_position = state.i1; 329 } 330 331 template <typename TSuffix, typename TSize> 332 inline void setLength(SuffixAligner_<TSuffix, HammingDistance> & me, TSize suffix_length) 333 { 334 me.suffix_length = suffix_length; 335 } 336 337 template <typename TPrefix, typename TSize> 338 inline void setLength(PrefixAligner_<TPrefix, HammingDistance> & me, TSize prefix_length) 339 { 340 me.prefix_length = prefix_length; 341 } 342 343 template <typename TSuffix, typename TSize> 344 inline void setPosition(SuffixAligner_<TSuffix, HammingDistance> & me, TSize suffix_position) 345 { 346 me.suffix_position = suffix_position; 347 } 348 349 template <typename TPrefix, typename TSize> 350 inline void setPosition(PrefixAligner_<TPrefix, HammingDistance> & me, TSize prefix_position) 351 { 352 me.prefix_position = prefix_position; 353 } 354 355 // ============================================================================ 356 357 template <typename TSuffix, typename TPrefix, typename TErrors> 358 inline bool align(SuffixAligner_<TSuffix, HammingDistance> & suffix_aligner, 359 PrefixAligner_<TPrefix, HammingDistance> & prefix_aligner, 360 TSuffix & suffix, 361 TPrefix & prefix, 362 TErrors errors) 363 { 364 return _align(suffix_aligner, prefix_aligner, suffix, prefix, errors, 365 typename IsSequence<TSuffix>::Type(), typename IsSequence<TPrefix>::Type()); 366 } 367 368 template <typename TSuffix, typename TPrefix, typename TErrors> 369 inline bool _align(SuffixAligner_<TSuffix, HammingDistance> & suffix_aligner, 370 PrefixAligner_<TPrefix, HammingDistance> & prefix_aligner, 371 TSuffix & suffix, 372 TPrefix & prefix, 373 TErrors errors, 374 False const & /* tag */, 375 False const & /* tag */) 376 { 377 typedef typename Size<TPrefix>::Type TSize; 378 379 TSize extension = _min(suffix_aligner.suffix_length - suffix_aligner.suffix_position, 380 prefix_aligner.prefix_length - prefix_aligner.prefix_position); 381 382 if (extension > 0) 383 { 384 if (suffix != prefix) 385 if (++prefix_aligner.errors > errors) 386 return false; 387 388 suffix_aligner.suffix_position++; 389 prefix_aligner.prefix_position++; 390 prefix_aligner.global_position++; 391 } 392 393 return true; 394 } 395 396 template <typename TSuffix, typename TPrefix, typename TErrors> 397 inline bool _align(SuffixAligner_<TSuffix, HammingDistance> & suffix_aligner, 398 PrefixAligner_<TPrefix, HammingDistance> & prefix_aligner, 399 TSuffix & suffix, 400 TPrefix & prefix, 401 TErrors errors, 402 True const & /* tag */, 403 False const & /* tag */) 404 { 405 typedef typename Size<TPrefix>::Type TSize; 406 407 TSize extension = _min(suffix_aligner.suffix_length - suffix_aligner.suffix_position, 408 prefix_aligner.prefix_length - prefix_aligner.prefix_position); 409 410 if (extension > 0) 411 { 412 if (suffix[suffix_aligner.suffix_position] != prefix) 413 if (++prefix_aligner.errors > errors) 414 return false; 415 416 suffix_aligner.suffix_position++; 417 prefix_aligner.prefix_position++; 418 prefix_aligner.global_position++; 419 } 420 421 return true; 422 } 423 424 template <typename TSuffix, typename TPrefix, typename TErrors> 425 inline bool _align(SuffixAligner_<TSuffix, HammingDistance> & suffix_aligner, 426 PrefixAligner_<TPrefix, HammingDistance> & prefix_aligner, 427 TSuffix & suffix, 428 TPrefix & prefix, 429 TErrors errors, 430 False const & /* tag */, 431 True const & /* tag */) 432 { 433 typedef typename Size<TPrefix>::Type TSize; 434 435 TSize extension = _min(suffix_aligner.suffix_length - suffix_aligner.suffix_position, 436 prefix_aligner.prefix_length - prefix_aligner.prefix_position); 437 438 if (extension > 0) 439 { 440 if (suffix != prefix[prefix_aligner.prefix_position]) 441 if (++prefix_aligner.errors > errors) 442 return false; 443 444 suffix_aligner.suffix_position++; 445 prefix_aligner.prefix_position++; 446 prefix_aligner.global_position++; 447 } 448 449 return true; 450 } 451 452 template <typename TSuffix, typename TPrefix, typename TErrors> 453 inline bool _align(SuffixAligner_<TSuffix, HammingDistance> & suffix_aligner, 454 PrefixAligner_<TPrefix, HammingDistance> & prefix_aligner, 455 TSuffix & suffix, 456 TPrefix & prefix, 457 TErrors errors, 458 True const & /* tag */, 459 True const & /* tag */) 460 { 461 typedef typename Size<TPrefix>::Type TSize; 462 463 TSize extension = _min(suffix_aligner.suffix_length - suffix_aligner.suffix_position, 464 prefix_aligner.prefix_length - prefix_aligner.prefix_position); 465 466 TSize endPosition = prefix_aligner.global_position + extension; 467 468 while (prefix_aligner.global_position < endPosition) 469 { 470 if (suffix[suffix_aligner.suffix_position] != prefix[prefix_aligner.prefix_position]) 471 if (++prefix_aligner.errors > errors) 472 return false; 473 474 suffix_aligner.suffix_position++; 475 prefix_aligner.prefix_position++; 476 prefix_aligner.global_position++; 477 } 478 479 return true; 480 } 481 482 // ============================================================================ 483 484 template <typename TSuffix, typename TPrefix> 485 inline bool match(SuffixAligner_<TSuffix, HammingDistance> & suffix_aligner, 486 PrefixAligner_<TPrefix, HammingDistance> & prefix_aligner, 487 TSuffix const & suffix, 488 TPrefix const & prefix) 489 { 490 return _match(suffix_aligner, prefix_aligner, suffix, prefix, 491 typename IsSequence<TSuffix>::Type(), typename IsSequence<TPrefix>::Type()); 492 } 493 494 template <typename TSuffix, typename TPrefix> 495 inline bool _match(SuffixAligner_<TSuffix, HammingDistance> & suffix_aligner, 496 PrefixAligner_<TPrefix, HammingDistance> & prefix_aligner, 497 TSuffix const & suffix, 498 TPrefix const & prefix, 499 False const & /* tag */, 500 False const & /* tag */) 501 { 502 typedef typename Size<TPrefix>::Type TSize; 503 504 TSize extension = _min(suffix_aligner.suffix_length - suffix_aligner.suffix_position, 505 prefix_aligner.prefix_length - prefix_aligner.prefix_position); 506 507 if (extension > 0) 508 { 509 if (suffix != prefix) 510 return false; 511 512 suffix_aligner.suffix_position++; 513 prefix_aligner.prefix_position++; 514 prefix_aligner.global_position++; 515 } 516 517 return true; 518 } 519 520 template <typename TSuffix, typename TPrefix> 521 inline bool _match(SuffixAligner_<TSuffix, HammingDistance> & suffix_aligner, 522 PrefixAligner_<TPrefix, HammingDistance> & prefix_aligner, 523 TSuffix const & suffix, 524 TPrefix const & prefix, 525 True const & /* tag */, 526 False const & /* tag */) 527 { 528 typedef typename Size<TPrefix>::Type TSize; 529 530 TSize extension = _min(suffix_aligner.suffix_length - suffix_aligner.suffix_position, 531 prefix_aligner.prefix_length - prefix_aligner.prefix_position); 532 533 if (extension > 0) 534 { 535 if (suffix[suffix_aligner.suffix_position] != prefix) 536 return false; 537 538 suffix_aligner.suffix_position++; 539 prefix_aligner.prefix_position++; 540 prefix_aligner.global_position++; 541 } 542 543 return true; 544 } 545 546 template <typename TSuffix, typename TPrefix> 547 inline bool _match(SuffixAligner_<TSuffix, HammingDistance> & suffix_aligner, 548 PrefixAligner_<TPrefix, HammingDistance> & prefix_aligner, 549 TSuffix const & suffix, 550 TPrefix const & prefix, 551 False const & /* tag */, 552 True const & /* tag */) 553 { 554 typedef typename Size<TPrefix>::Type TSize; 555 556 TSize extension = _min(suffix_aligner.suffix_length - suffix_aligner.suffix_position, 557 prefix_aligner.prefix_length - prefix_aligner.prefix_position); 558 559 if (extension > 0) 560 { 561 if (suffix != prefix[prefix_aligner.prefix_position]) 562 return false; 563 564 suffix_aligner.suffix_position++; 565 prefix_aligner.prefix_position++; 566 prefix_aligner.global_position++; 567 } 568 569 return true; 570 } 571 572 template <typename TSuffix, typename TPrefix> 573 inline bool _match(SuffixAligner_<TSuffix, HammingDistance> & suffix_aligner, 574 PrefixAligner_<TPrefix, HammingDistance> & prefix_aligner, 575 TSuffix const & suffix, 576 TPrefix const & prefix, 577 True const & /* tag */, 578 True const & /* tag */) 579 { 580 typedef typename Size<TPrefix>::Type TSize; 581 582 TSize extension = _min(suffix_aligner.suffix_length - suffix_aligner.suffix_position, 583 prefix_aligner.prefix_length - prefix_aligner.prefix_position); 584 585 TSize endPosition = prefix_aligner.global_position + extension; 586 587 while (prefix_aligner.global_position < endPosition) 588 { 589 if (suffix[suffix_aligner.suffix_position] != prefix[prefix_aligner.prefix_position]) 590 return false; 591 592 suffix_aligner.suffix_position++; 593 prefix_aligner.prefix_position++; 594 prefix_aligner.global_position++; 595 } 596 597 return true; 598 } 599 600 // ============================================================================ 601 602 template <typename TPrefix> 603 inline bool _atEnd(PrefixAligner_<TPrefix, HammingDistance> const & me) 604 { 605 return me.prefix_position == me.prefix_length; 606 } 607 608 template <typename TSuffix> 609 inline bool _atEnd(SuffixAligner_<TSuffix, HammingDistance> const & me) 610 { 611 return me.suffix_position == me.suffix_length; 612 } 613 614 // ============================================================================ 615 616 template <typename TText, typename TSpec, typename TDistance, typename TBacktrackingSpec> 617 inline void 618 clear(Finder<Index<TText, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 619 { 620 typedef Index<TText, TSpec> TIndex; 621 typedef typename Iterator<TIndex, TopDown<> >::Type TIndexIterator; 622 623 typedef typename Fibre<TIndex, FibreSA>::Type TSA; 624 typedef typename Iterator<TSA const, Standard>::Type TIterator; 625 626 typedef typename EdgeLabel<TIndexIterator>::Type TSuffix; 627 typedef SuffixAligner_<TSuffix, TDistance> TSuffixAligner; 628 629 // Init backtracking on root node 630 me.index_iterator = TIndexIterator(host(me)); 631 me.index_iterator_at_root = true; 632 633 // Empty parent stack 634 clear(me.index_parents); 635 636 // Init data iterator on empty range 637 hostIterator(me) = begin(indexSA(host(me)), Standard()); 638 me.range.i1 = me.range.i2 = TIterator(); 639 me.data_length = 0; 640 641 // Call SuffixAligner_ constructor 642 me.suffix_aligner = TSuffixAligner(); 643 } 644 645 // ============================================================================ 646 647 template <typename TNeedle, typename TDistance, typename TBacktrackingSpec> 648 void _reinitPattern(Pattern<TNeedle, Backtracking<TDistance, TBacktrackingSpec> > & me) 649 { 650 typedef TNeedle TPrefix; 651 typedef PrefixAligner_<TPrefix, TDistance> TPrefixAligner; 652 //typedef typename BacktrackingState_<TPrefix, TDistance>::Type TState; 653 654 // Call PrefixAligner_ constructor 655 me.prefix_aligner = TPrefixAligner(); 656 657 // Init stack on empty word 658 clear(me.state); 659 appendValue(me.state, getInitialState(me.prefix_aligner)); 660 } 661 662 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 663 void _moveIteratorAtRoot(Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 664 { 665 typedef Index<TNeedle, TSpec> TIndex; 666 typedef typename Iterator<TIndex, TopDown<> >::Type TIndexIterator; 667 me.index_iterator = TIndexIterator(host(me)); 668 } 669 670 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 671 void _reinitPattern(Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 672 { 673 typedef Index<TNeedle, TSpec> TIndex; 674 typedef typename Iterator< TIndex, TopDown<> >::Type TIndexIterator; 675 typedef typename Fibre<TIndex, FibreSA>::Type TSA; 676 typedef typename Iterator<TSA const, Standard>::Type TIterator; 677 678 typedef typename EdgeLabel<TIndexIterator>::Type TPrefix; 679 typedef PrefixAligner_<TPrefix, TDistance> TPrefixAligner; 680 //typedef typename BacktrackingState_<TPrefix, TDistance>::Type TState; 681 682 // Init backtracking on root node 683 _moveIteratorAtRoot(me); 684 // me.index_iterator = TIndexIterator(host(me)); 685 me.index_iterator_at_root = true; 686 687 // Empty parent stack 688 clear(me.index_parents); 689 690 // Init data iterator on empty range 691 hostIterator(me) = begin(indexSA(host(me)), Standard()); 692 me.range.i1 = me.range.i2 = TIterator(); 693 me.data_length = 0; 694 695 // Init stack on empty word 696 clear(me.state); 697 appendValue(me.state, getInitialState(me.prefix_aligner)); 698 699 // Empty atEnd stack 700 clear(me.atEnd); 701 // appendValue(me.atEnd, true); 702 703 // Empty exact search stack 704 me.exact = 0; 705 me.search = false; 706 707 // Call PrefixAligner_ constructor 708 me.prefix_aligner = TPrefixAligner(); 709 } 710 711 // ============================================================================ 712 713 template < typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec > 714 inline typename Iterator< typename Fibre<Index<TNeedle, TSpec>, FibreSA>::Type const, Standard>::Type const & 715 hostIterator(Pattern< Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > const & me) 716 { 717 return me.data_iterator; 718 } 719 720 template < typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec > 721 inline typename Iterator< typename Fibre<Index<TNeedle, TSpec>, FibreSA>::Type const, Standard >::Type & 722 hostIterator(Pattern< Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 723 { 724 return me.data_iterator; 725 } 726 727 // ============================================================================ 728 729 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 730 inline bool 731 empty(Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 732 { 733 return me.range.i1 == me.range.i2; 734 } 735 736 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 737 inline bool 738 atBegin(Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 739 { 740 return (empty(me) || hostIterator(me) == me.range.i1); 741 } 742 743 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 744 inline bool 745 atEnd(Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 746 { 747 return (empty(me) || hostIterator(me) == me.range.i2); 748 } 749 750 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 751 inline void 752 goBegin(Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 753 { 754 hostIterator(me) = me.range.i1; 755 } 756 757 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 758 inline void 759 goEnd(Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 760 { 761 hostIterator(me) = me.range.i2; 762 } 763 764 // ============================================================================ 765 766 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 767 inline typename Position<Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > >::Type 768 beginPosition(Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 769 { 770 SEQAN_ASSERT_NOT(empty(me)); 771 return *me.data_iterator; 772 } 773 774 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 775 inline typename Position<Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > >::Type 776 endPosition(Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 777 { 778 return posAdd(beginPosition(me), me.data_length); 779 } 780 781 template <typename TNeedle, typename TSpec, typename TDistance, typename TBacktrackingSpec> 782 inline typename Position<Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > >::Type 783 position(Pattern<Index<TNeedle, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & me) 784 { 785 return beginPosition(me); 786 } 787 788 // ============================================================================ 789 790 template <typename TText, typename TSpec, typename TDistance, typename TBacktrackingSpec, typename TNeedle> 791 inline bool 792 _resume(Finder<Index<TText, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & finder, 793 Pattern<TNeedle, Backtracking<TDistance, TBacktrackingSpec> > & pattern) 794 { 795 // Resume positively from root only the first time 796 if (finder.index_iterator_at_root) 797 { 798 finder.index_iterator_at_root = false; 799 return true; 800 } 801 802 return _cut(finder, pattern); 803 } 804 805 template <typename TText, typename TSpec, typename TDistance, typename TBacktrackingSpec, typename TNeedle, typename TErrors> 806 inline bool 807 _backtrack(Finder<Index<TText, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & finder, 808 Pattern<TNeedle, Backtracking<TDistance, TBacktrackingSpec> > & pattern, 809 TErrors errors) 810 { 811 typedef Index<TText, TSpec> TIndex; 812 typedef typename Iterator<TIndex, TopDown<> >::Type TIndexIterator; 813 typedef typename EdgeLabel<TIndexIterator>::Type TSuffix; 814 typedef typename Size<TIndex>::Type TSuffixSize; 815 816 typedef typename BacktrackingState_<TNeedle, TDistance>::Type TState; 817 818 setLength(pattern.prefix_aligner, length(needle(pattern))); 819 setPosition(pattern.prefix_aligner, 0); 820 821 do 822 { 823 SEQAN_ASSERT_NOT(empty(pattern.state)); 824 825 #ifdef SEQAN_DEBUG 826 std::cout << "Stack Height: " << length(pattern.state) << std::endl; 827 std::cout << "Suffix: " << 828 prefix(representative(finder.index_iterator), 829 _min(repLength(finder.index_iterator), length(needle(pattern)))) 830 << std::endl; 831 #endif 832 // Restore last state 833 setState(finder.suffix_aligner, back(pattern.state)); 834 setState(pattern.prefix_aligner, back(pattern.state)); 835 836 // Update current prefix 837 setPosition(pattern.prefix_aligner, pattern.prefix_aligner.global_position); 838 839 // Update current suffix 840 TSuffix suffixEdge = parentEdgeLabel(finder.index_iterator); 841 TSuffixSize suffixLength = parentEdgeLength(finder.index_iterator); 842 TSuffixSize suffixPos = pattern.prefix_aligner.global_position - parentRepLength(finder.index_iterator); 843 setLength(finder.suffix_aligner, suffixLength); 844 setPosition(finder.suffix_aligner, suffixPos); 845 846 // Align suffix with pattern 847 align(finder.suffix_aligner, pattern.prefix_aligner, suffixEdge, needle(pattern), errors); 848 849 // A complete match was found 850 if (getScore(pattern.prefix_aligner, _atEnd(pattern.prefix_aligner)) <= errors) 851 { 852 finder.index_range = range(finder.index_iterator); 853 return true; 854 } 855 // Reduce to exact suffix search (speedup) 856 else if (getMinScore(pattern.prefix_aligner) == errors) 857 { 858 TIndexIterator index_iterator(finder.index_iterator); 859 860 // A complete match was found 861 if (goDown(index_iterator, suffix(needle(pattern), pattern.prefix_aligner.global_position))) 862 { 863 // Move aligners to end of pattern 864 pattern.prefix_aligner.global_position = length(needle(pattern)); 865 866 finder.index_range = range(index_iterator); 867 return true; 868 } 869 else 870 _cut(finder, pattern); 871 } 872 // Walk down text index only if an alignment is still possible 873 else if (getMinScore(pattern.prefix_aligner) <= errors && !isLeaf(finder.index_iterator)) 874 { 875 appendValue(finder.index_parents, finder.index_iterator); 876 goDown(finder.index_iterator); 877 // appendValue(pattern.state, getState(pattern.prefix_aligner)); 878 appendValue(pattern.state, TState(pattern.prefix_aligner.global_position, pattern.prefix_aligner.errors)); 879 } 880 // Otherwise cut branch 881 else 882 _cut(finder, pattern); 883 } 884 while (!isRoot(finder.index_iterator)); 885 886 return false; 887 } 888 889 template <typename TText, typename TSpec, typename TDistance, typename TBacktrackingSpec, typename TNeedle> 890 inline bool 891 _cut(Finder<Index<TText, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & finder, 892 Pattern<TNeedle, Backtracking<TDistance, TBacktrackingSpec> > & pattern) 893 { 894 // Walk to next node 895 if (!goRight(finder.index_iterator)) 896 while (!isRoot(finder.index_iterator)) 897 { 898 SEQAN_ASSERT_NOT(empty(finder.index_parents)); 899 finder.index_iterator = back(finder.index_parents); 900 eraseBack(finder.index_parents); 901 902 SEQAN_ASSERT_NOT(empty(pattern.state)); 903 eraseBack(pattern.state); 904 if (goRight(finder.index_iterator)) 905 break; 906 } 907 908 return !isRoot(finder.index_iterator); 909 } 910 911 // ============================================================================ 912 913 template <typename TText, typename TTextSpec, typename TDistance, typename TBacktrackingSpec, typename TNeedle, typename TNeedleSpec, typename TErrors> 914 inline bool 915 _resume(Finder<Index<TText, TTextSpec>, Backtracking<TDistance, TBacktrackingSpec> > & finder, 916 Pattern<Index<TNeedle, TNeedleSpec>, Backtracking<TDistance, TBacktrackingSpec> > & pattern, 917 TErrors errors) 918 { 919 // Resume positively from root only the first time 920 if (finder.index_iterator_at_root) 921 { 922 finder.index_iterator_at_root = false; 923 return _backtrack(finder, pattern, errors); 924 } 925 926 if (pattern.search) 927 { 928 if (_cut_exact(finder, pattern) && _search(finder, pattern)) 929 return true; 930 931 SEQAN_ASSERT_NOT(pattern.exact); 932 933 SEQAN_ASSERT_NOT(empty(finder.index_parents)); 934 finder.index_iterator = back(finder.index_parents); 935 eraseBack(finder.index_parents); 936 SEQAN_ASSERT_NOT(empty(pattern.index_parents)); 937 pattern.index_iterator = back(pattern.index_parents); 938 eraseBack(pattern.index_parents); 939 SEQAN_ASSERT_NOT(empty(pattern.state)); 940 eraseBack(pattern.state); 941 942 pattern.search = false; 943 } 944 945 return _cut(finder, pattern) && _backtrack(finder, pattern, errors); 946 } 947 948 template <typename TText, typename TTextSpec, typename TDistance, typename TBacktrackingSpec, typename TNeedle, typename TNeedleSpec, typename TErrors> 949 inline bool 950 _backtrack(Finder<Index<TText, TTextSpec>, Backtracking<TDistance, TBacktrackingSpec> > & finder, 951 Pattern<Index<TNeedle, TNeedleSpec>, Backtracking<TDistance, TBacktrackingSpec> > & pattern, 952 TErrors errors) 953 { 954 typedef Index<TText, TTextSpec> TTextIndex; 955 typedef typename Iterator<TTextIndex, TopDown<> >::Type TTextIndexIterator; 956 typedef typename EdgeLabel<TTextIndexIterator>::Type TSuffix; 957 typedef typename Size<TTextIndex>::Type TSuffixSize; 958 959 typedef Index<TNeedle, TNeedleSpec> TNeedleIndex; 960 typedef typename Iterator<TNeedleIndex, TopDown<> >::Type TNeedleIndexIterator; 961 typedef typename EdgeLabel<TNeedleIndexIterator>::Type TPrefix; 962 //typedef typename Size<TNeedleIndex>::Type TPrefixSize; 963 964 typedef typename BacktrackingState_<TNeedle, TDistance>::Type TState; 965 966 SEQAN_ASSERT_NOT(pattern.exact); 967 968 do 969 { 970 SEQAN_ASSERT_NOT(empty(pattern.state)); 971 972 #ifdef SEQAN_DEBUG 973 std::cout << "Stack Height: " << length(pattern.state) << std::endl; 974 std::cout << "Suffix: " << representative(finder.index_iterator) << std::endl; 975 std::cout << "Prefix: " << representative(pattern.index_iterator) << std::endl; 976 #endif 977 978 // Lookup last state 979 setState(finder.suffix_aligner, back(pattern.state)); 980 setState(pattern.prefix_aligner, back(pattern.state)); 981 982 // Update current suffix 983 TSuffix suffixEdge = parentEdgeLabel(finder.index_iterator); 984 TSuffixSize suffixLength = parentEdgeLength(finder.index_iterator); 985 TSuffixSize suffixPos = pattern.prefix_aligner.global_position - parentRepLength(finder.index_iterator); 986 setLength(finder.suffix_aligner, suffixLength); 987 setPosition(finder.suffix_aligner, suffixPos); 988 989 // Update current prefix 990 TPrefix prefixEdge = parentEdgeLabel(pattern.index_iterator); 991 TSuffixSize prefixLength = parentEdgeLength(pattern.index_iterator); 992 TSuffixSize prefixPos = pattern.prefix_aligner.global_position - parentRepLength(pattern.index_iterator); 993 setLength(pattern.prefix_aligner, _min(prefixLength, pattern.depth - parentRepLength(pattern.index_iterator))); 994 setPosition(pattern.prefix_aligner, prefixPos); 995 996 // Align suffix with prefix 997 align(finder.suffix_aligner, pattern.prefix_aligner, suffixEdge, prefixEdge, errors); 998 999 // A complete match was found 1000 // if (getScore(pattern.prefix_aligner, isLeaf(pattern.index_iterator)) <= errors) 1001 if (getScore(pattern.prefix_aligner, (pattern.prefix_aligner.global_position >= pattern.depth)) <= errors) 1002 { 1003 finder.index_range = range(finder.index_iterator); 1004 pattern.index_range = range(pattern.index_iterator); 1005 return true; 1006 } 1007 // Reduce to exact suffix search (speedup) 1008 else if (getMinScore(pattern.prefix_aligner) == errors) 1009 { 1010 pattern.search = true; 1011 1012 appendValue(finder.index_parents, finder.index_iterator); 1013 appendValue(pattern.index_parents, pattern.index_iterator); 1014 // appendValue(pattern.state, getState(pattern.prefix_aligner)); 1015 appendValue(pattern.state, TState(pattern.prefix_aligner.global_position, pattern.prefix_aligner.errors)); 1016 1017 if (_search(finder, pattern)) 1018 return true; 1019 1020 SEQAN_ASSERT_NOT(pattern.exact); 1021 1022 SEQAN_ASSERT_NOT(empty(finder.index_parents)); 1023 finder.index_iterator = back(finder.index_parents); 1024 eraseBack(finder.index_parents); 1025 SEQAN_ASSERT_NOT(empty(pattern.index_parents)); 1026 pattern.index_iterator = back(pattern.index_parents); 1027 eraseBack(pattern.index_parents); 1028 SEQAN_ASSERT_NOT(empty(pattern.state)); 1029 eraseBack(pattern.state); 1030 1031 pattern.search = false; 1032 1033 _cut(finder, pattern); 1034 } 1035 else if (getMinScore(pattern.prefix_aligner) <= errors) 1036 { 1037 if (_atEnd(finder.suffix_aligner)) 1038 { 1039 if (!isLeaf(finder.index_iterator)) 1040 { 1041 appendValue(finder.index_parents, finder.index_iterator); 1042 appendValue(pattern.index_parents, pattern.index_iterator); 1043 // appendValue(pattern.state, getState(pattern.prefix_aligner)); 1044 appendValue(pattern.state, TState(pattern.prefix_aligner.global_position, pattern.prefix_aligner.errors)); 1045 appendValue(pattern.atEnd, false); 1046 1047 goDown(finder.index_iterator); 1048 } 1049 else 1050 _cut(finder, pattern); 1051 } 1052 else if (_atEnd(pattern.prefix_aligner)) 1053 { 1054 if (prefixLength < pattern.depth && !isLeaf(pattern.index_iterator)) 1055 { 1056 appendValue(finder.index_parents, finder.index_iterator); 1057 appendValue(pattern.index_parents, pattern.index_iterator); 1058 // appendValue(pattern.state, getState(pattern.prefix_aligner)); 1059 appendValue(pattern.state, TState(pattern.prefix_aligner.global_position, pattern.prefix_aligner.errors)); 1060 appendValue(pattern.atEnd, true); 1061 1062 goDown(pattern.index_iterator); 1063 } 1064 else 1065 _cut(finder, pattern); 1066 } 1067 } 1068 else 1069 _cut(finder, pattern); 1070 } 1071 while (!(isRoot(pattern.index_iterator) && isRoot(finder.index_iterator))); 1072 1073 return false; 1074 } 1075 1076 template <typename TText, typename TTextSpec, typename TDistance, typename TBacktrackingSpec, typename TNeedle, typename TNeedleSpec> 1077 inline bool 1078 _cut(Finder<Index<TText, TTextSpec>, Backtracking<TDistance, TBacktrackingSpec> > & finder, 1079 Pattern<Index<TNeedle, TNeedleSpec>, Backtracking<TDistance, TBacktrackingSpec> > & pattern) 1080 { 1081 SEQAN_ASSERT_NOT(pattern.exact); 1082 1083 if (empty(pattern.atEnd)) 1084 return false; 1085 1086 do 1087 { 1088 SEQAN_ASSERT_NOT(empty(pattern.atEnd)); 1089 1090 if (!back(pattern.atEnd)) 1091 { 1092 if (goRight(finder.index_iterator)) 1093 { 1094 SEQAN_ASSERT_NOT(empty(pattern.index_parents)); 1095 pattern.index_iterator = back(pattern.index_parents); 1096 break; 1097 } 1098 } 1099 else 1100 { 1101 if (goRight(pattern.index_iterator)) 1102 { 1103 SEQAN_ASSERT_NOT(empty(finder.index_parents)); 1104 finder.index_iterator = back(finder.index_parents); 1105 break; 1106 } 1107 } 1108 1109 SEQAN_ASSERT_NOT(empty(finder.index_parents)); 1110 finder.index_iterator = back(finder.index_parents); 1111 eraseBack(finder.index_parents); 1112 SEQAN_ASSERT_NOT(empty(pattern.index_parents)); 1113 pattern.index_iterator = back(pattern.index_parents); 1114 eraseBack(pattern.index_parents); 1115 SEQAN_ASSERT_NOT(empty(pattern.state)); 1116 eraseBack(pattern.state); 1117 SEQAN_ASSERT_NOT(empty(pattern.atEnd)); 1118 eraseBack(pattern.atEnd); 1119 } 1120 while (!(isRoot(pattern.index_iterator) && isRoot(finder.index_iterator))); 1121 1122 return !(isRoot(pattern.index_iterator) && isRoot(finder.index_iterator)); 1123 } 1124 1125 template <typename TText, typename TTextSpec, typename TDistance, typename TBacktrackingSpec, typename TNeedle, typename TNeedleSpec> 1126 inline unsigned 1127 _search(Finder<Index<TText, TTextSpec>, Backtracking<TDistance, TBacktrackingSpec> > & finder, 1128 Pattern<Index<TNeedle, TNeedleSpec>, Backtracking<TDistance, TBacktrackingSpec> > & pattern) 1129 { 1130 typedef Index<TText, TTextSpec> TTextIndex; 1131 typedef typename Iterator<TTextIndex, TopDown<> >::Type TTextIndexIterator; 1132 typedef typename EdgeLabel<TTextIndexIterator>::Type TSuffix; 1133 typedef typename Size<TTextIndex>::Type TSuffixSize; 1134 1135 typedef Index<TNeedle, TNeedleSpec> TNeedleIndex; 1136 typedef typename Iterator<TNeedleIndex, TopDown<> >::Type TNeedleIndexIterator; 1137 typedef typename EdgeLabel<TNeedleIndexIterator>::Type TPrefix; 1138 typedef typename Size<TNeedleIndex>::Type TPrefixSize; 1139 1140 typedef typename BacktrackingState_<TNeedle, TDistance>::Type TState; 1141 1142 do 1143 { 1144 SEQAN_ASSERT_NOT(empty(pattern.state)); 1145 1146 #ifdef SEQAN_DEBUG 1147 std::cout << "Stack Height: " << length(pattern.state) << std::endl; 1148 std::cout << "Exact Height: " << pattern.exact << std::endl; 1149 std::cout << "Suffix: " << representative(finder.index_iterator) << std::endl; 1150 std::cout << "Prefix: " << representative(pattern.index_iterator) << std::endl; 1151 #endif 1152 1153 // Lookup last state 1154 setState(finder.suffix_aligner, back(pattern.state)); 1155 setState(pattern.prefix_aligner, back(pattern.state)); 1156 1157 // Update current suffix 1158 TSuffix suffixEdge = parentEdgeLabel(finder.index_iterator); 1159 TSuffixSize suffixLength = parentEdgeLength(finder.index_iterator); 1160 TSuffixSize suffixPos = pattern.prefix_aligner.global_position - parentRepLength(finder.index_iterator); 1161 setLength(finder.suffix_aligner, suffixLength); 1162 setPosition(finder.suffix_aligner, suffixPos); 1163 1164 // Update current prefix 1165 TPrefix prefixEdge = parentEdgeLabel(pattern.index_iterator); 1166 TSuffixSize prefixLength = parentEdgeLength(pattern.index_iterator); 1167 TSuffixSize prefixPos = pattern.prefix_aligner.global_position - parentRepLength(pattern.index_iterator); 1168 setLength(pattern.prefix_aligner, _min(prefixLength, pattern.depth - parentRepLength(pattern.index_iterator))); 1169 setPosition(pattern.prefix_aligner, prefixPos); 1170 1171 // Align exactly suffix with prefix 1172 if (match(finder.suffix_aligner, pattern.prefix_aligner, suffixEdge, prefixEdge)) 1173 { 1174 if (!_atEnd(pattern.prefix_aligner) && _atEnd(finder.suffix_aligner)) 1175 { 1176 TPrefixSize extension = pattern.prefix_aligner.prefix_length - pattern.prefix_aligner.prefix_position; 1177 1178 TTextIndexIterator index_iterator(finder.index_iterator); 1179 // NOTE(esiragusa): This works if prefixEdge is a sequence, not if it is a simple type. 1180 if (!goDown(finder.index_iterator, 1181 infixWithLength(prefixEdge, pattern.prefix_aligner.prefix_position, extension))) 1182 { 1183 finder.index_iterator = index_iterator; 1184 if (!_cut_exact(finder, pattern)) 1185 break; 1186 1187 continue; 1188 } 1189 1190 pattern.prefix_aligner.global_position += extension; 1191 // NOTE(esiragusa): It is not necessary to update length and position as long as they are not used below. 1192 // suffixEdge = parentEdgeLabel(finder.index_iterator); 1193 // suffixLength = parentEdgeLength(finder.index_iterator); 1194 // suffixPos = pattern.prefix_aligner.global_position - parentRepLength(finder.index_iterator); 1195 // setLength(finder.suffix_aligner, suffixLength); 1196 // setPosition(finder.suffix_aligner, suffixPos); 1197 } 1198 1199 // A complete match was found 1200 if (pattern.prefix_aligner.global_position >= pattern.depth) 1201 { 1202 finder.index_range = range(finder.index_iterator); 1203 pattern.index_range = range(pattern.index_iterator); 1204 return true; 1205 } 1206 else if (prefixLength < pattern.depth && !isLeaf(pattern.index_iterator)) 1207 { 1208 ++pattern.exact; 1209 1210 appendValue(finder.index_parents, finder.index_iterator); 1211 appendValue(pattern.index_parents, pattern.index_iterator); 1212 // appendValue(pattern.state, getState(pattern.prefix_aligner)); 1213 appendValue(pattern.state, TState(pattern.prefix_aligner.global_position, pattern.prefix_aligner.errors)); 1214 1215 goDown(pattern.index_iterator); 1216 } 1217 } 1218 else if (!_cut_exact(finder, pattern)) 1219 break; 1220 1221 } 1222 while (!(isRoot(pattern.index_iterator) && isRoot(finder.index_iterator))); 1223 1224 SEQAN_ASSERT_NOT(pattern.exact); 1225 1226 return false; 1227 } 1228 1229 template <typename TText, typename TTextSpec, typename TDistance, typename TBacktrackingSpec, typename TNeedle, typename TNeedleSpec> 1230 inline bool 1231 _cut_exact(Finder<Index<TText, TTextSpec>, Backtracking<TDistance, TBacktrackingSpec> > & finder, 1232 Pattern<Index<TNeedle, TNeedleSpec>, Backtracking<TDistance, TBacktrackingSpec> > & pattern) 1233 { 1234 do 1235 { 1236 if (!pattern.exact) 1237 return false; 1238 1239 if (goRight(pattern.index_iterator)) 1240 { 1241 SEQAN_ASSERT_NOT(empty(finder.index_parents)); 1242 finder.index_iterator = back(finder.index_parents); 1243 break; 1244 } 1245 1246 SEQAN_ASSERT_NOT(empty(finder.index_parents)); 1247 finder.index_iterator = back(finder.index_parents); 1248 eraseBack(finder.index_parents); 1249 SEQAN_ASSERT_NOT(empty(pattern.index_parents)); 1250 pattern.index_iterator = back(pattern.index_parents); 1251 eraseBack(pattern.index_parents); 1252 SEQAN_ASSERT_NOT(empty(pattern.state)); 1253 eraseBack(pattern.state); 1254 1255 --pattern.exact; 1256 } 1257 while (!(isRoot(pattern.index_iterator) && isRoot(finder.index_iterator))); 1258 1259 return !(isRoot(pattern.index_iterator) && isRoot(finder.index_iterator)); 1260 } 1261 1262 // ============================================================================ 1263 1264 template <typename TText, typename TSpec, typename TDistance, typename TBacktrackingSpec, typename TNeedle, typename TErrors> 1265 inline bool 1266 find(Finder<Index<TText, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & finder, 1267 Pattern<TNeedle, Backtracking<TDistance, TBacktrackingSpec> > & pattern, 1268 TErrors errors) 1269 { 1270 1271 // Try to get another match from current node 1272 if (!atEnd(finder)) 1273 goNext(hostIterator(finder)); 1274 1275 // Try to get more matches by backtracking some more 1276 if (atEnd(finder)) 1277 { 1278 // Resume from last matching node and backtrack until next match 1279 if (_resume(finder, pattern) && _backtrack(finder, pattern, errors)) 1280 { 1281 // Set data iterator range to the interval containing matches 1282 hostIterator(finder) = begin(indexSA(host(finder)), Standard()); 1283 finder.range.i1 = hostIterator(finder) + finder.index_range.i1; 1284 finder.range.i2 = hostIterator(finder) + finder.index_range.i2; 1285 hostIterator(finder) = finder.range.i1; 1286 1287 // Set match length 1288 _setFinderLength(finder, pattern.prefix_aligner.global_position); 1289 } 1290 // No more matches 1291 else 1292 { 1293 hostIterator(finder) = begin(indexSA(host(finder)), Standard()); 1294 finder.range.i1 = hostIterator(finder); 1295 finder.range.i2 = hostIterator(finder); 1296 } 1297 } 1298 1299 return !atEnd(finder); 1300 } 1301 1302 template <typename TText, typename TTextSpec, typename TDistance, typename TBacktrackingSpec, typename TNeedle, typename TNeedleSpec, typename TErrors> 1303 inline bool 1304 find(Finder<Index<TText, TTextSpec>, Backtracking<TDistance, TBacktrackingSpec> > & finder, 1305 Pattern<Index<TNeedle, TNeedleSpec>, Backtracking<TDistance, TBacktrackingSpec> > & pattern, 1306 TErrors errors) 1307 { 1308 1309 // Try to get another match from current pattern node 1310 goNext(hostIterator(pattern)); 1311 1312 // Try to get another match from current text node 1313 if (atEnd(pattern)) 1314 { 1315 goNext(hostIterator(finder)); 1316 1317 if (!atEnd(finder)) 1318 { 1319 // Set data iterator range to the interval containing matches 1320 hostIterator(pattern) = begin(indexSA(host(pattern)), Standard()); 1321 pattern.range.i1 = hostIterator(pattern) + pattern.index_range.i1; 1322 pattern.range.i2 = hostIterator(pattern) + pattern.index_range.i2; 1323 hostIterator(pattern) = pattern.range.i1; 1324 } 1325 // Try to get more matches by backtracking some more 1326 else 1327 { 1328 // Resume from last matching node and backtrack until next match 1329 // if (_resume(finder, pattern) && _backtrack(finder, pattern, errors)) 1330 if (_resume(finder, pattern, errors)) 1331 { 1332 // Set data iterator range to the interval containing matches 1333 hostIterator(finder) = begin(indexSA(host(finder)), Standard()); 1334 finder.range.i1 = hostIterator(finder) + finder.index_range.i1; 1335 finder.range.i2 = hostIterator(finder) + finder.index_range.i2; 1336 hostIterator(finder) = finder.range.i1; 1337 1338 hostIterator(pattern) = begin(indexSA(host(pattern)), Standard()); 1339 pattern.range.i1 = hostIterator(pattern) + pattern.index_range.i1; 1340 pattern.range.i2 = hostIterator(pattern) + pattern.index_range.i2; 1341 hostIterator(pattern) = pattern.range.i1; 1342 1343 // Set match length 1344 _setFinderLength(finder, pattern.prefix_aligner.global_position); 1345 pattern.data_length = pattern.prefix_aligner.global_position; 1346 } 1347 // No more matches 1348 else 1349 { 1350 hostIterator(finder) = begin(indexSA(host(finder)), Standard()); 1351 finder.range.i1 = hostIterator(finder); 1352 finder.range.i2 = hostIterator(finder); 1353 1354 hostIterator(pattern) = begin(indexSA(host(pattern)), Standard()); 1355 pattern.range.i1 = hostIterator(pattern); 1356 pattern.range.i2 = hostIterator(pattern); 1357 } 1358 } 1359 1360 } 1361 1362 return !(atEnd(finder) && atEnd(pattern)); 1363 } 1364 1365 //template <typename TText, typename TSpec, typename TDistance, typename TBacktrackingSpec, typename TNeedle> 1366 //inline bool 1367 //find(Finder<Index<TText, TSpec>, Backtracking<TDistance, TBacktrackingSpec> > & finder, 1368 // Pattern<TNeedle, Backtracking<TDistance, TBacktrackingSpec> > & pattern) 1369 //{ 1370 // return find(finder, pattern, 0u); 1371 //} 1372 1373 } 1374 1375 #endif // #ifndef SEQAN_FIND_BACKTRACKING_H_ 1376