1 // ========================================================================== 2 // SeqAn - The Library for Sequence Analysis 3 // ========================================================================== 4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin 5 // All rights reserved. 6 // 7 // Redistribution and use in source and binary forms, with or without 8 // modification, are permitted provided that the following conditions are met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above copyright 13 // notice, this list of conditions and the following disclaimer in the 14 // documentation and/or other materials provided with the distribution. 15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of 16 // its contributors may be used to endorse or promote products derived 17 // from this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 29 // DAMAGE. 30 // 31 // ========================================================================== 32 // Author: David Weese <david.weese@fu-berlin.de> 33 // ========================================================================== 34 // This filter uses the pigeonhole principle to search k-error matches 35 // between strings in a stringset and a text. 36 // The pigeonhole principle states: 37 // If a string is split into k+1 pieces then in every k-error match of the 38 // whole string one piece matches without error. 39 // ========================================================================== 40 41 42 #ifndef INCLUDE_SEQAN_FIND_PIGEONHOLE_H_ 43 #define INCLUDE_SEQAN_FIND_PIGEONHOLE_H_ 44 45 namespace seqan 46 { 47 48 // ========================================================================== 49 // Forwards 50 // ========================================================================== 51 52 template < typename TObject, typename TSpec > class Index; 53 template < typename TObject > struct SAValue; 54 55 // ========================================================================== 56 // Tags, Classes, Enums 57 // ========================================================================== 58 59 template <typename TSpec = void> 60 struct Pigeonhole { 61 enum { ONE_PER_DIAGONAL = 1 }; // 1..record last seed diagonal and ignore seeds on the same diag. (heuristic) 62 enum { HAMMING_ONLY = 0 }; // 0..no indels; 1..allow indels 63 }; 64 65 template <> 66 struct Pigeonhole<Hamming_> { 67 enum { ONE_PER_DIAGONAL = 1 }; // 1..record last seed diagonal and ignore seeds on the same diag. (heuristic) 68 enum { HAMMING_ONLY = 1 }; // 0..no indels; 1..allow indels 69 }; 70 71 ////////////////////////////////////////////////////////////////////////////// 72 73 74 //____________________________________________________________________________ 75 76 77 template <typename TSpec, typename THstkPos> 78 struct PigeonholeHit_ 79 { 80 THstkPos hstkPos; // seed diagonal begin in haystack 81 unsigned ndlSeqNo; // needle sequence number 82 }; 83 84 template <typename THaystack, typename TPattern, typename TPigeonholeSpec> 85 struct FindResult<Finder<THaystack, Pigeonhole<TPigeonholeSpec> >, TPattern> 86 { 87 typedef SwiftHitSemiGlobal_<int64_t> Type; 88 }; 89 90 /*! 91 * @class PigeonholeParameters 92 * @headerfile <seqan/index/find_pigeonhole.h> 93 * @brief Parameters for the pigeonhole filter algorithm. 94 * 95 * @signature struct PigeonholeParameters; 96 * 97 * @see PigeonholePattern 98 * 99 * 100 * @fn PigeonholeParameters::PigeonholeParameters 101 * @brief Default constructor. 102 * 103 * @signature PigeonholeParameters::PigeonholeParameters(); 104 * 105 * Initializes all member variables, default values are given in the variable documentation. 106 * 107 * 108 * @var unsigned PigeonholeParameters::overlap; 109 * @brief Overlap length of adjacent q-grams, default is to use non-overlappling q-grams (<tt>= 0</tt>). 110 * 111 * @var unsigned PigeonholeParameters::delta; 112 * @brief Delta length, becomes index step site; defaults to <tt>0</tt>. 113 * 114 * A value of <tt>0</tt> means that the delta will be auto-detected. 115 * 116 * @var bool PigeonholeParameters::printDots; 117 * @brief Whether to print dots for the scanning progress, defaults to <tt>false</tt>. 118 * 119 * @var bool PigeonholeParameters::debug; 120 * @brief Whether to print debug information, defaults to <tt>false</tt>. 121 */ 122 123 124 struct PigeonholeParameters 125 { 126 unsigned overlap; // overlap length of adjacent q-grams, default is to use non-overlapping q-grams (=0) 127 unsigned delta; // delta length (0 = automatic detection), becomes index stepSize 128 bool printDots; // the q-gram will have length q=delta+overlap 129 bool debug; 130 131 PigeonholeParameters() : 132 overlap(0), 133 delta(0), 134 printDots(false), // print a . for every 100kbps mapped genome 135 debug(false) 136 {} 137 }; 138 139 140 //____________________________________________________________________________ 141 142 143 /*! 144 * @class PigeonholeFinder 145 * @extends Finder 146 * @headerfile <seqan/index/find_pigeonhole.h> 147 * 148 * @brief Pigeonhole-based finder. Must be used together with @link PigeonholePattern @endlink. 149 * 150 * @signature template <typename THaystack, typename TSpec> 151 * class Finder<THaystack, Pigeonhole<TSpec> >; 152 * 153 * @tparam TSpec Specifies the type of pigeonhole filter. 154 * @tparam THaystack The type of the sequence that should be searched. 155 * Types: @link ContainerConcept @endlink 156 * 157 * Provides a fast filter algorithm that uses the pigeonhole lemma, i.e. if a pattern matches with k errors in the text, 158 * every partition into k+1 parts contains one part that matches without error. 159 * 160 * The @link Pattern @endlink must be a q-gram index over multiple patterns. The tolerated error rate must be given 161 * when @link Finder#find @endlink or @link PigeonholeFinder#windowFindBegin @endlink is called. In these functions the 162 * length of the index @link Shape @endlink is set automatically thus it must be modifiable at runtime, e.g. @link 163 * OneGappedShape @endlink. 164 * 165 * @see PigeonholePattern 166 * 167 * @var PigeonholeParameters PigeonholePattern::parameters; 168 * @brief The pigeonhole parameters to use for configuration. 169 */ 170 171 /*! 172 * @class PigeonholePattern 173 * @extends Pattern 174 * @headerfile <seqan/index/find_pigeonhole.h> 175 * 176 * @brief Pigeonhole-based pattern. Must be used together with @link PigeonholeFinder @endlink. 177 * 178 * See @link PigeonholeFinder @endlink for more information on the algorithm. 179 * 180 * @signature template <typename THaystack, typename TSpec> 181 * class Pattern<TIndex, Pigeonhole<TSpec> >; 182 * 183 * @tparam TSpec Specifies the type of pigeonhole filter. 184 * @tparam TIndex A q-gram index of needle(s) that should be searched for. 185 * Types: @link IndexQGram @endlink 186 * 187 * @see PigeonholeFinder 188 */ 189 190 /*! 191 * @typedef PigeonholeFinder::THitString 192 * @brief The type to use for the hit string returned in @link PigeonholeFinder#getWindowFindHits @endlink. 193 * 194 * @signature typedef (...) PigeonholeFinder::THitString; 195 */ 196 197 // docu is now in find_pattern_base.h 198 template <typename THaystack, typename TSpec> 199 class Finder<THaystack, Pigeonhole<TSpec> > 200 { 201 public: 202 typedef typename Iterator<THaystack, Rooted>::Type TIterator; 203 typedef typename Position<THaystack>::Type THstkPos; 204 // typedef PigeonholeHit_<TSpec, int64_t> TPigeonholeHit; 205 typedef typename FindResult<Finder>::Type TPigeonholeHit; 206 typedef typename WindowFindResult<Finder>::Type THitString; 207 typedef typename Iterator<THitString, Standard>::Type THitIterator; 208 typedef typename SAValue<THaystack>::Type TSAValue; 209 typedef Repeat<TSAValue, unsigned> TRepeat; 210 // TODO(holtgrew): Make this a holder so we can share the actual string when copying. 211 typedef String<TRepeat> TRepeatString; 212 typedef typename Iterator<TRepeatString, Standard>::Type TRepeatIterator; 213 214 TIterator data_iterator; 215 TIterator haystackEnd; 216 bool _needReinit; // if true, the Pattern needs to be reinitialized 217 THitString hits; 218 THitIterator curHit, endHit; 219 THstkPos startPos, curPos, endPos; 220 THstkPos windowStart; 221 THstkPos dotPos, dotPos2; 222 TRepeatString data_repeats; 223 TRepeatIterator curRepeat, endRepeat; 224 225 Finder(): 226 _needReinit(true) { } 227 228 Finder(THaystack &haystack): 229 data_iterator(begin(haystack, Rooted())), 230 _needReinit(true) { } 231 232 template <typename TRepeatSize, typename TPeriodSize> 233 Finder(THaystack &haystack, TRepeatSize minRepeatLen, TPeriodSize maxPeriod): 234 data_iterator(begin(haystack, Rooted())), 235 _needReinit(true) 236 { 237 findRepeats(data_repeats, haystack, minRepeatLen, maxPeriod); 238 } 239 240 Finder(TIterator &iter): 241 data_iterator(iter), 242 _needReinit(true) { } 243 244 Finder(TIterator const &iter): 245 data_iterator(iter), 246 _needReinit(true) { } 247 248 Finder(Finder const &orig): 249 data_iterator(orig.data_iterator), 250 haystackEnd(orig.haystackEnd), 251 _needReinit(orig._needReinit), 252 hits(orig.hits), 253 startPos(orig.startPos), 254 curPos(orig.curPos), 255 endPos(orig.endPos), 256 windowStart(orig.windowStart), 257 dotPos(orig.dotPos), 258 dotPos2(orig.dotPos2), 259 data_repeats(orig.data_repeats) 260 { 261 curHit = begin(hits, Standard()) + (orig.curHit - begin(orig.hits, Standard())); 262 endHit = end(hits, Standard()); 263 curRepeat = begin(data_repeats, Standard()) + (orig.curRepeat - begin(orig.data_repeats, Standard())); 264 endRepeat = end(data_repeats, Standard()); 265 } 266 267 inline typename Reference<TIterator>::Type 268 operator* () { return value(hostIterator(*this)); } 269 270 inline typename Reference<TIterator const>::Type 271 operator* () const { return value(hostIterator(*this)); } 272 273 operator TIterator () const { return data_iterator; } 274 275 Finder & operator = (Finder const &orig) 276 { 277 data_iterator = orig.data_iterator; 278 haystackEnd = orig.haystackEnd; 279 _needReinit = orig._needReinit; 280 hits = orig.hits; 281 startPos = orig.startPos; 282 windowStart = orig.windowStart; 283 curPos = orig.curPos; 284 endPos = orig.endPos; 285 dotPos = orig.dotPos; 286 dotPos2 = orig.dotPos2; 287 data_repeats = orig.data_repeats; 288 curHit = begin(hits, Standard()) + (orig.curHit - begin(orig.hits, Standard())); 289 endHit = end(hits, Standard()); 290 curRepeat = begin(data_repeats, Standard()) + (orig.curRepeat - begin(orig.data_repeats, Standard())); 291 endRepeat = end(data_repeats, Standard()); 292 return *this; 293 } 294 }; 295 296 //____________________________________________________________________________ 297 298 299 template <typename THaystack, typename TSpec> 300 inline bool 301 atEnd(Finder<THaystack, Pigeonhole<TSpec> > & me) 302 { 303 return hostIterator(hostIterator(me)) == hostIterator(me.haystackEnd); 304 } 305 306 template <typename THaystack, typename TSpec> 307 inline void 308 goEnd(Finder<THaystack, Pigeonhole<TSpec> > & me) 309 { 310 hostIterator(me) = me.haystackEnd; 311 } 312 313 314 //____________________________________________________________________________ 315 316 317 template <typename TIndex, typename TSpec> 318 class Pattern<TIndex, Pigeonhole<TSpec> > 319 { 320 public: 321 typedef typename Size<TIndex>::Type TSize; 322 typedef unsigned TShortSize; 323 typedef typename Fibre<TIndex, Tag<FibreSA_> const >::Type TSA; 324 typedef typename Fibre<TIndex, Tag<Fibre_Shape_> const >::Type TShape; 325 typedef typename Iterator<TSA const, Standard>::Type TIterator; 326 327 TShape shape; 328 int64_t curBeginPos, curEndPos; 329 int64_t finderLength; 330 int64_t seqDisabled; 331 TSize finderPosOffset; // these must be of 332 TSize finderPosNextOffset; // type TSize of TBucket 333 TSize maxSeqLen; 334 unsigned curSeqNo; 335 String<int64_t> lastSeedDiag; 336 PigeonholeParameters params; 337 double _currentErrorRate; 338 339 Holder<TIndex> data_host; 340 341 Pattern() 342 { 343 clear(*this); 344 } 345 Pattern(TIndex &_index): data_host(_index) 346 { 347 clear(*this); 348 } 349 Pattern(TIndex const &_index): data_host(_index) 350 { 351 clear(*this); 352 } 353 }; 354 355 //____________________________________________________________________________ 356 357 358 template <typename TValue, typename TSpec> 359 inline unsigned 360 _pigeonholeMaxShapeWeight(Shape<TValue, TSpec> const &) 361 { 362 typedef typename Value<Shape<TValue, SimpleShape> >::Type THashValue; 363 return (unsigned)(BitsPerValue<THashValue>::VALUE - 1)/ (unsigned)BitsPerValue<TValue>::VALUE; 364 } 365 366 template <typename TShape, typename TSize> 367 inline bool 368 _pigeonholeUpdateShapeLength(TShape, TSize) 369 { 370 return false; 371 } 372 373 template <typename TValue, typename TSize> 374 inline bool 375 _pigeonholeUpdateShapeLength(Shape<TValue, SimpleShape> &shape, TSize newLength) 376 { 377 TSize weight = _pigeonholeMaxShapeWeight(shape); 378 379 if (weight > newLength) 380 weight = newLength; 381 382 if (weight == length(shape)) 383 return false; 384 385 resize(shape, weight); 386 return true; 387 } 388 389 template <typename TValue, typename TSize> 390 inline bool 391 _pigeonholeUpdateShapeLength(Shape<TValue, OneGappedShape> &shape, TSize newLength) 392 { 393 if (length(shape) == newLength) 394 return false; 395 396 TSize weight = _pigeonholeMaxShapeWeight(shape); 397 398 if (weight > newLength) 399 weight = newLength; 400 401 CharString str; 402 resize(str, weight / 2, '1'); 403 resize(str, newLength - (weight + 1) / 2, '0'); 404 resize(str, newLength, '1'); 405 stringToShape(shape, str); 406 return true; 407 } 408 409 template <typename TValue, typename TSize> 410 inline bool 411 _pigeonholeUpdateShapeLength(Shape<TValue, GenericShape> &shape, TSize newLength) 412 { 413 if (length(shape) == newLength) 414 return false; 415 416 TSize weight = _pigeonholeMaxShapeWeight(shape); 417 418 if (weight >= newLength) 419 weight = newLength; 420 421 CharString str; 422 resize(str, weight / 2, '1'); 423 resize(str, newLength - (weight + 1) / 2, '0'); 424 resize(str, newLength, '1'); 425 stringToShape(shape, str); 426 427 return true; 428 } 429 430 /*! 431 * @fn PigeonholePattern#maskPatternSequence 432 * @headerfile <seqan/index/find_pigeonhole.h> 433 * @brief Mask and unmask pattern sequence. 434 * 435 * @note Disabling sequences in the pigeonhole filter requires the <tt>ONE_PER_DIAGONAL</tt> heuristic. 436 * 437 * @signature void maskPatternSequence(pattern, seqNo, enable); 438 * 439 * @param[in,out] pattern The PigeonholePattern to update. 440 * @param[in] seqNo Index of the sequence to disable/enable. 441 * @param[in] enable A <tt>bool</tt> that indicates whether hits are to be generated for the given sequence. 442 */ 443 444 template <typename TIndex, typename TSpec, typename TSeqNo> 445 inline void 446 maskPatternSequence(Pattern<TIndex, Pigeonhole<TSpec> > & pattern, TSeqNo seqNo, bool enable) 447 { 448 SEQAN_ASSERT_NEQ_MSG((int)Pigeonhole<TSpec>::ONE_PER_DIAGONAL, 0, 449 "Disabling sequences in the pigeonhole filter requires the ONE_PER_DIAGONAL heuristic."); 450 451 if (enable) 452 pattern.lastSeedDiag[seqNo] = -(int64_t)pattern.maxSeqLen; 453 else 454 pattern.lastSeedDiag[seqNo] = pattern.seqDisabled; 455 } 456 457 template <typename TIndex, typename TFloat, typename TSpec> 458 inline void _patternInit(Pattern<TIndex, Pigeonhole<TSpec> > &pattern, TFloat errorRate) 459 { 460 typedef typename Size<TIndex>::Type TSize; 461 462 double _newErrorRate = errorRate; 463 TSize seqCount = countSequences(host(pattern)); 464 465 if (pattern._currentErrorRate != _newErrorRate) 466 { 467 // settings have been changed -> initialize bucket parameters 468 469 pattern._currentErrorRate = _newErrorRate; 470 471 TSize minDelta = std::numeric_limits<TSize>::max(); 472 TSize maxDelta = 3; 473 TSize maxSeqLen = 0; 474 for(unsigned seqNo = 0; seqNo < seqCount; ++seqNo) 475 { 476 // get pattern length and max. allowed errors 477 TSize length = sequenceLength(seqNo, host(pattern)); 478 if (maxSeqLen < length) maxSeqLen = length; 479 480 // sequence must have sufficient length 481 if (length <= pattern.params.overlap) continue; 482 483 // cut overlap many characters from the end 484 TSize errors = (TSize) floor(errorRate * length); 485 length -= pattern.params.overlap; 486 TSize delta = length / (errors + 1); 487 488 489 // ignore too short q-grams 490 if (delta < 3) continue; 491 if (minDelta > delta) minDelta = delta; 492 if (maxDelta < delta) maxDelta = delta; 493 } 494 pattern.maxSeqLen = maxSeqLen; 495 if (minDelta < 3) minDelta = maxDelta; 496 497 TIndex &index = host(pattern); 498 pattern.finderPosOffset = 0; 499 pattern.finderPosNextOffset = pattern.maxSeqLen + pattern.finderLength; 500 501 if (pattern.params.delta != 0) 502 { 503 // use user-defined delta 504 minDelta = pattern.params.delta; 505 } 506 507 if (minDelta == std::numeric_limits<TSize>::max()) 508 { 509 // disable index 510 minDelta = pattern.maxSeqLen + 1; 511 } 512 513 if (_pigeonholeUpdateShapeLength(pattern.shape, minDelta + pattern.params.overlap) || getStepSize(index) != minDelta) 514 { 515 clear(index); 516 setStepSize(index, minDelta); 517 } 518 indexShape(host(pattern)) = pattern.shape; 519 // double start = sysTime(); 520 indexRequire(host(pattern), QGramSADir()); 521 // double stop = sysTime(); 522 // std::cout << "created in " <<(stop-start) << " seconds" << std::endl; 523 524 clear(pattern.lastSeedDiag); 525 if (Pigeonhole<TSpec>::ONE_PER_DIAGONAL) 526 resize(pattern.lastSeedDiag, seqCount, -(int64_t)maxSeqLen); 527 pattern.seqDisabled = -(int64_t)maxSeqLen - 1; 528 } 529 else 530 { 531 // settings are unchanged -> reset buckets 532 533 // finderPosOffset is used to circumvent expensive resetting of all buckets 534 pattern.finderPosOffset = pattern.finderPosNextOffset; 535 pattern.finderPosNextOffset += pattern.maxSeqLen + pattern.finderLength; 536 } 537 } 538 539 template < 540 typename TFinder, 541 typename TIndex, 542 typename TSpec, 543 typename THValue 544 > 545 inline bool _pigeonholeProcessQGram( 546 TFinder &finder, 547 Pattern<TIndex, Pigeonhole<TSpec> > &pattern, 548 THValue hash) 549 { 550 //typedef Pattern<TIndex, Pigeonhole<TSpec> > TPattern; 551 //typedef typename TFinder::THstkPos THstkPos; 552 553 typedef typename Fibre<TIndex, QGramSA>::Type const TSA; 554 typedef typename Iterator<TSA, Standard>::Type TSAIter; 555 typedef typename TFinder::TPigeonholeHit THit; 556 557 TIndex const &index = host(pattern); 558 559 // all previous matches reported -> search new ones 560 clear(finder.hits); 561 562 TSAIter saBegin = begin(indexSA(index), Standard()); 563 Pair<unsigned> ndlPos; 564 THit hit; 565 566 unsigned bktNo = getBucket(index.bucketMap, hash); 567 TSAIter occ = saBegin + indexDir(index)[bktNo]; 568 TSAIter occEnd = saBegin + indexDir(index)[bktNo + 1]; 569 570 for(; occ != occEnd; ++occ) 571 { 572 posLocalize(ndlPos, *occ, stringSetLimits(index)); 573 hit.hstkPos = finder.curPos; 574 hit.hstkPos -= getSeqOffset(ndlPos); // bucket begin in haystack 575 hit.ndlSeqNo = getSeqNo(ndlPos); // needle seq. number 576 if (Pigeonhole<TSpec>::ONE_PER_DIAGONAL) 577 { 578 int64_t diag = hit.hstkPos + (int64_t)pattern.finderPosOffset; 579 if (pattern.lastSeedDiag[hit.ndlSeqNo] == diag) 580 continue; 581 pattern.lastSeedDiag[hit.ndlSeqNo] = diag; 582 } 583 unsigned ndlLength = sequenceLength(hit.ndlSeqNo, host(pattern)); 584 585 if (Pigeonhole<TSpec>::HAMMING_ONLY != 0) 586 { 587 hit.bucketWidth = ndlLength; 588 } 589 else 590 { 591 unsigned indels = (unsigned)floor(pattern._currentErrorRate * ndlLength); 592 hit.bucketWidth = ndlLength + (indels << 1); 593 hit.hstkPos -= indels; 594 } 595 appendValue(finder.hits, hit); 596 } 597 598 finder.curHit = begin(finder.hits, Standard()); 599 finder.endHit = end(finder.hits, Standard()); 600 601 return !empty(finder.hits); 602 } 603 604 605 template <typename TIndex, typename TSpec> 606 inline bool 607 empty(Pattern<TIndex, Pigeonhole<TSpec> > & me) 608 { 609 return me._currentErrorRate == -1; 610 } 611 612 template <typename TIndex, typename TSpec> 613 inline void 614 clear(Pattern<TIndex, Pigeonhole<TSpec> > & me) 615 { 616 me.finderPosOffset = 0; 617 me.finderPosNextOffset = 0; 618 me.finderLength = 0; 619 me._currentErrorRate = -1; 620 clear(me.lastSeedDiag); 621 } 622 623 //____________________________________________________________________________ 624 625 template <typename THaystack, typename TSpec> 626 inline typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type 627 position(Finder<THaystack, Pigeonhole<TSpec> > const & finder) 628 { 629 typename Finder<THaystack, Pigeonhole<TSpec> >::TPigeonholeHit &hit = *finder.curHit; 630 return hit.hstkPos + hit.bucketWidth; 631 } 632 633 template <typename THaystack, typename TSpec> 634 inline typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type 635 position(Finder<THaystack, Pigeonhole<TSpec> > & finder) 636 { 637 return position(const_cast<Finder<THaystack, Pigeonhole<TSpec> > const &>(finder)); 638 } 639 640 //____________________________________________________________________________ 641 642 template <typename TIndex, typename TSpec> 643 inline typename SAValue<TIndex>::Type 644 position(Pattern<TIndex, Pigeonhole<TSpec> > const & pattern) 645 { 646 typedef typename Size<TIndex>::Type TSize; 647 typename SAValue<TIndex >::Type pos; 648 posLocalToX(pos, Pair<unsigned, TSize>(pattern.curSeqNo, length(needle(pattern))), stringSetLimits(host(pattern))); 649 return pos; 650 } 651 652 template <typename TIndex, typename TSpec> 653 inline typename SAValue<TIndex>::Type 654 position(Pattern<TIndex, Pigeonhole<TSpec> > & pattern) 655 { 656 return position(const_cast<Pattern<TIndex, Pigeonhole<TSpec> > const &>(pattern)); 657 } 658 659 //____________________________________________________________________________ 660 661 template <typename THaystack, typename TSpec> 662 inline int64_t 663 beginPosition(Finder<THaystack, Pigeonhole<TSpec> > const & finder) 664 { 665 return (*finder.curHit).hstkPos; 666 } 667 668 template <typename THaystack, typename TSpec> 669 inline int64_t 670 beginPosition(Finder<THaystack, Pigeonhole<TSpec> > & finder) 671 { 672 return beginPosition(const_cast<Finder<THaystack, Pigeonhole<TSpec> > const &>(finder)); 673 } 674 675 //____________________________________________________________________________ 676 677 template <typename TIndex, typename TSpec> 678 inline typename SAValue<TIndex >::Type 679 beginPosition(Pattern<TIndex, Pigeonhole<TSpec> > const & pattern) 680 { 681 typename SAValue<TIndex >::Type pos; 682 posLocalToX(pos, Pair<unsigned>(pattern.curSeqNo, 0), stringSetLimits(host(pattern))); 683 return pos; 684 } 685 686 template <typename TIndex, typename TSpec> 687 inline typename SAValue<TIndex>::Type 688 beginPosition(Pattern<TIndex, Pigeonhole<TSpec> > & pattern) 689 { 690 return beginPosition(const_cast<Pattern<TIndex, Pigeonhole<TSpec> > const &>(pattern)); 691 } 692 693 //____________________________________________________________________________ 694 695 template <typename THaystack, typename TSpec> 696 inline typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type 697 endPosition(Finder<THaystack, Pigeonhole<TSpec> > const & finder) 698 { 699 typename Finder<THaystack, Pigeonhole<TSpec> >::TPigeonholeHit &hit = *finder.curHit; 700 return hit.hstkPos + hit.bucketWidth; 701 } 702 703 template <typename THaystack, typename TSpec> 704 inline typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type 705 endPosition(Finder<THaystack, Pigeonhole<TSpec> > & finder) 706 { 707 return endPosition(const_cast<Finder<THaystack, Pigeonhole<TSpec> > const &>(finder)); 708 } 709 710 //____________________________________________________________________________ 711 712 template <typename TIndex, typename TSpec> 713 inline typename SAValue<TIndex >::Type 714 endPosition(Pattern<TIndex, Pigeonhole<TSpec> > const & pattern) 715 { 716 typedef typename Size<TIndex>::Type TSize; 717 typename SAValue<TIndex >::Type pos; 718 posLocalToX(pos, Pair<unsigned, TSize>(pattern.curSeqNo, length(needle(pattern))), stringSetLimits(host(pattern))); 719 return pos; 720 } 721 722 template <typename TIndex, typename TSpec> 723 inline typename SAValue<TIndex>::Type 724 endPosition(Pattern<TIndex, Pigeonhole<TSpec> > & pattern) 725 { 726 return endPosition(const_cast<Pattern<TIndex, Pigeonhole<TSpec> > const &>(pattern)); 727 } 728 729 template <typename THaystack, typename TSpec> 730 inline Pair<typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type> 731 positionRangeNoClip(Finder<THaystack, Pigeonhole<TSpec> > const & finder) 732 { 733 typedef typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type TPosition; 734 typedef Pair<TPosition> TPair; 735 typename Finder<THaystack, Pigeonhole<TSpec> >::TPigeonholeHit &hit = *finder.curHit; 736 return TPair((TPosition)hit.hstkPos, (TPosition)(hit.hstkPos + hit.bucketWidth)); 737 } 738 739 template <typename THaystack, typename TSpec> 740 inline Pair<typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type> 741 positionRangeNoClip(Finder<THaystack, Pigeonhole<TSpec> > & finder) 742 { 743 return positionRangeNoClip(const_cast<Finder<THaystack, Pigeonhole<TSpec> > const &>(finder)); 744 } 745 746 template <typename THaystack, typename TSpec> 747 inline Pair<typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type> 748 positionRange(Finder<THaystack, Pigeonhole<TSpec> > const & finder) 749 { 750 typedef typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type TPosition; 751 typedef Pair<TPosition> TPair; 752 typename Finder<THaystack, Pigeonhole<TSpec> >::TPigeonholeHit &hit = *finder.curHit; 753 754 int64_t hitBegin = hit.hstkPos; 755 int64_t hitEnd = hit.hstkPos + hit.bucketWidth; 756 int64_t textEnd = length(haystack(finder)); 757 758 if (hitBegin < 0) hitBegin = 0; 759 if (hitEnd > textEnd) hitEnd = textEnd; 760 return TPair((TPosition)hitBegin, (TPosition)hitEnd); 761 } 762 763 template <typename THaystack, typename TSpec> 764 inline Pair<typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type> 765 positionRange(Finder<THaystack, Pigeonhole<TSpec> > & finder) 766 { 767 return positionRange(const_cast<Finder<THaystack, Pigeonhole<TSpec> > const &>(finder)); 768 } 769 770 //____________________________________________________________________________ 771 772 template <typename TIndex, typename TSpec> 773 inline Pair<typename SAValue<TIndex>::Type> 774 positionRange(Pattern<TIndex, Pigeonhole<TSpec> > & pattern) 775 { 776 return Pair<typename SAValue<TIndex>::Type> (beginPosition(pattern), endPosition(pattern)); 777 } 778 779 //____________________________________________________________________________ 780 781 template <typename TPigeonholeHit, typename TText> 782 inline typename Infix<TText>::Type 783 pigeonholeInfixNoClip(TPigeonholeHit const &hit, TText &text) 784 { 785 return infix(text, hit.hstkPos, hit.hstkPos + hit.bucketWidth); 786 } 787 788 template <typename TPigeonholeHit, typename TText> 789 inline typename Infix<TText>::Type 790 pigeonholeInfix(TPigeonholeHit const &hit, TText &text) 791 { 792 int64_t hitBegin = hit.hstkPos; 793 int64_t hitEnd = hit.hstkPos + hit.bucketWidth; 794 int64_t textEnd = length(text); 795 796 if (hitBegin < 0) hitBegin = 0; 797 if (hitEnd > textEnd) hitEnd = textEnd; 798 SEQAN_ASSERT_LEQ(hitBegin, hitEnd); 799 return infix(text, hitBegin, hitEnd); 800 } 801 802 //____________________________________________________________________________ 803 804 template <typename THaystack, typename TSpec> 805 inline typename Infix<THaystack>::Type 806 infix(Finder<THaystack, Pigeonhole<TSpec> > &finder) 807 { 808 typename Parameter_<THaystack>::Type tmpHaystack = haystack(finder); 809 return swiftInfix(*finder.curHit, tmpHaystack); 810 } 811 812 template <typename THaystack, typename TSpec, typename TText> 813 inline typename Infix<TText>::Type 814 infix(Finder<THaystack, Pigeonhole<TSpec> > &finder, TText &text) 815 { 816 return swiftInfix(*finder.curHit, text); 817 } 818 819 //____________________________________________________________________________ 820 821 template <typename THaystack, typename TSpec> 822 inline typename Infix<THaystack>::Type 823 infixNoClip(Finder<THaystack, Pigeonhole<TSpec> > &finder) 824 { 825 return swiftInfixNoClip(*finder.curHit, haystack(finder)); 826 } 827 828 template <typename THaystack, typename TSpec, typename TText> 829 inline typename Infix<TText>::Type 830 infixNoClip(Finder<THaystack, Pigeonhole<TSpec> > &finder, TText &text) 831 { 832 return swiftInfixNoClip(*finder.curHit, text); 833 } 834 835 //____________________________________________________________________________ 836 837 template <typename TIndex, typename TSpec, typename TText> 838 inline typename Infix<TText>::Type 839 infix(Pattern<TIndex, Pigeonhole<TSpec> > const & pattern, TText &text) 840 { 841 int64_t hitBegin = pattern.curBeginPos; 842 int64_t hitEnd = pattern.curEndPos; 843 int64_t textLength = sequenceLength(pattern.curSeqNo, needle(pattern)); 844 845 if (hitEnd > textLength) hitEnd = textLength; 846 if (hitBegin < 0) hitBegin = 0; 847 848 return infix(text, hitBegin, hitEnd); 849 } 850 851 template <typename TIndex, typename TSpec> 852 inline typename Infix< typename GetSequenceByNo< TIndex const >::Type >::Type 853 infix(Pattern<TIndex, Pigeonhole<TSpec> > const & pattern) 854 { 855 return infix(getSequenceByNo(pattern.curSeqNo, needle(pattern)), 0, sequenceLength(pattern.curSeqNo, needle(pattern))); 856 } 857 858 template <typename TIndex, typename TSpec> 859 inline typename Infix< typename GetSequenceByNo< TIndex const >::Type >::Type 860 infix(Pattern<TIndex, Pigeonhole<TSpec> > & pattern) 861 { 862 return infix(const_cast<Pattern<TIndex, Pigeonhole<TSpec> > const &>(pattern)); 863 } 864 865 //____________________________________________________________________________ 866 867 template <typename THaystack, typename TSpec> 868 inline void 869 _printDots(Finder<THaystack, Pigeonhole<TSpec> > &finder) 870 { 871 while (finder.curPos >= finder.dotPos) 872 { 873 finder.dotPos += 100000; 874 if (finder.dotPos >= finder.dotPos2) 875 { 876 std::cerr << (finder.dotPos2 / 1000000) << "M" << std::flush; 877 finder.dotPos2 += 1000000; 878 } else 879 std::cerr << "." << std::flush; 880 } 881 } 882 883 template <typename TFinder, typename TIndex, typename TSpec> 884 inline bool 885 _nextNonRepeatRange( 886 TFinder &finder, 887 Pattern<TIndex, Pigeonhole<TSpec> > &pattern) 888 { 889 //typedef typename TFinder::TRepeat TRepeat; 890 //typedef typename Value<TRepeat>::Type TPos; 891 892 if (finder.curRepeat == finder.endRepeat) return false; 893 894 do 895 { 896 finder.startPos = (*finder.curRepeat).endPosition; 897 if (++finder.curRepeat == finder.endRepeat) 898 { 899 finder.endPos = length(host(finder)); 900 if (finder.startPos + length(pattern.shape) > finder.endPos) 901 return false; 902 else 903 break; 904 } else 905 finder.endPos = (*finder.curRepeat).beginPosition; 906 // repeat until the shape fits in non-repeat range 907 } while (finder.startPos + length(pattern.shape) > finder.endPos); 908 909 finder.curPos = finder.startPos; 910 hostIterator(finder) = begin(host(finder)) + finder.startPos; 911 finder.haystackEnd = begin(host(finder)) + (finder.endPos - length(pattern.shape) + 1); 912 913 // if (pattern.params.printDots) 914 // std::cerr << std::endl << " scan range (" << finder.startPos << ", " << finder.endPos << ") " << std::flush; 915 916 return true; 917 } 918 919 template <typename TFinder, typename TIndex, typename TSpec> 920 inline bool 921 _firstNonRepeatRange( 922 TFinder &finder, 923 Pattern<TIndex, Pigeonhole<TSpec> > &pattern) 924 { 925 //typedef typename TFinder::TRepeat TRepeat; 926 //typedef typename Value<TRepeat>::Type TPos; 927 928 finder.curRepeat = begin(finder.data_repeats, Standard()); 929 finder.endRepeat = end(finder.data_repeats, Standard()); 930 931 if (finder.curRepeat == finder.endRepeat) 932 finder.endPos = length(host(finder)); 933 else 934 finder.endPos = (*finder.curRepeat).beginPosition; 935 936 if (length(pattern.shape) > finder.endPos) 937 return _nextNonRepeatRange(finder, pattern); 938 939 finder.curPos = finder.startPos = 0; 940 hostIterator(finder) = begin(host(finder)); 941 finder.haystackEnd = begin(host(finder)) + (finder.endPos - length(pattern.shape) + 1); 942 943 // if (pattern.params.printDots) 944 // std::cerr << std::endl << " scan range (" << finder.startPos << ", " << finder.endPos << ") " << std::flush; 945 946 return true; 947 } 948 949 template <typename TFinder, typename TIndex, typename TSpec> 950 inline void 951 _copyPigeonholeHit( 952 TFinder &finder, 953 Pattern<TIndex, Pigeonhole<TSpec> > &pattern) 954 { 955 pattern.curSeqNo = (*finder.curHit).ndlSeqNo; 956 pattern.curBeginPos = 0; 957 pattern.curEndPos = length(indexText(needle(pattern))[pattern.curSeqNo]); 958 } 959 960 template <typename THaystack, typename TIndex, typename TSpec> 961 inline bool 962 find( 963 Finder<THaystack, Pigeonhole<TSpec> > &finder, 964 Pattern<TIndex, Pigeonhole<TSpec> > &pattern, 965 double errorRate) 966 { 967 if (empty(finder)) 968 { 969 pattern.finderLength = length(container(finder)); 970 _patternInit(pattern, errorRate); 971 _finderSetNonEmpty(finder); 972 finder.dotPos = 100000; 973 finder.dotPos2 = 10 * finder.dotPos; 974 975 if (!_firstNonRepeatRange(finder, pattern)) return false; 976 if (_pigeonholeProcessQGram(finder, pattern, hash(pattern.shape, hostIterator(hostIterator(finder))))) 977 { 978 _copyPigeonholeHit(finder, pattern); 979 return true; 980 } 981 } 982 else 983 { 984 if (++finder.curHit < finder.endHit) 985 { 986 _copyPigeonholeHit(finder, pattern); 987 return true; 988 } 989 } 990 991 // all previous matches reported -> search new ones 992 clear(finder.hits); 993 994 // are we at the end of the text? 995 if (atEnd(finder) && finder.curRepeat == finder.endRepeat) 996 { 997 finder.curHit = finder.endHit; 998 return false; 999 } 1000 1001 do 1002 { 1003 if (pattern.params.printDots) _printDots(finder); 1004 if (atEnd(++finder)) 1005 { 1006 if (!_nextNonRepeatRange(finder, pattern)) 1007 return false; 1008 hash(pattern.shape, hostIterator(hostIterator(finder))); 1009 } 1010 else 1011 { 1012 ++finder.curPos; 1013 hashNext(pattern.shape, hostIterator(hostIterator(finder))); 1014 } 1015 1016 if (_pigeonholeProcessQGram(finder, pattern, value(pattern.shape))) 1017 { 1018 _copyPigeonholeHit(finder, pattern); 1019 return true; 1020 } 1021 1022 } while (true); 1023 } 1024 1025 /*! 1026 * @fn PigeonholeFinder#windowFindBegin 1027 * @headerfile <seqan/index/find_pigeonhole.h> 1028 * 1029 * @brief Initializes the pattern. Sets the finder on the begin position. Gets the first non-repeat range and sets it 1030 * in the finder. Used together with @link PigeonholeFinder#windowFindEnd @endlink. 1031 * 1032 * @signature windowFindBegin(finder, pattern, errorRate) 1033 * 1034 * @param[in,out] finder The PigeonholeFinder to use. 1035 * @param[in,out] pattern The PigeonholePattern to use. 1036 * @param[in] errorRate Error rate that is allowed between reads and reference. Should be the same in as in @link 1037 * PigeonholeFinder#windowFindNext @endlink. Types: <tt>double</tt> 1038 * 1039 * @see PigeonholeFinder#windowFindNext 1040 * @see PigeonholeFinder#windowFindEnd 1041 */ 1042 1043 template <typename THaystack, typename TIndex, typename TSpec> 1044 inline bool 1045 windowFindBegin( 1046 Finder<THaystack, Pigeonhole<TSpec> > &finder, 1047 Pattern<TIndex, Pigeonhole<TSpec> > &pattern, 1048 double errorRate) 1049 { 1050 1051 pattern.finderLength = pattern.maxSeqLen + length(container(finder)); 1052 _patternInit(pattern, errorRate); 1053 _finderSetNonEmpty(finder); 1054 finder.dotPos = 100000; 1055 finder.dotPos2 = 10 * finder.dotPos; 1056 finder.windowStart = 0; 1057 1058 if (!_firstNonRepeatRange(finder, pattern)) return false; 1059 1060 return true; 1061 } 1062 1063 1064 /*! 1065 * @fn PigeonholeFinder#windowFindNext 1066 * @headerfile <seqan/index/find_pigeonhol.h> 1067 * @brief Searches over the next window with the finder. The found hits can be retrieved with @link 1068 * PigeonholeFinder#getWindowFindHits @endlink Used together with @link PigeonholeFinder#windowFindBegin @endlink 1069 * and @link PigeonholeFinder#windowFindEnd @endlink. 1070 * 1071 * @signature bool windowFindNext(finder, pattern, finderWindowLength) 1072 * 1073 * @param[in,out] finder The PigeonholeFinder to use. 1074 * @param[in,out] pattern The PigeonholePattern to use. 1075 * @param[in] finderWindowLength Number of bases that are scanned beginning from the position the finder is at. 1076 * Including bases that are marked as repeats and that are skipped. Types: <tt>unsigned</tt>. 1077 * 1078 * @return bool <tt>true</tt>, if there are bases that can be scanned, <tt>false</tt> otherwise. 1079 * 1080 * @see PigeonholeFinder#windowFindBegin 1081 * @see PigeonholeFinder#windowFindEnd 1082 * @see PigeonholeFinder#getWindowFindHits 1083 */ 1084 1085 1086 1087 template <typename THaystack, typename TIndex, typename TSpec, typename TSize> 1088 inline bool 1089 windowFindNext( 1090 Finder<THaystack, Pigeonhole<TSpec> > &finder, 1091 Pattern<TIndex, Pigeonhole<TSpec> > &pattern, 1092 TSize finderWindowLength) 1093 { 1094 1095 typedef typename Fibre<TIndex, QGramShape>::Type TShape; 1096 typedef Finder<THaystack, Pigeonhole<TSpec> > TFinder; 1097 typedef typename TFinder::THstkPos THstkPos; 1098 1099 typedef typename Fibre<TIndex, QGramSA>::Type TSA; 1100 typedef typename Iterator<TSA const, Standard>::Type TSAIter; 1101 typedef typename TFinder::TPigeonholeHit THit; 1102 1103 TIndex const &index = host(pattern); 1104 1105 // all previous matches reported -> search new ones 1106 clear(finder.hits); 1107 1108 THstkPos windowEnd = finder.windowStart + finderWindowLength; 1109 TSAIter saBegin = begin(indexSA(index), Standard()); 1110 Pair<unsigned> ndlPos; 1111 THit hit; 1112 double errorRate = pattern._currentErrorRate; 1113 int64_t seqDisabled = pattern.seqDisabled; 1114 1115 // iterate over all non-repeat regions within the window 1116 for (; finder.curPos < windowEnd; ) 1117 { 1118 THstkPos nonRepeatEnd = finder.endPos - length(pattern.shape) + 1; 1119 THstkPos localEnd = _min(windowEnd, nonRepeatEnd); 1120 1121 // filter a non-repeat region within the window 1122 if (finder.curPos < localEnd) 1123 { 1124 TShape &shape = pattern.shape; 1125 1126 hashInit(shape, hostIterator(hostIterator(finder))); 1127 for (; finder.curPos < localEnd; ++finder.curPos, ++finder) 1128 { 1129 hashNext(shape, hostIterator(hostIterator(finder))); 1130 1131 unsigned bktNo = getBucket(index.bucketMap, value(shape)); 1132 TSAIter occ = saBegin + indexDir(index)[bktNo]; 1133 TSAIter occEnd = saBegin + indexDir(index)[bktNo + 1]; 1134 1135 for(; occ != occEnd; ++occ) 1136 { 1137 posLocalize(ndlPos, *occ, stringSetLimits(index)); 1138 hit.hstkPos = finder.curPos; 1139 hit.hstkPos -= getSeqOffset(ndlPos); // bucket begin in haystack 1140 hit.ndlSeqNo = getSeqNo(ndlPos); // needle seq. number 1141 1142 if (Pigeonhole<TSpec>::ONE_PER_DIAGONAL) 1143 { 1144 int64_t lastDiag = pattern.lastSeedDiag[hit.ndlSeqNo]; 1145 if (lastDiag == seqDisabled) continue; 1146 1147 int64_t diag = hit.hstkPos + (int64_t)pattern.finderPosOffset; 1148 if (lastDiag == diag) 1149 { 1150 // std::cout<<"double hit"<<std::endl; 1151 continue; 1152 } 1153 pattern.lastSeedDiag[hit.ndlSeqNo] = diag; 1154 } 1155 // std::cout<<"hit at:\thpos="<<finder.curPos<<"\tnpos="<<getSeqOffset(ndlPos)<<"\treadNo="<<hit.ndlSeqNo<<"\thash="<<value(shape)<<std::endl; 1156 unsigned ndlLength = sequenceLength(hit.ndlSeqNo, host(pattern)); 1157 if (Pigeonhole<TSpec>::HAMMING_ONLY != 0) 1158 { 1159 hit.bucketWidth = ndlLength; 1160 } 1161 else 1162 { 1163 unsigned indels = (unsigned)floor(errorRate * ndlLength); 1164 hit.bucketWidth = ndlLength + (indels << 1); 1165 hit.hstkPos -= indels; 1166 } 1167 appendValue(finder.hits, hit); 1168 } 1169 } 1170 } 1171 1172 if (pattern.params.printDots) _printDots(finder); 1173 1174 if (finder.curPos >= nonRepeatEnd) 1175 if (!_nextNonRepeatRange(finder, pattern)) 1176 { 1177 finder.windowStart = windowEnd; 1178 return false; 1179 } 1180 } 1181 finder.windowStart = windowEnd; 1182 return true; 1183 } 1184 1185 1186 /*! 1187 * @fn PigeonholeFinder#windowFindEnd 1188 * @headerfile <seqan/index/find_pigeonhole.h> 1189 * @brief Flushes the pattern. Used together with @link PigeonholeFinder#windowFindBegin @endlink and @link 1190 * PigeonholeFinder#windowFindNext @endlink. 1191 * 1192 * @signature void windowFindEnd(finder, pattern); 1193 * 1194 * @param[in,out] pattern A pattern with window interface. 1195 * @param[in,out] finder A finder with window interface. Types: @link PigeonholeFinder @endlink 1196 * 1197 * @see PigeonholeFinder#windowFindBegin 1198 * @see PigeonholeFinder#windowFindNext 1199 */ 1200 1201 1202 template <typename THaystack, typename TIndex, typename TSpec> 1203 inline void 1204 windowFindEnd( 1205 Finder<THaystack, Pigeonhole<TSpec> > &, 1206 Pattern<TIndex, Pigeonhole<TSpec> > &) 1207 { 1208 } 1209 1210 /*! 1211 * @fn PigeonholeFinder#getWindowFindHits 1212 * @headerfile <seqan/index/find_pigeonhole.h> 1213 * @brief Returns the string of hits from the finder. 1214 * 1215 * @signature THitString getWindowFindHits(finder); 1216 * 1217 * @param[in] finder A PigeonFinder. 1218 * @return THitString @link String @endlink of Hits, see @link PigeonholeFinder::THitString @endlink. 1219 * 1220 * @see PigeonholeFinder#windowFindNext 1221 */ 1222 1223 1224 1225 template <typename THaystack, typename TSpec> 1226 inline typename Finder<THaystack, Pigeonhole<TSpec> >::THitString & 1227 getWindowFindHits(Finder<THaystack, Pigeonhole<TSpec> > &finder) 1228 { 1229 1230 return finder.hits; 1231 } 1232 1233 /*! 1234 * @fn PigeonholePattern#getMaxDeviationOfOrder 1235 * @headerfile <seqan/index/find_pigeonhole.h> 1236 * @brief Returns the maximal out-of-order distance of adjacent hits. 1237 * 1238 * @signature TSize getMaxDeviationOfOrder(pattern); 1239 * 1240 * @param[in] pattern A PigeonholeFinder. 1241 * @return TSize Returns the maximal distance two adjacent hits can have which are not in increasing order. 1242 * <tt>TSize</tt> is the size type of the underlying index. 1243 */ 1244 1245 template <typename TIndex, typename TSpec> 1246 inline typename Size<TIndex>::Type 1247 getMaxDeviationOfOrder(Pattern<TIndex, Pigeonhole<TSpec> > &pattern) 1248 { 1249 1250 return (pattern.maxSeqLen <= length(indexShape(host(pattern))))? 0: pattern.maxSeqLen - length(indexShape(host(pattern))); 1251 } 1252 1253 }// namespace seqan 1254 1255 #endif //#ifndef INCLUDE_SEQAN_FIND_PIGEONHOLE_H_ 1256 1257