1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34 // This filter uses the pigeonhole principle to search k-error matches
35 // between strings in a stringset and a text.
36 // The pigeonhole principle states:
37 // If a string is split into k+1 pieces then in every k-error match of the
38 // whole string one piece matches without error.
39 // ==========================================================================
40 
41 
42 #ifndef INCLUDE_SEQAN_FIND_PIGEONHOLE_H_
43 #define INCLUDE_SEQAN_FIND_PIGEONHOLE_H_
44 
45 namespace seqan
46 {
47 
48 // ==========================================================================
49 // Forwards
50 // ==========================================================================
51 
52     template < typename TObject, typename TSpec > class Index;
53     template < typename TObject > struct SAValue;
54 
55 // ==========================================================================
56 // Tags, Classes, Enums
57 // ==========================================================================
58 
59     template <typename TSpec = void>
60     struct Pigeonhole {
61         enum { ONE_PER_DIAGONAL = 1 };    // 1..record last seed diagonal and ignore seeds on the same diag. (heuristic)
62         enum { HAMMING_ONLY = 0 };        // 0..no indels; 1..allow indels
63     };
64 
65     template <>
66     struct Pigeonhole<Hamming_> {
67         enum { ONE_PER_DIAGONAL = 1 };    // 1..record last seed diagonal and ignore seeds on the same diag. (heuristic)
68         enum { HAMMING_ONLY = 1 };        // 0..no indels; 1..allow indels
69     };
70 
71 //////////////////////////////////////////////////////////////////////////////
72 
73 
74 //____________________________________________________________________________
75 
76 
77     template <typename TSpec, typename THstkPos>
78     struct PigeonholeHit_
79     {
80         THstkPos    hstkPos;            // seed diagonal begin in haystack
81         unsigned    ndlSeqNo;            // needle sequence number
82     };
83 
84     template <typename THaystack, typename TPattern, typename TPigeonholeSpec>
85     struct FindResult<Finder<THaystack, Pigeonhole<TPigeonholeSpec> >, TPattern>
86     {
87         typedef SwiftHitSemiGlobal_<int64_t> Type;
88     };
89 
90 /*!
91  * @class PigeonholeParameters
92  * @headerfile <seqan/index/find_pigeonhole.h>
93  * @brief Parameters for the pigeonhole filter algorithm.
94  *
95  * @signature struct PigeonholeParameters;
96  *
97  * @see PigeonholePattern
98  *
99  *
100  * @fn PigeonholeParameters::PigeonholeParameters
101  * @brief Default constructor.
102  *
103  * @signature PigeonholeParameters::PigeonholeParameters();
104  *
105  * Initializes all member variables, default values are given in the variable documentation.
106  *
107  *
108  * @var unsigned PigeonholeParameters::overlap;
109  * @brief Overlap length of adjacent q-grams, default is to use non-overlappling q-grams (<tt>= 0</tt>).
110  *
111  * @var unsigned PigeonholeParameters::delta;
112  * @brief Delta length, becomes index step site; defaults to <tt>0</tt>.
113  *
114  * A value of <tt>0</tt> means that the delta will be auto-detected.
115  *
116  * @var bool PigeonholeParameters::printDots;
117  * @brief Whether to print dots for the scanning progress, defaults to <tt>false</tt>.
118  *
119  * @var bool PigeonholeParameters::debug;
120  * @brief Whether to print debug information, defaults to <tt>false</tt>.
121  */
122 
123 
124     struct PigeonholeParameters
125     {
126         unsigned overlap;           // overlap length of adjacent q-grams, default is to use non-overlapping q-grams (=0)
127         unsigned delta;             // delta length (0 = automatic detection), becomes index stepSize
128         bool printDots;             // the q-gram will have length q=delta+overlap
129         bool debug;
130 
131         PigeonholeParameters() :
132             overlap(0),
133             delta(0),
134             printDots(false),        // print a . for every 100kbps mapped genome
135             debug(false)
136         {}
137     };
138 
139 
140 //____________________________________________________________________________
141 
142 
143 /*!
144  * @class PigeonholeFinder
145  * @extends Finder
146  * @headerfile <seqan/index/find_pigeonhole.h>
147  *
148  * @brief Pigeonhole-based finder.  Must be used together with @link PigeonholePattern @endlink.
149  *
150  * @signature template <typename THaystack, typename TSpec>
151  *            class Finder<THaystack, Pigeonhole<TSpec> >;
152  *
153  * @tparam TSpec Specifies the type of pigeonhole filter.
154  * @tparam THaystack The type of the sequence that should be searched.
155  *                   Types: @link ContainerConcept @endlink
156  *
157  * Provides a fast filter algorithm that uses the pigeonhole lemma, i.e. if a pattern matches with k errors in the text,
158  * every partition into k+1 parts contains one part that matches without error.
159  *
160  * The @link Pattern @endlink must be a q-gram index over multiple patterns.  The tolerated error rate must be given
161  * when @link Finder#find @endlink or @link PigeonholeFinder#windowFindBegin @endlink is called.  In these functions the
162  * length of the index @link Shape @endlink is set automatically thus it must be modifiable at runtime, e.g. @link
163  * OneGappedShape @endlink.
164  *
165  * @see PigeonholePattern
166  *
167  * @var PigeonholeParameters PigeonholePattern::parameters;
168  * @brief The pigeonhole parameters to use for configuration.
169  */
170 
171 /*!
172  * @class PigeonholePattern
173  * @extends Pattern
174  * @headerfile <seqan/index/find_pigeonhole.h>
175  *
176  * @brief Pigeonhole-based pattern.  Must be used together with @link PigeonholeFinder @endlink.
177  *
178  * See @link PigeonholeFinder @endlink for more information on the algorithm.
179  *
180  * @signature template <typename THaystack, typename TSpec>
181  *            class Pattern<TIndex, Pigeonhole<TSpec> >;
182  *
183  * @tparam TSpec Specifies the type of pigeonhole filter.
184  * @tparam TIndex A q-gram index of needle(s) that should be searched for.
185  *                Types: @link IndexQGram @endlink
186  *
187  * @see PigeonholeFinder
188  */
189 
190 /*!
191  * @typedef PigeonholeFinder::THitString
192  * @brief The type to use for the hit string returned in @link PigeonholeFinder#getWindowFindHits @endlink.
193  *
194  * @signature typedef (...) PigeonholeFinder::THitString;
195  */
196 
197 // docu is now in find_pattern_base.h
198     template <typename THaystack, typename TSpec>
199     class Finder<THaystack, Pigeonhole<TSpec> >
200     {
201     public:
202         typedef typename Iterator<THaystack, Rooted>::Type            TIterator;
203         typedef typename Position<THaystack>::Type                    THstkPos;
204 //        typedef PigeonholeHit_<TSpec, int64_t>                        TPigeonholeHit;
205         typedef typename FindResult<Finder>::Type                   TPigeonholeHit;
206         typedef typename WindowFindResult<Finder>::Type                THitString;
207         typedef typename Iterator<THitString, Standard>::Type        THitIterator;
208         typedef typename SAValue<THaystack>::Type                    TSAValue;
209         typedef Repeat<TSAValue, unsigned>                            TRepeat;
210         // TODO(holtgrew): Make this a holder so we can share the actual string when copying.
211         typedef String<TRepeat>                                        TRepeatString;
212         typedef typename Iterator<TRepeatString, Standard>::Type    TRepeatIterator;
213 
214         TIterator        data_iterator;
215         TIterator        haystackEnd;
216         bool            _needReinit;    // if true, the Pattern needs to be reinitialized
217         THitString        hits;
218         THitIterator    curHit, endHit;
219         THstkPos        startPos, curPos, endPos;
220         THstkPos        windowStart;
221         THstkPos        dotPos, dotPos2;
222         TRepeatString    data_repeats;
223         TRepeatIterator    curRepeat, endRepeat;
224 
225         Finder():
226             _needReinit(true) { }
227 
228         Finder(THaystack &haystack):
229             data_iterator(begin(haystack, Rooted())),
230             _needReinit(true) { }
231 
232         template <typename TRepeatSize, typename TPeriodSize>
233         Finder(THaystack &haystack, TRepeatSize minRepeatLen, TPeriodSize maxPeriod):
234             data_iterator(begin(haystack, Rooted())),
235             _needReinit(true)
236         {
237             findRepeats(data_repeats, haystack, minRepeatLen, maxPeriod);
238         }
239 
240         Finder(TIterator &iter):
241             data_iterator(iter),
242             _needReinit(true) { }
243 
244         Finder(TIterator const &iter):
245             data_iterator(iter),
246             _needReinit(true) { }
247 
248         Finder(Finder const &orig):
249             data_iterator(orig.data_iterator),
250             haystackEnd(orig.haystackEnd),
251             _needReinit(orig._needReinit),
252             hits(orig.hits),
253             startPos(orig.startPos),
254             curPos(orig.curPos),
255             endPos(orig.endPos),
256             windowStart(orig.windowStart),
257             dotPos(orig.dotPos),
258             dotPos2(orig.dotPos2),
259             data_repeats(orig.data_repeats)
260         {
261             curHit = begin(hits, Standard()) + (orig.curHit - begin(orig.hits, Standard()));
262             endHit = end(hits, Standard());
263             curRepeat = begin(data_repeats, Standard()) + (orig.curRepeat - begin(orig.data_repeats, Standard()));
264             endRepeat = end(data_repeats, Standard());
265         }
266 
267         inline typename Reference<TIterator>::Type
268         operator* () { return value(hostIterator(*this)); }
269 
270         inline typename Reference<TIterator const>::Type
271         operator* () const { return value(hostIterator(*this)); }
272 
273         operator TIterator () const    { return data_iterator;    }
274 
275         Finder & operator = (Finder const &orig)
276         {
277             data_iterator = orig.data_iterator;
278             haystackEnd = orig.haystackEnd;
279             _needReinit = orig._needReinit;
280             hits = orig.hits;
281             startPos = orig.startPos;
282             windowStart = orig.windowStart;
283             curPos = orig.curPos;
284             endPos = orig.endPos;
285             dotPos = orig.dotPos;
286             dotPos2 = orig.dotPos2;
287             data_repeats = orig.data_repeats;
288             curHit = begin(hits, Standard()) + (orig.curHit - begin(orig.hits, Standard()));
289             endHit = end(hits, Standard());
290             curRepeat = begin(data_repeats, Standard()) + (orig.curRepeat - begin(orig.data_repeats, Standard()));
291             endRepeat = end(data_repeats, Standard());
292             return *this;
293         }
294     };
295 
296 //____________________________________________________________________________
297 
298 
299     template <typename THaystack, typename TSpec>
300     inline bool
301     atEnd(Finder<THaystack, Pigeonhole<TSpec> > & me)
302     {
303         return hostIterator(hostIterator(me)) == hostIterator(me.haystackEnd);
304     }
305 
306     template <typename THaystack, typename TSpec>
307     inline void
308     goEnd(Finder<THaystack, Pigeonhole<TSpec> > & me)
309     {
310         hostIterator(me) = me.haystackEnd;
311     }
312 
313 
314 //____________________________________________________________________________
315 
316 
317     template <typename TIndex, typename TSpec>
318     class Pattern<TIndex, Pigeonhole<TSpec> >
319     {
320     public:
321         typedef typename Size<TIndex>::Type                                TSize;
322         typedef unsigned                                                TShortSize;
323         typedef typename Fibre<TIndex, Tag<FibreSA_> const >::Type        TSA;
324         typedef typename Fibre<TIndex, Tag<Fibre_Shape_> const >::Type    TShape;
325         typedef typename Iterator<TSA const, Standard>::Type            TIterator;
326 
327         TShape                    shape;
328         int64_t                 curBeginPos, curEndPos;
329         int64_t                    finderLength;
330         int64_t                    seqDisabled;
331         TSize                    finderPosOffset;                        // these must be of
332         TSize                    finderPosNextOffset;                    // type TSize of TBucket
333         TSize                   maxSeqLen;
334         unsigned                curSeqNo;
335         String<int64_t>         lastSeedDiag;
336         PigeonholeParameters    params;
337         double                    _currentErrorRate;
338 
339         Holder<TIndex>    data_host;
340 
341         Pattern()
342         {
343             clear(*this);
344         }
345         Pattern(TIndex &_index): data_host(_index)
346         {
347             clear(*this);
348         }
349         Pattern(TIndex const &_index): data_host(_index)
350         {
351             clear(*this);
352         }
353     };
354 
355 //____________________________________________________________________________
356 
357 
358 template <typename TValue, typename TSpec>
359 inline unsigned
360 _pigeonholeMaxShapeWeight(Shape<TValue, TSpec> const &)
361 {
362     typedef typename Value<Shape<TValue, SimpleShape> >::Type THashValue;
363     return (unsigned)(BitsPerValue<THashValue>::VALUE - 1)/ (unsigned)BitsPerValue<TValue>::VALUE;
364 }
365 
366 template <typename TShape, typename TSize>
367 inline bool
368 _pigeonholeUpdateShapeLength(TShape, TSize)
369 {
370     return false;
371 }
372 
373 template <typename TValue, typename TSize>
374 inline bool
375 _pigeonholeUpdateShapeLength(Shape<TValue, SimpleShape> &shape, TSize newLength)
376 {
377     TSize weight = _pigeonholeMaxShapeWeight(shape);
378 
379     if (weight > newLength)
380         weight = newLength;
381 
382     if (weight == length(shape))
383         return false;
384 
385     resize(shape, weight);
386     return true;
387 }
388 
389 template <typename TValue, typename TSize>
390 inline bool
391 _pigeonholeUpdateShapeLength(Shape<TValue, OneGappedShape> &shape, TSize newLength)
392 {
393     if (length(shape) == newLength)
394         return false;
395 
396     TSize weight = _pigeonholeMaxShapeWeight(shape);
397 
398     if (weight > newLength)
399         weight = newLength;
400 
401     CharString str;
402     resize(str, weight / 2, '1');
403     resize(str, newLength - (weight + 1) / 2, '0');
404     resize(str, newLength, '1');
405     stringToShape(shape, str);
406     return true;
407 }
408 
409 template <typename TValue, typename TSize>
410 inline bool
411 _pigeonholeUpdateShapeLength(Shape<TValue, GenericShape> &shape, TSize newLength)
412 {
413     if (length(shape) == newLength)
414         return false;
415 
416     TSize weight = _pigeonholeMaxShapeWeight(shape);
417 
418     if (weight >= newLength)
419         weight = newLength;
420 
421     CharString str;
422     resize(str, weight / 2, '1');
423     resize(str, newLength - (weight + 1) / 2, '0');
424     resize(str, newLength, '1');
425     stringToShape(shape, str);
426 
427     return true;
428 }
429 
430 /*!
431  * @fn PigeonholePattern#maskPatternSequence
432  * @headerfile <seqan/index/find_pigeonhole.h>
433  * @brief Mask and unmask pattern sequence.
434  *
435  * @note Disabling sequences in the pigeonhole filter requires the <tt>ONE_PER_DIAGONAL</tt> heuristic.
436  *
437  * @signature void maskPatternSequence(pattern, seqNo, enable);
438  *
439  * @param[in,out] pattern The PigeonholePattern to update.
440  * @param[in]     seqNo   Index of the sequence to disable/enable.
441  * @param[in]     enable  A <tt>bool</tt> that indicates whether hits are to be generated for the given sequence.
442  */
443 
444 template <typename TIndex, typename TSpec, typename TSeqNo>
445 inline void
446 maskPatternSequence(Pattern<TIndex, Pigeonhole<TSpec> > & pattern, TSeqNo seqNo, bool enable)
447 {
448     SEQAN_ASSERT_NEQ_MSG((int)Pigeonhole<TSpec>::ONE_PER_DIAGONAL, 0,
449                          "Disabling sequences in the pigeonhole filter requires the ONE_PER_DIAGONAL heuristic.");
450 
451     if (enable)
452         pattern.lastSeedDiag[seqNo] = -(int64_t)pattern.maxSeqLen;
453     else
454         pattern.lastSeedDiag[seqNo] = pattern.seqDisabled;
455 }
456 
457 template <typename TIndex, typename TFloat, typename TSpec>
458 inline void _patternInit(Pattern<TIndex, Pigeonhole<TSpec> > &pattern, TFloat errorRate)
459 {
460     typedef typename Size<TIndex>::Type TSize;
461 
462     double _newErrorRate = errorRate;
463     TSize seqCount = countSequences(host(pattern));
464 
465     if (pattern._currentErrorRate != _newErrorRate)
466     {
467         // settings have been changed -> initialize bucket parameters
468 
469         pattern._currentErrorRate = _newErrorRate;
470 
471         TSize minDelta = std::numeric_limits<TSize>::max();
472         TSize maxDelta = 3;
473         TSize maxSeqLen = 0;
474         for(unsigned seqNo = 0; seqNo < seqCount; ++seqNo)
475         {
476             // get pattern length and max. allowed errors
477             TSize length = sequenceLength(seqNo, host(pattern));
478             if (maxSeqLen < length) maxSeqLen = length;
479 
480             // sequence must have sufficient length
481             if (length <= pattern.params.overlap) continue;
482 
483             // cut overlap many characters from the end
484             TSize errors = (TSize) floor(errorRate * length);
485             length -= pattern.params.overlap;
486             TSize delta = length / (errors + 1);
487 
488 
489             // ignore too short q-grams
490             if (delta < 3) continue;
491             if (minDelta > delta) minDelta = delta;
492             if (maxDelta < delta) maxDelta = delta;
493         }
494         pattern.maxSeqLen = maxSeqLen;
495         if (minDelta < 3) minDelta = maxDelta;
496 
497         TIndex &index = host(pattern);
498         pattern.finderPosOffset = 0;
499         pattern.finderPosNextOffset = pattern.maxSeqLen + pattern.finderLength;
500 
501         if (pattern.params.delta != 0)
502         {
503             // use user-defined delta
504             minDelta = pattern.params.delta;
505         }
506 
507         if (minDelta == std::numeric_limits<TSize>::max())
508         {
509             // disable index
510             minDelta = pattern.maxSeqLen + 1;
511         }
512 
513         if (_pigeonholeUpdateShapeLength(pattern.shape, minDelta + pattern.params.overlap) || getStepSize(index) != minDelta)
514         {
515             clear(index);
516             setStepSize(index, minDelta);
517          }
518         indexShape(host(pattern)) = pattern.shape;
519 //        double start = sysTime();
520         indexRequire(host(pattern), QGramSADir());
521 //        double stop = sysTime();
522 //        std::cout << "created in " <<(stop-start) << " seconds" << std::endl;
523 
524         clear(pattern.lastSeedDiag);
525         if (Pigeonhole<TSpec>::ONE_PER_DIAGONAL)
526             resize(pattern.lastSeedDiag, seqCount, -(int64_t)maxSeqLen);
527         pattern.seqDisabled = -(int64_t)maxSeqLen - 1;
528     }
529     else
530     {
531         // settings are unchanged -> reset buckets
532 
533         // finderPosOffset is used to circumvent expensive resetting of all buckets
534         pattern.finderPosOffset = pattern.finderPosNextOffset;
535         pattern.finderPosNextOffset += pattern.maxSeqLen + pattern.finderLength;
536     }
537 }
538 
539 template <
540     typename TFinder,
541     typename TIndex,
542     typename TSpec,
543     typename THValue
544 >
545 inline bool _pigeonholeProcessQGram(
546     TFinder &finder,
547     Pattern<TIndex, Pigeonhole<TSpec> > &pattern,
548     THValue hash)
549 {
550     //typedef Pattern<TIndex, Pigeonhole<TSpec> >         TPattern;
551     //typedef typename TFinder::THstkPos                    THstkPos;
552 
553     typedef typename Fibre<TIndex, QGramSA>::Type const TSA;
554     typedef typename Iterator<TSA, Standard>::Type      TSAIter;
555     typedef typename TFinder::TPigeonholeHit            THit;
556 
557     TIndex const &index = host(pattern);
558 
559     // all previous matches reported -> search new ones
560     clear(finder.hits);
561 
562     TSAIter saBegin = begin(indexSA(index), Standard());
563     Pair<unsigned> ndlPos;
564     THit hit;
565 
566     unsigned bktNo = getBucket(index.bucketMap, hash);
567     TSAIter occ = saBegin + indexDir(index)[bktNo];
568     TSAIter occEnd = saBegin + indexDir(index)[bktNo + 1];
569 
570     for(; occ != occEnd; ++occ)
571     {
572         posLocalize(ndlPos, *occ, stringSetLimits(index));
573         hit.hstkPos = finder.curPos;
574         hit.hstkPos -= getSeqOffset(ndlPos);                    // bucket begin in haystack
575         hit.ndlSeqNo = getSeqNo(ndlPos);                        // needle seq. number
576         if (Pigeonhole<TSpec>::ONE_PER_DIAGONAL)
577         {
578             int64_t diag = hit.hstkPos + (int64_t)pattern.finderPosOffset;
579             if (pattern.lastSeedDiag[hit.ndlSeqNo] == diag)
580                 continue;
581             pattern.lastSeedDiag[hit.ndlSeqNo] = diag;
582         }
583         unsigned ndlLength = sequenceLength(hit.ndlSeqNo, host(pattern));
584 
585         if (Pigeonhole<TSpec>::HAMMING_ONLY != 0)
586         {
587             hit.bucketWidth = ndlLength;
588         }
589         else
590         {
591             unsigned indels = (unsigned)floor(pattern._currentErrorRate * ndlLength);
592             hit.bucketWidth = ndlLength + (indels << 1);
593             hit.hstkPos -= indels;
594         }
595         appendValue(finder.hits, hit);
596     }
597 
598     finder.curHit = begin(finder.hits, Standard());
599     finder.endHit = end(finder.hits, Standard());
600 
601     return !empty(finder.hits);
602 }
603 
604 
605 template <typename TIndex, typename TSpec>
606 inline bool
607 empty(Pattern<TIndex, Pigeonhole<TSpec> > & me)
608 {
609     return me._currentErrorRate == -1;
610 }
611 
612 template <typename TIndex, typename TSpec>
613 inline void
614 clear(Pattern<TIndex, Pigeonhole<TSpec> > & me)
615 {
616     me.finderPosOffset = 0;
617     me.finderPosNextOffset = 0;
618     me.finderLength = 0;
619     me._currentErrorRate = -1;
620     clear(me.lastSeedDiag);
621 }
622 
623 //____________________________________________________________________________
624 
625 template <typename THaystack, typename TSpec>
626 inline typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type
627 position(Finder<THaystack, Pigeonhole<TSpec> > const & finder)
628 {
629     typename Finder<THaystack, Pigeonhole<TSpec> >::TPigeonholeHit &hit = *finder.curHit;
630     return hit.hstkPos + hit.bucketWidth;
631 }
632 
633 template <typename THaystack, typename TSpec>
634 inline typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type
635 position(Finder<THaystack, Pigeonhole<TSpec> > & finder)
636 {
637     return position(const_cast<Finder<THaystack, Pigeonhole<TSpec> > const &>(finder));
638 }
639 
640 //____________________________________________________________________________
641 
642 template <typename TIndex, typename TSpec>
643 inline typename SAValue<TIndex>::Type
644 position(Pattern<TIndex, Pigeonhole<TSpec> > const & pattern)
645 {
646     typedef typename Size<TIndex>::Type TSize;
647     typename SAValue<TIndex >::Type pos;
648     posLocalToX(pos, Pair<unsigned, TSize>(pattern.curSeqNo, length(needle(pattern))), stringSetLimits(host(pattern)));
649     return pos;
650 }
651 
652 template <typename TIndex, typename TSpec>
653 inline typename SAValue<TIndex>::Type
654 position(Pattern<TIndex, Pigeonhole<TSpec> > & pattern)
655 {
656     return position(const_cast<Pattern<TIndex, Pigeonhole<TSpec> > const &>(pattern));
657 }
658 
659 //____________________________________________________________________________
660 
661 template <typename THaystack, typename TSpec>
662 inline int64_t
663 beginPosition(Finder<THaystack, Pigeonhole<TSpec> > const & finder)
664 {
665     return (*finder.curHit).hstkPos;
666 }
667 
668 template <typename THaystack, typename TSpec>
669 inline int64_t
670 beginPosition(Finder<THaystack, Pigeonhole<TSpec> > & finder)
671 {
672     return beginPosition(const_cast<Finder<THaystack, Pigeonhole<TSpec> > const &>(finder));
673 }
674 
675 //____________________________________________________________________________
676 
677 template <typename TIndex, typename TSpec>
678 inline typename SAValue<TIndex >::Type
679 beginPosition(Pattern<TIndex, Pigeonhole<TSpec> > const & pattern)
680 {
681     typename SAValue<TIndex >::Type pos;
682     posLocalToX(pos, Pair<unsigned>(pattern.curSeqNo, 0), stringSetLimits(host(pattern)));
683     return pos;
684 }
685 
686 template <typename TIndex, typename TSpec>
687 inline typename SAValue<TIndex>::Type
688 beginPosition(Pattern<TIndex, Pigeonhole<TSpec> > & pattern)
689 {
690     return beginPosition(const_cast<Pattern<TIndex, Pigeonhole<TSpec> > const &>(pattern));
691 }
692 
693 //____________________________________________________________________________
694 
695 template <typename THaystack, typename TSpec>
696 inline typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type
697 endPosition(Finder<THaystack, Pigeonhole<TSpec> > const & finder)
698 {
699     typename Finder<THaystack, Pigeonhole<TSpec> >::TPigeonholeHit &hit = *finder.curHit;
700     return hit.hstkPos + hit.bucketWidth;
701 }
702 
703 template <typename THaystack, typename TSpec>
704 inline typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type
705 endPosition(Finder<THaystack, Pigeonhole<TSpec> > & finder)
706 {
707     return endPosition(const_cast<Finder<THaystack, Pigeonhole<TSpec> > const &>(finder));
708 }
709 
710 //____________________________________________________________________________
711 
712 template <typename TIndex, typename TSpec>
713 inline typename SAValue<TIndex >::Type
714 endPosition(Pattern<TIndex, Pigeonhole<TSpec> > const & pattern)
715 {
716     typedef typename Size<TIndex>::Type TSize;
717     typename SAValue<TIndex >::Type pos;
718     posLocalToX(pos, Pair<unsigned, TSize>(pattern.curSeqNo, length(needle(pattern))), stringSetLimits(host(pattern)));
719     return pos;
720 }
721 
722 template <typename TIndex, typename TSpec>
723 inline typename SAValue<TIndex>::Type
724 endPosition(Pattern<TIndex, Pigeonhole<TSpec> > & pattern)
725 {
726     return endPosition(const_cast<Pattern<TIndex, Pigeonhole<TSpec> > const &>(pattern));
727 }
728 
729 template <typename THaystack, typename TSpec>
730 inline Pair<typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type>
731 positionRangeNoClip(Finder<THaystack, Pigeonhole<TSpec> > const & finder)
732 {
733     typedef typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type TPosition;
734     typedef Pair<TPosition> TPair;
735     typename Finder<THaystack, Pigeonhole<TSpec> >::TPigeonholeHit &hit = *finder.curHit;
736     return TPair((TPosition)hit.hstkPos, (TPosition)(hit.hstkPos + hit.bucketWidth));
737 }
738 
739 template <typename THaystack, typename TSpec>
740 inline Pair<typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type>
741 positionRangeNoClip(Finder<THaystack, Pigeonhole<TSpec> > & finder)
742 {
743     return positionRangeNoClip(const_cast<Finder<THaystack, Pigeonhole<TSpec> > const &>(finder));
744 }
745 
746 template <typename THaystack, typename TSpec>
747 inline Pair<typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type>
748 positionRange(Finder<THaystack, Pigeonhole<TSpec> > const & finder)
749 {
750     typedef typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type TPosition;
751     typedef Pair<TPosition> TPair;
752     typename Finder<THaystack, Pigeonhole<TSpec> >::TPigeonholeHit &hit = *finder.curHit;
753 
754     int64_t hitBegin = hit.hstkPos;
755     int64_t hitEnd = hit.hstkPos + hit.bucketWidth;
756     int64_t textEnd = length(haystack(finder));
757 
758     if (hitBegin < 0) hitBegin = 0;
759     if (hitEnd > textEnd) hitEnd = textEnd;
760     return TPair((TPosition)hitBegin, (TPosition)hitEnd);
761 }
762 
763 template <typename THaystack, typename TSpec>
764 inline Pair<typename Position<Finder<THaystack, Pigeonhole<TSpec> > >::Type>
765 positionRange(Finder<THaystack, Pigeonhole<TSpec> > & finder)
766 {
767     return positionRange(const_cast<Finder<THaystack, Pigeonhole<TSpec> > const &>(finder));
768 }
769 
770 //____________________________________________________________________________
771 
772 template <typename TIndex, typename TSpec>
773 inline Pair<typename SAValue<TIndex>::Type>
774 positionRange(Pattern<TIndex, Pigeonhole<TSpec> > & pattern)
775 {
776     return Pair<typename SAValue<TIndex>::Type> (beginPosition(pattern), endPosition(pattern));
777 }
778 
779 //____________________________________________________________________________
780 
781 template <typename TPigeonholeHit, typename TText>
782 inline typename Infix<TText>::Type
783 pigeonholeInfixNoClip(TPigeonholeHit const &hit, TText &text)
784 {
785     return infix(text, hit.hstkPos, hit.hstkPos + hit.bucketWidth);
786 }
787 
788 template <typename TPigeonholeHit, typename TText>
789 inline typename Infix<TText>::Type
790 pigeonholeInfix(TPigeonholeHit const &hit, TText &text)
791 {
792     int64_t hitBegin = hit.hstkPos;
793     int64_t hitEnd = hit.hstkPos + hit.bucketWidth;
794     int64_t textEnd = length(text);
795 
796     if (hitBegin < 0) hitBegin = 0;
797     if (hitEnd > textEnd) hitEnd = textEnd;
798     SEQAN_ASSERT_LEQ(hitBegin, hitEnd);
799     return infix(text, hitBegin, hitEnd);
800 }
801 
802 //____________________________________________________________________________
803 
804 template <typename THaystack, typename TSpec>
805 inline typename Infix<THaystack>::Type
806 infix(Finder<THaystack, Pigeonhole<TSpec> > &finder)
807 {
808     typename Parameter_<THaystack>::Type tmpHaystack = haystack(finder);
809     return swiftInfix(*finder.curHit, tmpHaystack);
810 }
811 
812 template <typename THaystack, typename TSpec, typename TText>
813 inline typename Infix<TText>::Type
814 infix(Finder<THaystack, Pigeonhole<TSpec> > &finder, TText &text)
815 {
816     return swiftInfix(*finder.curHit, text);
817 }
818 
819 //____________________________________________________________________________
820 
821 template <typename THaystack, typename TSpec>
822 inline typename Infix<THaystack>::Type
823 infixNoClip(Finder<THaystack, Pigeonhole<TSpec> > &finder)
824 {
825     return swiftInfixNoClip(*finder.curHit, haystack(finder));
826 }
827 
828 template <typename THaystack, typename TSpec, typename TText>
829 inline typename Infix<TText>::Type
830 infixNoClip(Finder<THaystack, Pigeonhole<TSpec> > &finder, TText &text)
831 {
832     return swiftInfixNoClip(*finder.curHit, text);
833 }
834 
835 //____________________________________________________________________________
836 
837 template <typename TIndex, typename TSpec, typename TText>
838 inline typename Infix<TText>::Type
839 infix(Pattern<TIndex, Pigeonhole<TSpec> > const & pattern, TText &text)
840 {
841     int64_t hitBegin = pattern.curBeginPos;
842     int64_t hitEnd = pattern.curEndPos;
843     int64_t textLength = sequenceLength(pattern.curSeqNo, needle(pattern));
844 
845     if (hitEnd > textLength) hitEnd = textLength;
846     if (hitBegin < 0) hitBegin = 0;
847 
848     return infix(text, hitBegin, hitEnd);
849 }
850 
851 template <typename TIndex, typename TSpec>
852 inline typename Infix< typename GetSequenceByNo< TIndex const >::Type >::Type
853 infix(Pattern<TIndex, Pigeonhole<TSpec> > const & pattern)
854 {
855     return infix(getSequenceByNo(pattern.curSeqNo, needle(pattern)), 0, sequenceLength(pattern.curSeqNo, needle(pattern)));
856 }
857 
858 template <typename TIndex, typename TSpec>
859 inline typename Infix< typename GetSequenceByNo< TIndex const >::Type >::Type
860 infix(Pattern<TIndex, Pigeonhole<TSpec> > & pattern)
861 {
862     return infix(const_cast<Pattern<TIndex, Pigeonhole<TSpec> > const &>(pattern));
863 }
864 
865 //____________________________________________________________________________
866 
867 template <typename THaystack, typename TSpec>
868 inline void
869 _printDots(Finder<THaystack, Pigeonhole<TSpec> > &finder)
870 {
871     while (finder.curPos >= finder.dotPos)
872     {
873         finder.dotPos += 100000;
874         if (finder.dotPos >= finder.dotPos2)
875         {
876             std::cerr << (finder.dotPos2 / 1000000) << "M" << std::flush;
877             finder.dotPos2 += 1000000;
878         } else
879             std::cerr << "." << std::flush;
880     }
881 }
882 
883 template <typename TFinder, typename TIndex, typename TSpec>
884 inline bool
885 _nextNonRepeatRange(
886     TFinder &finder,
887     Pattern<TIndex, Pigeonhole<TSpec> > &pattern)
888 {
889     //typedef typename TFinder::TRepeat        TRepeat;
890     //typedef typename Value<TRepeat>::Type    TPos;
891 
892     if (finder.curRepeat == finder.endRepeat) return false;
893 
894     do
895     {
896         finder.startPos = (*finder.curRepeat).endPosition;
897         if (++finder.curRepeat == finder.endRepeat)
898         {
899             finder.endPos = length(host(finder));
900             if (finder.startPos + length(pattern.shape) > finder.endPos)
901                 return false;
902             else
903                 break;
904         } else
905             finder.endPos = (*finder.curRepeat).beginPosition;
906         // repeat until the shape fits in non-repeat range
907     } while (finder.startPos + length(pattern.shape) > finder.endPos);
908 
909     finder.curPos = finder.startPos;
910     hostIterator(finder) = begin(host(finder)) + finder.startPos;
911     finder.haystackEnd = begin(host(finder)) + (finder.endPos - length(pattern.shape) + 1);
912 
913 //    if (pattern.params.printDots)
914 //        std::cerr << std::endl << "  scan range (" << finder.startPos << ", " << finder.endPos << ") " << std::flush;
915 
916     return true;
917 }
918 
919 template <typename TFinder, typename TIndex, typename TSpec>
920 inline bool
921 _firstNonRepeatRange(
922     TFinder &finder,
923     Pattern<TIndex, Pigeonhole<TSpec> > &pattern)
924 {
925     //typedef typename TFinder::TRepeat        TRepeat;
926     //typedef typename Value<TRepeat>::Type    TPos;
927 
928     finder.curRepeat = begin(finder.data_repeats, Standard());
929     finder.endRepeat = end(finder.data_repeats, Standard());
930 
931     if (finder.curRepeat == finder.endRepeat)
932         finder.endPos = length(host(finder));
933     else
934         finder.endPos = (*finder.curRepeat).beginPosition;
935 
936     if (length(pattern.shape) > finder.endPos)
937         return _nextNonRepeatRange(finder, pattern);
938 
939     finder.curPos = finder.startPos = 0;
940     hostIterator(finder) = begin(host(finder));
941     finder.haystackEnd = begin(host(finder)) + (finder.endPos - length(pattern.shape) + 1);
942 
943 //    if (pattern.params.printDots)
944 //        std::cerr << std::endl << "  scan range (" << finder.startPos << ", " << finder.endPos << ") " << std::flush;
945 
946     return true;
947 }
948 
949 template <typename TFinder, typename TIndex, typename TSpec>
950 inline void
951 _copyPigeonholeHit(
952     TFinder &finder,
953     Pattern<TIndex, Pigeonhole<TSpec> > &pattern)
954 {
955     pattern.curSeqNo = (*finder.curHit).ndlSeqNo;
956     pattern.curBeginPos = 0;
957     pattern.curEndPos = length(indexText(needle(pattern))[pattern.curSeqNo]);
958 }
959 
960 template <typename THaystack, typename TIndex, typename TSpec>
961 inline bool
962 find(
963     Finder<THaystack, Pigeonhole<TSpec> > &finder,
964     Pattern<TIndex, Pigeonhole<TSpec> > &pattern,
965     double errorRate)
966 {
967     if (empty(finder))
968     {
969         pattern.finderLength = length(container(finder));
970         _patternInit(pattern, errorRate);
971         _finderSetNonEmpty(finder);
972         finder.dotPos = 100000;
973         finder.dotPos2 = 10 * finder.dotPos;
974 
975         if (!_firstNonRepeatRange(finder, pattern)) return false;
976         if (_pigeonholeProcessQGram(finder, pattern, hash(pattern.shape, hostIterator(hostIterator(finder)))))
977         {
978             _copyPigeonholeHit(finder, pattern);
979             return true;
980         }
981     }
982     else
983     {
984         if (++finder.curHit < finder.endHit)
985         {
986             _copyPigeonholeHit(finder, pattern);
987             return true;
988         }
989     }
990 
991     // all previous matches reported -> search new ones
992     clear(finder.hits);
993 
994     // are we at the end of the text?
995     if (atEnd(finder) && finder.curRepeat == finder.endRepeat)
996     {
997         finder.curHit = finder.endHit;
998         return false;
999     }
1000 
1001     do
1002     {
1003         if (pattern.params.printDots) _printDots(finder);
1004         if (atEnd(++finder))
1005         {
1006             if (!_nextNonRepeatRange(finder, pattern))
1007                 return false;
1008             hash(pattern.shape, hostIterator(hostIterator(finder)));
1009         }
1010         else
1011         {
1012             ++finder.curPos;
1013             hashNext(pattern.shape, hostIterator(hostIterator(finder)));
1014         }
1015 
1016         if (_pigeonholeProcessQGram(finder, pattern, value(pattern.shape)))
1017         {
1018             _copyPigeonholeHit(finder, pattern);
1019             return true;
1020         }
1021 
1022     } while (true);
1023 }
1024 
1025 /*!
1026  * @fn PigeonholeFinder#windowFindBegin
1027  * @headerfile <seqan/index/find_pigeonhole.h>
1028  *
1029  * @brief Initializes the pattern. Sets the finder on the begin position.  Gets the first non-repeat range and sets it
1030  * in the finder.  Used together with @link PigeonholeFinder#windowFindEnd @endlink.
1031  *
1032  * @signature windowFindBegin(finder, pattern, errorRate)
1033  *
1034  * @param[in,out] finder    The PigeonholeFinder to use.
1035  * @param[in,out] pattern   The PigeonholePattern to use.
1036  * @param[in]     errorRate Error rate that is allowed between reads and reference.  Should be the same in as in @link
1037  *                          PigeonholeFinder#windowFindNext @endlink.  Types: <tt>double</tt>
1038  *
1039  * @see PigeonholeFinder#windowFindNext
1040  * @see PigeonholeFinder#windowFindEnd
1041  */
1042 
1043 template <typename THaystack, typename TIndex, typename TSpec>
1044 inline bool
1045 windowFindBegin(
1046     Finder<THaystack, Pigeonhole<TSpec> > &finder,
1047     Pattern<TIndex, Pigeonhole<TSpec> > &pattern,
1048     double errorRate)
1049 {
1050 
1051     pattern.finderLength = pattern.maxSeqLen + length(container(finder));
1052     _patternInit(pattern, errorRate);
1053     _finderSetNonEmpty(finder);
1054     finder.dotPos = 100000;
1055     finder.dotPos2 = 10 * finder.dotPos;
1056     finder.windowStart = 0;
1057 
1058     if (!_firstNonRepeatRange(finder, pattern)) return false;
1059 
1060     return true;
1061 }
1062 
1063 
1064 /*!
1065  * @fn PigeonholeFinder#windowFindNext
1066  * @headerfile <seqan/index/find_pigeonhol.h>
1067  * @brief Searches over the next window with the finder.  The found hits can be retrieved with @link
1068  *        PigeonholeFinder#getWindowFindHits @endlink Used together with @link PigeonholeFinder#windowFindBegin @endlink
1069  *        and @link PigeonholeFinder#windowFindEnd @endlink.
1070  *
1071  * @signature bool windowFindNext(finder, pattern, finderWindowLength)
1072  *
1073  * @param[in,out] finder             The PigeonholeFinder to use.
1074  * @param[in,out] pattern            The PigeonholePattern to use.
1075  * @param[in]     finderWindowLength Number of bases that are scanned beginning from the position the finder is at.
1076  *                                   Including bases that are marked as repeats and that are skipped. Types: <tt>unsigned</tt>.
1077  *
1078  * @return bool <tt>true</tt>, if there are bases that can be scanned, <tt>false</tt> otherwise.
1079  *
1080  * @see PigeonholeFinder#windowFindBegin
1081  * @see PigeonholeFinder#windowFindEnd
1082  * @see PigeonholeFinder#getWindowFindHits
1083  */
1084 
1085 
1086 
1087 template <typename THaystack, typename TIndex, typename TSpec, typename TSize>
1088 inline bool
1089 windowFindNext(
1090     Finder<THaystack, Pigeonhole<TSpec> > &finder,
1091     Pattern<TIndex, Pigeonhole<TSpec> > &pattern,
1092     TSize finderWindowLength)
1093 {
1094 
1095     typedef typename Fibre<TIndex, QGramShape>::Type        TShape;
1096     typedef Finder<THaystack, Pigeonhole<TSpec> >           TFinder;
1097     typedef typename TFinder::THstkPos                      THstkPos;
1098 
1099     typedef typename Fibre<TIndex, QGramSA>::Type           TSA;
1100     typedef typename Iterator<TSA const, Standard>::Type    TSAIter;
1101     typedef typename TFinder::TPigeonholeHit                THit;
1102 
1103     TIndex const &index = host(pattern);
1104 
1105     // all previous matches reported -> search new ones
1106     clear(finder.hits);
1107 
1108     THstkPos windowEnd = finder.windowStart + finderWindowLength;
1109     TSAIter saBegin = begin(indexSA(index), Standard());
1110     Pair<unsigned> ndlPos;
1111     THit hit;
1112     double errorRate = pattern._currentErrorRate;
1113     int64_t seqDisabled = pattern.seqDisabled;
1114 
1115     // iterate over all non-repeat regions within the window
1116     for (; finder.curPos < windowEnd; )
1117     {
1118         THstkPos nonRepeatEnd = finder.endPos - length(pattern.shape) + 1;
1119         THstkPos localEnd = _min(windowEnd, nonRepeatEnd);
1120 
1121         // filter a non-repeat region within the window
1122         if (finder.curPos < localEnd)
1123         {
1124             TShape &shape = pattern.shape;
1125 
1126             hashInit(shape, hostIterator(hostIterator(finder)));
1127             for (; finder.curPos < localEnd; ++finder.curPos, ++finder)
1128             {
1129                 hashNext(shape, hostIterator(hostIterator(finder)));
1130 
1131                 unsigned bktNo = getBucket(index.bucketMap, value(shape));
1132                 TSAIter occ = saBegin + indexDir(index)[bktNo];
1133                 TSAIter occEnd = saBegin + indexDir(index)[bktNo + 1];
1134 
1135                 for(; occ != occEnd; ++occ)
1136                 {
1137                     posLocalize(ndlPos, *occ, stringSetLimits(index));
1138                     hit.hstkPos = finder.curPos;
1139                     hit.hstkPos -= getSeqOffset(ndlPos);                    // bucket begin in haystack
1140                     hit.ndlSeqNo = getSeqNo(ndlPos);                        // needle seq. number
1141 
1142                     if (Pigeonhole<TSpec>::ONE_PER_DIAGONAL)
1143                     {
1144                         int64_t lastDiag = pattern.lastSeedDiag[hit.ndlSeqNo];
1145                         if (lastDiag == seqDisabled) continue;
1146 
1147                         int64_t diag = hit.hstkPos + (int64_t)pattern.finderPosOffset;
1148                         if (lastDiag == diag)
1149                         {
1150 //                            std::cout<<"double hit"<<std::endl;
1151                             continue;
1152                         }
1153                         pattern.lastSeedDiag[hit.ndlSeqNo] = diag;
1154                     }
1155 //                     std::cout<<"hit at:\thpos="<<finder.curPos<<"\tnpos="<<getSeqOffset(ndlPos)<<"\treadNo="<<hit.ndlSeqNo<<"\thash="<<value(shape)<<std::endl;
1156                     unsigned ndlLength = sequenceLength(hit.ndlSeqNo, host(pattern));
1157                     if (Pigeonhole<TSpec>::HAMMING_ONLY != 0)
1158                     {
1159                         hit.bucketWidth = ndlLength;
1160                     }
1161                     else
1162                     {
1163                         unsigned indels = (unsigned)floor(errorRate * ndlLength);
1164                         hit.bucketWidth = ndlLength + (indels << 1);
1165                         hit.hstkPos -= indels;
1166                     }
1167                     appendValue(finder.hits, hit);
1168                 }
1169             }
1170         }
1171 
1172         if (pattern.params.printDots) _printDots(finder);
1173 
1174         if (finder.curPos >= nonRepeatEnd)
1175             if (!_nextNonRepeatRange(finder, pattern))
1176             {
1177                 finder.windowStart = windowEnd;
1178                 return false;
1179             }
1180     }
1181     finder.windowStart = windowEnd;
1182     return true;
1183 }
1184 
1185 
1186 /*!
1187  * @fn PigeonholeFinder#windowFindEnd
1188  * @headerfile <seqan/index/find_pigeonhole.h>
1189  * @brief Flushes the pattern.  Used together with @link PigeonholeFinder#windowFindBegin @endlink and @link
1190  *        PigeonholeFinder#windowFindNext @endlink.
1191  *
1192  * @signature void windowFindEnd(finder, pattern);
1193  *
1194  * @param[in,out] pattern A pattern with window interface.
1195  * @param[in,out] finder  A finder with window interface. Types: @link PigeonholeFinder @endlink
1196  *
1197  * @see PigeonholeFinder#windowFindBegin
1198  * @see PigeonholeFinder#windowFindNext
1199  */
1200 
1201 
1202 template <typename THaystack, typename TIndex, typename TSpec>
1203 inline void
1204 windowFindEnd(
1205     Finder<THaystack, Pigeonhole<TSpec> > &,
1206     Pattern<TIndex, Pigeonhole<TSpec> > &)
1207 {
1208 }
1209 
1210 /*!
1211  * @fn PigeonholeFinder#getWindowFindHits
1212  * @headerfile <seqan/index/find_pigeonhole.h>
1213  * @brief Returns the string of hits from the finder.
1214  *
1215  * @signature THitString getWindowFindHits(finder);
1216  *
1217  * @param[in] finder     A PigeonFinder.
1218  * @return    THitString @link String @endlink of Hits, see @link PigeonholeFinder::THitString @endlink.
1219  *
1220  * @see PigeonholeFinder#windowFindNext
1221  */
1222 
1223 
1224 
1225 template <typename THaystack, typename TSpec>
1226 inline typename Finder<THaystack, Pigeonhole<TSpec> >::THitString &
1227 getWindowFindHits(Finder<THaystack, Pigeonhole<TSpec> > &finder)
1228 {
1229 
1230     return finder.hits;
1231 }
1232 
1233 /*!
1234  * @fn PigeonholePattern#getMaxDeviationOfOrder
1235  * @headerfile <seqan/index/find_pigeonhole.h>
1236  * @brief Returns the maximal out-of-order distance of adjacent hits.
1237  *
1238  * @signature TSize getMaxDeviationOfOrder(pattern);
1239  *
1240  * @param[in] pattern A PigeonholeFinder.
1241  * @return    TSize   Returns the maximal distance two adjacent hits can have which are not in increasing order.
1242  *                    <tt>TSize</tt> is the size type of the underlying index.
1243  */
1244 
1245 template <typename TIndex, typename TSpec>
1246 inline typename Size<TIndex>::Type
1247 getMaxDeviationOfOrder(Pattern<TIndex, Pigeonhole<TSpec> > &pattern)
1248 {
1249 
1250     return (pattern.maxSeqLen <= length(indexShape(host(pattern))))? 0: pattern.maxSeqLen - length(indexShape(host(pattern)));
1251 }
1252 
1253 }// namespace seqan
1254 
1255 #endif //#ifndef INCLUDE_SEQAN_FIND_PIGEONHOLE_H_
1256 
1257