1 /*==========================================================================
2              RazerS - Fast Read Mapping with Controlled Loss Rate
3                    http://www.seqan.de/projects/razers.html
4 
5  ============================================================================
6   Copyright (C) 2008 by David Weese
7 
8   This program is free software; you can redistribute it and/or
9   modify it under the terms of the GNU Lesser General Public
10   License as published by the Free Software Foundation; either
11   version 3 of the License, or (at your options) any later version.
12 
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17 
18   You should have received a copy of the GNU General Public License
19   along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  ==========================================================================*/
21 
22 #ifndef SEQAN_HEADER_RAZERS_H
23 #define SEQAN_HEADER_RAZERS_H
24 
25 #include <iostream>
26 #include <fstream>
27 
28 #ifdef _OPENMP
29 #include <omp.h>
30 #endif
31 
32 #include <seqan/find.h>
33 #include <seqan/seq_io.h>
34 #include <seqan/index.h>
35 #include <seqan/index/find_pigeonhole.h>
36 #include <seqan/store.h>
37 #include <seqan/pipe.h>
38 #include <seqan/parallel.h>
39 #include <seqan/seq_io.h>
40 #include <seqan/stream.h>
41 
42 #ifdef RAZERS_PROFILE
43 #include "profile_timeline.h"
44 #endif  // #ifdef RAZERS_PROFILE
45 
46 // No parallelism for less than MIN_PARALLEL_WORK reads.
47 const unsigned MIN_PARALLEL_WORK = 1; //100/*0*/; // TODO(holtgrew): Set to some useful value after development.
48 
49 namespace seqan {
50 
51 // Compact representation of a match.
52 template <typename TContigPos_>
53 struct MatchRecord
54 {
55     typedef typename MakeSigned_<TContigPos_>::Type TContigPos;
56 
57     unsigned        contigId;       // genome seqNo
58     unsigned        readId;         // read seqNo
59     TContigPos      beginPos;       // begin position of the match in the genome
60     TContigPos      endPos;         // end position of the match in the genome
61 #ifdef RAZERS_DEFER_COMPACTION
62     bool            isRegistered; // registered in masking process.
63 #endif  // #ifdef RAZERS_DEFER_COMPACTION
64     char            orientation;    // 'F', 'R', '-'
65     short int       score;          // Levenshtein distance / score.
66     unsigned        pairMatchId;            // unique id for the two mate-pair matches (0 if unpaired)
67     int             libDiff : 24;     // outer distance difference from librarySize
68     int             pairScore : 8;    // combined score of both mates
69 
70     static const unsigned INVALID_ID;
71 
MatchRecordMatchRecord72     MatchRecord() :
73         contigId(std::numeric_limits<unsigned>::max()), readId(std::numeric_limits<unsigned>::max()),
74         beginPos(0), endPos(0),
75 #ifdef RAZERS_DEFER_COMPACTION
76         isRegistered(false),
77 #endif  // #ifndef RAZERS_DEFER_COMPACTION
78         orientation('-'), score(0), pairMatchId(std::numeric_limits<unsigned>::max()),
79         libDiff(0), pairScore(0)
80     {}
81 };
82 
83 template <typename TStream, typename TPos>
84 TStream &
85 operator<<(TStream & stream, MatchRecord<TPos> & record)
86 {
87     stream << "(contigId=" << record.contigId << ", readId=" << record.readId << ", beginPos=" << record.beginPos << ", endPos = " << record.endPos << ", orientation=" << record.orientation << ", score=" << record.score << ", pairMatchId=" << record.pairMatchId << ", libDiff=" << record.libDiff << ", pairScore=" << record.pairScore << ")";
88     return stream;
89 }
90 
91 template <typename TGPos_>
92 const unsigned MatchRecord<TGPos_>::INVALID_ID = std::numeric_limits<unsigned>::max();
93 
94 #ifdef RAZERS_PROFILE
95 enum
96 {
97     TASK_WAIT,
98     TASK_ON_CONTIG,
99     TASK_INIT,
100     TASK_REVCOMP,
101     TASK_FILTER,
102     TASK_VERIFY,
103     TASK_WRITEBACK,
104     TASK_COMPACT,
105     TASK_DUMP_MATCHES,
106     TASK_LOAD,
107     TASK_SORT,
108     TASK_COPY_FINDER
109 };
110 #endif  // #ifdef RAZERS_PROFILE
111 
112 //////////////////////////////////////////////////////////////////////////////
113 // RazerS modes
114 
115 // Alignment mode
116 struct RazerSLocal;
117 struct RazerSGlobal;
118 struct RazerSPrefix;
119 
120 // Gap mode
121 struct RazerSGapped;
122 struct RazerSUngapped;
123 
124 // Score mode
125 struct RazerSErrors;
126 struct RazerSScore;
127 struct RazerSMAQ;
128 
129 template <typename TSpec = Default>
130 struct RazerSQuality;
131 
132 template <typename TAlignMode_, typename TGapMode_, typename TScoreMode_, typename TMatchNPolicy_>
133 struct RazerSMode
134 {
135     typedef TAlignMode_ TAlignMode;
136     typedef TGapMode_   TGapMode;
137     typedef TScoreMode_ TScoreMode;
138     typedef TMatchNPolicy_  TMatchNPolicy;
139 };
140 
141 enum AlignMode          {RAZERS_LOCAL, RAZERS_PREFIX, RAZERS_GLOBAL};
142 enum GapMode            {RAZERS_GAPPED, RAZERS_UNGAPPED};
143 enum ScoreMode          {RAZERS_ERRORS, RAZERS_SCORE, RAZERS_QUALITY};
144 enum CompactMatchesMode {COMPACT, COMPACT_FINAL, COMPACT_FINAL_EXTERNAL};
145 
146 
147 
148 //////////////////////////////////////////////////////////////////////////////
149 // Default options
150 
151 template <bool DONT_VERIFY_ = false, bool DONT_DUMP_RESULTS_ = false>
152 struct RazerSSpec
153 {
154     enum {DONT_VERIFY = DONT_VERIFY_};                      // omit verifying potential matches
155     enum {DONT_DUMP_RESULTS = DONT_DUMP_RESULTS_};          // omit dumping results
156 };
157 
158 template <typename TSpec = RazerSSpec<> >
159 struct RazerSCoreOptions
160 {
161     // major options
162     AlignMode  alignMode;
163     GapMode    gapMode;
164     ScoreMode  scoreMode;
165 
166     // main options
167     TSpec       spec;
168     bool        forward;                // compute forward oriented read matches
169     bool        reverse;                // compute reverse oriented read matches
170     double      errorRate;              // Criteria 1 threshold
171     unsigned    maxHits;                // output at most maxHits many matches
172     unsigned    scoreDistanceRange;     // output only the best, second best, ..., scoreDistanceRange best matches
173     int         dRange;                 // used in matchVerify
174                                         // to a best match with e errors
175     bool        purgeAmbiguous;         // true..remove reads with more than maxHits best matches, false..keep them
176     CharString  output;                 // name of result file
177     int         _debugLevel;            // level of verbosity
178     bool        printVersion;           // print version number
179     int         trimLength;             // if >0, cut reads to #trimLength characters
180     // controlled pigeonhole extensions
181     double      mutationRate;           // difference between reference genome and sequenced genome
182     double      lossRate;               // 1.0 - sensitivity
183 
184     // output format options
185     unsigned    outputFormat;           // 0..Razer format
186                                         // 1..enhanced Fasta
187                                         // 2..ELAND format
188     bool        dumpAlignment;          // compute and dump the match alignments in the result files
189     unsigned    genomeNaming;           // 0..use Fasta id
190                                         // 1..enumerate reads beginning with 1
191     // TODO(holtgrew): SAM export should imply --read-naming 3
192     unsigned    readNaming;             // 0..use Fasta id
193                                         // 1..enumerate reads beginning with 1
194                                         // 2..use the read sequence (only for short reads!)
195                                         // 3..use Fasta id, do not append /L and /R for mate pairs.
196     bool        fullFastaId;            // read full FastaId or clip after first whitespace
197     unsigned    sortOrder;              // 0..sort keys: 1. read number, 2. genome position
198                                         // 1..           1. genome pos50ition, 2. read number
199     int         positionFormat;         // 0..gap space
200                                         // 1..position space
201     const char  * runID;                // runID needed for gff output
202     bool        dontShrinkAlignments;   // Required when used for building gold Rabema mapping.
203     bool        computeGlobal;          // compute global alignment in SAM output
204 
205     // filtration parameters
206     std::string shape;                  // shape (e.g. 11111111111)
207     int         threshold;              // threshold (minimal threshold, 0 activates pigeonhole mode)
208     int         tabooLength;            // taboo length
209     int         repeatLength;           // repeat length threshold
210     double      abundanceCut;           // abundance threshold
211     int         delta;                  // q-gram delta (in pigeonhole mode), 0=automatic
212     int         overlap;                // q-gram overlap (in pigeonhole mode), 0=lossless
213     unsigned    maxOverlap;             // limits the overlap in automatic mode
214 
215     // mate-pair parameters
216     int         libraryLength;          // offset between two mates
217     int         libraryError;           // offset tolerance
218     unsigned    nextPairMatchId;        // use this id for the next mate-pair
219 
220     // verification parameters
221     unsigned    prefixSeedLength;       // length of the prefix seed
222     bool        matchN;                 // false..N is always a mismatch, true..N matches with all
223     unsigned char compMask[5];
224     Score<int, Simple> scoringScheme;
225     int         minScore;               // minimal alignment score
226 
227     // statistics
228     typedef LogProb<> TProb;
229 //		typedef double TProb;
230     String<unsigned> readLengths;       // read length histogram (i -> #reads of length i)
231     String<double>   avrgQuality;       // average error quality per base
232     String<TProb>    errorProb;         // error probability per base
233     CharString  errorPrbFileName;
234     CharString  mismatchFilename;
235 
236     String<double> errorDist;           // error distribution
237     int64_t     countFiltration;        // matches returned by the filter
238     int64_t     countVerification;      // matches returned by the verifier
239     double      timeLoadFiles;          // time for loading input files
240     double      timeMapReads;           // time for mapping reads
241     double      timeDumpResults;        // time for dumping the results
242     double      timeBuildQGramIndex;    // time for q-gram index building.
243     double      timeCompactMatches;     // time for compacting reads
244     double      timeMaskDuplicates;     // time spent masking duplicates
245     double      timeFsCopy;             // time spent copying alignments back into the fragment store
246     double      timeFiltration;
247     double      timeVerification;
248 
249     bool        maqMapping;
250     int         absMaxQualSumErrors;
251 
252     bool        lowMemory;              // set maximum shape weight to 13 to limit size of q-gram index
253     bool        fastaIdQual;            // hidden option for special fasta+quality format we use
254 
255     // misc
256     double      noCompactFrac;          // If in last noCompactFrac of genome, don't compact.
257     double      compactMult;            // Multiplicator for compaction threshold.
258     int64_t     compactThresh;          // compact match array if larger than compactThresh
259 
260     // multi-threading
261 
262     unsigned    threadCount;      // Number of threads to use in the parallel version.
263     unsigned    windowSize;      // Collect SWIFT hits in windows of this length.
264     unsigned    verificationPackageSize;      // This number of SWIFT hits per verification.
265     unsigned    maxVerificationPackageCount;      // Maximum number of verification packages to create.
266     int64_t     availableMatchesMemorySize;      // Memory available for matches.  Used for switching to external memory algorithms. -1 for always external, 0 for never.
267     int         matchHistoStartThreshold;      // Threshold to use for starting histogram. >= 1
268 
269 #ifdef RAZERS_OPENADDRESSING
270     double      loadFactor;
271 #endif
272 
273     // global preprocessing information and maximal allowed errors
274 
275     typedef Infix<String<Dna5Q> >::Type         TRead;
276     typedef Pattern<TRead const, MyersUkkonen>  TMyersPattern;      // verifier
277     typedef String<TMyersPattern>               TPreprocessing;
278 
279     static String<unsigned char>                errorCutOff;        // ignore matches with >=errorCutOff errors
280     static TPreprocessing                       forwardPatterns;
281 
282     CharString commandLine;
283     std::string version;
284 
RazerSCoreOptionsRazerSCoreOptions285     RazerSCoreOptions()
286     {
287         alignMode = RAZERS_GLOBAL;
288         gapMode = RAZERS_GAPPED;
289         scoreMode = RAZERS_ERRORS;
290 
291         forward = true;
292         reverse = true;
293         errorRate = 0.05;
294         maxHits = 100;
295         scoreDistanceRange = 0;     // disabled
296         dRange = 1 << 30;
297         purgeAmbiguous = false;
298         output = "";
299         _debugLevel = 0;
300         printVersion = false;
301         trimLength = 0;
302         mutationRate = 0.05;
303 
304         outputFormat = 0;
305         dumpAlignment = false;
306         genomeNaming = 0;
307         readNaming = 0;
308         fullFastaId = false;
309         sortOrder = 0;
310         positionFormat = 0;
311         runID = "s";        //
312         dontShrinkAlignments = false;
313         computeGlobal = false;
314 
315         matchN = false;
316         shape = "11111111111";
317         threshold = 1;
318         tabooLength = 1;
319         repeatLength = 1000;
320         abundanceCut = 1;
321         delta = 0;
322         overlap = 0;
323         maxOverlap = 10;
324 
325         libraryLength = 220;
326         libraryError = 50;
327         nextPairMatchId = 0;
328 
329         prefixSeedLength = 28;      // the "artificial" seed length that is used for mapping quality assignment
330         for (unsigned i = 0; i < 4; ++i)
331             compMask[i] = 1 << i;
332         compMask[4] = 0;
333 
334         noCompactFrac = 0.05;
335         compactMult = 2.2;
336         compactThresh = 1024;
337         // compactThresh = 40;
338 
339         absMaxQualSumErrors = 100;      // maximum for sum of mism qualities in total readlength
340         lowMemory = false;          // set maximum shape weight to 13 to limit size of q-gram index
341         fastaIdQual = false;
342 
343         threadCount = 1;
344         // TODO(holtgrew): Tune this!
345         windowSize = 500000;
346         verificationPackageSize = 100;
347         maxVerificationPackageCount = 100;
348         availableMatchesMemorySize = 0;
349         matchHistoStartThreshold = 5;
350 
351 #ifdef RAZERS_OPENADDRESSING
352         loadFactor = 1.6;
353 #endif
354 
355         lossRate = 0.0;
356         minScore = 0;
357         countFiltration = 0;
358         countVerification = 0;
359         timeLoadFiles = 0.0;
360         timeMapReads = 0.0;
361         timeDumpResults = 0.0;
362         timeBuildQGramIndex = 0.0;
363         timeCompactMatches = 0.0;
364         timeMaskDuplicates = 0.0;
365         timeFsCopy = 0.0;
366         timeFiltration = 0.0;
367         timeVerification = 0.0;
368         maqMapping = false;
369     }
370 
371 };
372 
373 template <typename TSpec = RazerSSpec<> >
374 struct RazerSOptions : RazerSCoreOptions<TSpec>
375 {
376     typedef RazerSCoreOptions<TSpec> TCoreOptions;
377     SeqFileIn readFile;           // left read's SeqFile (we have to keep it open and store it here to stream it only once)
378 };
379 
380 template <typename TSpec>
381 String<unsigned char> RazerSCoreOptions<TSpec>::errorCutOff;
382 
383 template <typename TSpec>
384 typename RazerSCoreOptions<TSpec>::TPreprocessing RazerSCoreOptions<TSpec>::forwardPatterns;
385 
386 //////////////////////////////////////////////////////////////////////////////
387 // Typedefs
388 
389 enum RAZERS_ERROR
390 {
391     RAZERS_INVALID_OPTIONS = -1,
392     RAZERS_READS_FAILED    = -2,
393     RAZERS_GENOME_FAILED   = -3,
394     RAZERS_INVALID_SHAPE   = -4
395 };
396 
397 //////////////////////////////////////////////////////////////////////////////
398 // Definitions
399 
400 template <typename TReadSet, typename TShape, typename TSpec>
401 struct Cargo<Index<TReadSet, IndexQGram<TShape, TSpec> > >
402 {
403     typedef struct
404     {
405         double      abundanceCut;
406         int         _debugLevel;
407     } Type;
408 };
409 
410 //////////////////////////////////////////////////////////////////////////////
411 // Memory tuning
412 
413 #ifdef RAZERS_MEMOPT
414 
415 template <typename TReadSet, typename TShape, typename TSpec>
416 struct SAValue<Index<TReadSet, IndexQGram<TShape, TSpec> > >
417 {
418     typedef Pair<
419         unsigned,
420         unsigned,
421         BitCompressed<24, 8>    // max. 16M reads of length < 256
422         > Type;
423 };
424 
425 #else
426 
427 template <typename TReadSet, typename TShape, typename TSpec>
428 struct SAValue<Index<TReadSet, IndexQGram<TShape, TSpec> > >
429 {
430     typedef Pair<
431         unsigned,               // many reads
432         unsigned                // of arbitrary length
433         > Type;
434 };
435 
436 #endif
437 
438 template <>
439 struct Size<Dna5String>
440 {
441     typedef unsigned Type;
442 };
443 
444 template <typename TReadSet, typename TShape>
445 struct Size<Index<TReadSet, IndexQGram<TShape> > >
446 {
447     typedef unsigned Type;
448 };
449 
450 template <typename TReadSet, typename TShape>
451 struct Size<Index<TReadSet, IndexQGram<TShape, OpenAddressing> > >
452 {
453     typedef unsigned Type;
454 };
455 
456 
457 #ifdef RAZERS_PRUNE_QGRAM_INDEX
458 
459 //////////////////////////////////////////////////////////////////////////////
460 // Repeat masker
461 template <typename TReadSet, typename TShape, typename TSpec>
462 inline bool _qgramDisableBuckets(Index<TReadSet, IndexQGram<TShape, TSpec> > & index)
463 {
464     typedef Index<TReadSet, IndexQGram<TShape, TSpec> > TReadIndex;
465     typedef typename Fibre<TReadIndex, QGramDir>::Type      TDir;
466     typedef typename Iterator<TDir, Standard>::Type         TDirIterator;
467     typedef typename Value<TDir>::Type                      TSize;
468 
469     TDir & dir    = indexDir(index);
470     bool result  = false;
471     unsigned counter = 0;
472     TSize thresh = (TSize)(length(index) * cargo(index).abundanceCut);
473     if (thresh < 100)
474         thresh = 100;
475 
476     TDirIterator it = begin(dir, Standard());
477     TDirIterator itEnd = end(dir, Standard());
478     for (; it != itEnd; ++it)
479         if (*it > thresh)
480         {
481             *it = (TSize) - 1;
482             result = true;
483             ++counter;
484         }
485 
486     if (counter > 0 && cargo(index)._debugLevel >= 1)
487         std::cerr << "Removed " << counter << " k-mers" << std::endl;
488 
489     return result;
490 }
491 
492 #endif
493 
494 
495 template <
496     typename TFragmentStore_,
497     typename TMatches_,
498     typename TRazerSOptions_,
499     typename TRazerSMode_,
500     typename TFilterPattern_,
501     typename TCounts_
502     >
503 struct MatchVerifier
504 {
505     typedef TFragmentStore_                                 TFragmentStore;
506     typedef TMatches_                                       TMatches;
507     typedef TRazerSOptions_                                 TOptions;
508     typedef TRazerSMode_                                    TRazerSMode;
509     typedef TFilterPattern_                                 TFilterPattern;
510     typedef TCounts_                                        TCounts;
511 
512     typedef typename TRazerSMode::TMatchNPolicy             TMatchNPolicy;
513 
514     typedef typename TFragmentStore::TReadSeqStore          TReadSeqStore;
515     typedef typename Value<TReadSeqStore>::Type const       TRead;
516     typedef typename TFragmentStore::TAlignedReadStore      TAlignedReadStore;
517     typedef typename TFragmentStore::TAlignQualityStore     TAlignQualityStore;
518     typedef typename TFragmentStore::TContigSeq             TContigSeq;
519     typedef typename Value<TAlignedReadStore>::Type         TAlignedRead;
520     typedef typename Value<TAlignQualityStore>::Type        TAlignQuality;
521     typedef typename Size<TContigSeq>::Type                 TSize;
522     typedef typename MakeSigned_<TSize>::Type               TContigPos;
523     typedef ModifiedString<TRead, ModReverse>               TRevRead;
524 
525     typedef typename Value<TMatches>::Type TMatchRecord;
526 
527 #ifdef RAZERS_BANDED_MYERS
528     typedef PatternState_<TRead, Myers<AlignTextBanded<FindInfix, TMatchNPolicy, TMatchNPolicy>, True, void> > TPatternState;
529     typedef PatternState_<TRevRead, Myers<AlignTextBanded<FindPrefix, TMatchNPolicy, TMatchNPolicy>, True, void> > TRPatternState;
530 #else  // #ifdef RAZERS_BANDED_MYERS
531     typedef Pattern<TRead, Myers<FindInfix, False, void> >      TMyersPattern;
532     typedef Pattern<TRevRead, Myers<FindInfix, False, void> >   TRevMyersPattern;
533     typedef typename PatternState<TMyersPattern>::Type          TPatternState;
534     typedef typename PatternState<TRevMyersPattern>::Type       TRPatternState;
535 #endif  // #ifdef RAZERS_BANDED_MYERS
536 
537     TMatches        * matches;
538     TOptions        * options;              // RazerS options
539     TFilterPattern  * filterPattern;
540     TCounts         * cnts;
541 
542     TMatchRecord    m;
543     TPatternState   patternState;
544     TRPatternState  revPatternState;
545     TSize           genomeLength;
546     TSize           rightClip;
547     TContigPos      sinkPos;
548     bool            onReverseComplement;
549     bool            oneMatchPerBucket;
550 
551     double compactionTime;
552 
553     MatchVerifier() :
554         genomeLength(0), rightClip(0), sinkPos(std::numeric_limits<TContigPos>::max()), onReverseComplement(false), oneMatchPerBucket(false), compactionTime(0) {}
555 
556     MatchVerifier(TMatches_ & _matches, TOptions & _options, TFilterPattern & _filterPattern, TCounts & _cnts) :
557         matches(&_matches),
558         options(&_options),
559         filterPattern(&_filterPattern),
560         cnts(&_cnts)
561     {
562         genomeLength = 0;
563         rightClip = 0;
564         sinkPos = std::numeric_limits<TContigPos>::max() >> 1;
565         onReverseComplement = false;
566         oneMatchPerBucket = false;
567         compactionTime = 0;
568     }
569 
570     inline void push()
571     {
572         if (onReverseComplement)
573         {
574             // transform coordinates to the forward strand
575             m.beginPos = genomeLength - m.beginPos;
576             m.endPos = genomeLength - m.endPos;
577             std::swap(m.beginPos, m.endPos);
578             m.orientation = 'R';
579         }
580         else
581         {
582             m.orientation = 'F';
583         }
584 
585 //SEQAN_OMP_PRAGMA(critical)
586 // begin of critical section
587         {
588             if (!options->spec.DONT_DUMP_RESULTS)
589             {
590 //                    std::cout << "begin: "<<m.beginPos <<"\tendPos: "<<m.endPos << "\terrors: "<<m.score <<std::endl;
591                 appendValue(*matches, m, Generous());
592 
593                 if ((int64_t)length(*matches) > options->compactThresh)
594                 {
595                     double beginTime = sysTime();
596                     typename Size<TMatches>::Type oldSize = length(*matches);
597 
598                     if (IsSameType<typename TRazerSMode::TGapMode, RazerSGapped>::VALUE || options->threshold == 0)
599                         maskDuplicates(*matches, *options, TRazerSMode());      // overlapping parallelograms cause duplicates
600                     // SEQAN_ASSERT_MSG((back(*matches).endPos - back(*matches).beginPos == 100), "len == %d", int(m.endPos - m.beginPos));
601 
602                     compactMatches(*matches, *cnts, *options, TRazerSMode(), *filterPattern, COMPACT);
603                     // SEQAN_ASSERT_MSG((back(*matches).endPos - back(*matches).beginPos == 100), "len == %d", int(m.endPos - m.beginPos));
604 
605                     if (length(*matches) * 4 > oldSize)                 // the threshold should not be raised
606                     {       // fprintf(stderr, "[raising threshold]");
607                             // options->compactThresh += (options->compactThresh >> 1);	// if too many matches were removed
608                         options->compactThresh = (int64_t)(options->compactThresh * options->compactMult);
609                     }
610 
611 //						if (options._debugLevel >= 2)
612 //							std::cerr << '(' << oldSize - length(store.alignedReadStore) << " matches removed)";
613                     double endTime = sysTime();
614                     compactionTime += (endTime - beginTime);
615                 }
616             }
617             ++options->countVerification;
618         }
619 // end of critical section
620     }
621 
622 };
623 
624 
625 
626 //////////////////////////////////////////////////////////////////////////////
627 // Read a list of genome file names
628 template<typename TSpec>
629 int getGenomeFileNameList(CharString filename, StringSet<CharString> & genomeFileNames, RazerSCoreOptions<TSpec> &options)
630 {
631 	std::ifstream file;
632 	file.open(toCString(filename), std::ios_base::in | std::ios_base::binary);
633 	if (!file.is_open())
634 		return RAZERS_GENOME_FAILED;
635 
636     DirectionIterator<std::fstream, Input>::Type reader(file);
637     if (!atEnd(reader))
638         return 0;
639 
640     clear(genomeFileNames);
641 	if (*reader == '>' && *reader != '@')	//if file does not start with a fasta header --> list of multiple reference genome files
642 	{
643 		if(options._debugLevel >=1)
644 			std::cout << std::endl << "Reading multiple genome files:" << std::endl;
645 
646 		unsigned i = 1;
647         CharString line;
648 		while (!atEnd(reader))
649 		{
650             readLine(line, reader);
651             cropOuter(line, IsWhitespace());
652 			appendValue(genomeFileNames, line);
653 			if(options._debugLevel >=2)
654 				std::cout <<"Genome file #"<< i <<": " << back(genomeFileNames) << std::endl;
655 			++i;
656 		}
657 		if(options._debugLevel >=1)
658 			std::cout << i-1 << " genome files total." << std::endl;
659 	}
660 	else		//if file starts with a fasta header --> regular one-genome-file input
661 		appendValue(genomeFileNames, filename, Exact());
662 	file.close();
663 	return 0;
664 }
665 
666 //////////////////////////////////////////////////////////////////////////////
667 // Load multi-Fasta sequences with or w/o quality values
668 template <typename TFSSpec, typename TFSConfig, typename TRazerSOptions>
669 bool loadReads(
670     FragmentStore<TFSSpec, TFSConfig> & store,
671 	SeqFileIn &seqFile,
672     TRazerSOptions & options)
673 {
674     bool countN = !(options.matchN || options.outputFormat == 1);
675 
676     String<uint64_t> qualSum;
677     String<Dna5Q>    seq;
678     CharString       qual;
679     CharString       seqId;
680 
681     unsigned seqCount = 0;
682     unsigned kickoutcount = 0;
683 
684     while (!atEnd(seqFile))
685     {
686         readRecord(seqId, seq, qual, seqFile);
687         ++seqCount;
688 
689         if (options.readNaming == 0 || options.readNaming == 3)
690         {
691             if (!options.fullFastaId)
692                 cropAfterFirst(seqId, IsWhitespace());  // read Fasta id up to the first whitespace
693         }
694         else
695         {
696             clear(seqId);
697         }
698 
699         if (countN)
700         {
701             int count = 0;
702             int cutoffCount = (int)(options.errorRate * length(seq));
703             for (unsigned j = 0; j < length(seq); ++j)
704                 if (getValue(seq, j) == 'N')
705                     if (++count > cutoffCount)
706                     {
707                         clear(seq);
708                         clear(seqId);
709                         clear(qual);  // So no qualities are assigned below.
710                         ++kickoutcount;
711                         break;
712                     }
713 // low qual. reads are empty to output them and their id later as LQ reads
714 //			if (count > cutoffCount) continue;
715         }
716 
717         // store dna and quality together
718         assignQualities(seq, qual);
719         if (options.trimLength > 0 && length(seq) > (unsigned)options.trimLength)
720             resize(seq, options.trimLength);
721 
722         // append read to fragment store
723         appendRead(store, seq, seqId);
724 
725         unsigned len = length(seq);
726         if (length(qualSum) <= len)
727         {
728             resize(qualSum, len, 0u);
729             resize(options.readLengths, len + 1, 0u);
730         }
731         ++options.readLengths[len];
732 
733         for (unsigned i = 0; i < len; ++i)
734             qualSum[i] += getQualityValue(seq[i]);
735     }
736 
737     // memory optimization
738     // we store reads in a concat-direct stringset and can shrink its size
739     shrinkToFit(store.readSeqStore.concat);
740     shrinkToFit(store.readSeqStore.limits);
741 
742     // compute error probabilities
743     resize(options.avrgQuality, length(qualSum));
744     unsigned coverage = 0;
745     for (unsigned i = length(qualSum); i != 0; )
746     {
747         coverage += options.readLengths[i];
748         --i;
749         options.avrgQuality[i] = (double)qualSum[i] / (double)coverage;
750     }
751     estimateErrorDistributionFromQualities(options);
752 
753     typedef Shape<Dna, SimpleShape> TShape;
754     typedef typename SAValue<Index<StringSet<Dna5String>, IndexQGram<TShape, OpenAddressing> > >::Type TSAValue;
755     TSAValue sa(0, 0);
756     sa.i1 = ~sa.i1;
757     sa.i2 = ~sa.i2;
758 
759     if ((unsigned)sa.i1 < length(store.readSeqStore) - 1)
760     {
761         std::cerr << "Maximal read number of " << (unsigned)sa.i1 + 1 << " exceeded. Please remove \"#define RAZERS_MEMOPT\" in razers.cpp and recompile." << std::endl;
762         seqCount = 0;
763     }
764     if ((unsigned)sa.i2 < length(options.readLengths) - 2)
765     {
766         std::cerr << "Maximal read length of " << (unsigned)sa.i2 + 1 << " bps exceeded. Please remove \"#define RAZERS_MEMOPT\" in razers.cpp and recompile." << std::endl;
767         seqCount = 0;
768     }
769 
770     if (options._debugLevel > 1 && kickoutcount > 0)
771         std::cerr << "Ignoring " << kickoutcount << " low quality reads.\n";
772 
773     if (options._debugLevel > 1)
774     {
775         std::cerr << std::endl;
776         std::cerr << "Average quality profile:" << std::endl;
777         unsigned cols = length(options.avrgQuality);
778         if (cols > 40)
779             cols = 40;
780 
781         for (int j = 20; j >= 0; --j)
782         {
783             std::cout.width(3);
784             if (j % 5 == 0)
785                 std::cout << 2 * j;
786             else
787                 std::cout << ' ';
788             std::cout << " | ";
789             for (unsigned i = 0; i < cols; ++i)
790             {
791                 unsigned c = i * (length(options.avrgQuality) - 1) / (cols - 1);
792                 double x = options.avrgQuality[c];
793                 std::cout << ((2 * j + 1 <= x) ? '*' : ' ');
794             }
795             std::cout << std::endl;
796         }
797         std::cout << "    +-";
798         for (unsigned i = 0; i < cols; ++i)
799             std::cout << ((i % 5 == 0) ? '+' : '-');
800         std::cout << std::endl << "  ";
801         for (unsigned i = 0; i < cols; ++i)
802         {
803             unsigned c = i * (length(options.avrgQuality) - 1) / (cols - 1);
804             std::cout.width(5);
805             if (i % 5 == 0)
806                 std::cout << c + 1;
807         }
808         std::cout << std::endl;
809     }
810 
811     return seqCount > 0;
812 }
813 
814 //////////////////////////////////////////////////////////////////////////////
815 // Read the first sequence of a multi-sequence file
816 // and return its length
817 inline int estimateReadLength(SeqFileIn &seqFile)
818 {
819 	if (atEnd(seqFile))
820 		return 0;
821 
822     typedef String<char, Array<1000> > TBuffer;
823 
824     // read chunk into buffer
825     TBuffer buffer;
826     resize(buffer, capacity(buffer));
827     size_t len = seqFile.stream.readsome(&buffer[0], length(buffer));
828     for (size_t i = 0; i < len; ++i)
829         seqFile.stream.unget();
830     resize(buffer, len);
831 
832     // parse record from buffer
833     DirectionIterator<TBuffer, Input>::Type iter = directionIterator(buffer, Input());
834     CharString fastaId, seq;
835     readRecord(fastaId, seq, iter, seqFile.format);
836     return length(seq);
837 }
838 
839 // Comparators for RazerS1-style matches.
840 
841 // TODO(holtgrew): Slightly different comparators than in previous RazerS 3 version, add back the additional checks?
842 
843 template <typename TReadMatch>
844 struct LessBeginPos :
845     public std::binary_function<TReadMatch, TReadMatch, bool>
846 {
847     inline bool operator()(TReadMatch const & a, TReadMatch const & b) const
848     {
849         // genome position and orientation
850         if (a.contigId < b.contigId) return true;
851         if (a.contigId > b.contigId) return false;
852 
853         if (a.beginPos < b.beginPos) return true;
854 
855         return false;
856     }
857 
858 };
859 
860 template <typename TReadMatch>
861 struct LessRNoBeginPos :
862     public std::binary_function<TReadMatch, TReadMatch, bool>
863 {
864     inline bool operator()(TReadMatch const & a, TReadMatch const & b) const
865     {
866         // read number
867         if (a.readId < b.readId) return true;
868         if (a.readId > b.readId) return false;
869 
870         // genome position and orientation
871         if (a.contigId < b.contigId) return true;
872         if (a.contigId > b.contigId) return false;
873 
874         if (a.beginPos < b.beginPos) return true;
875         if (a.beginPos > b.beginPos) return false;
876 
877         if (a.orientation == '-') return false;
878         if (b.orientation == '-') return true;
879 
880         if (a.orientation < b.orientation) return true;
881         if (a.orientation > b.orientation) return false;
882 
883         // quality
884         if (a.score > b.score) return true;
885         if (b.score > a.score) return false;
886 
887         if (a.endPos > b.endPos) return true;
888 
889         return false;
890     }
891 
892 };
893 
894 template <typename TReadMatch>
895 struct LessRNoBeginPosMP :
896     public std::binary_function<TReadMatch, TReadMatch, bool>
897 {
898     inline bool operator()(TReadMatch const & a, TReadMatch const & b) const
899     {
900         // read number
901         if (a.readId < b.readId) return true;
902         if (a.readId > b.readId) return false;
903 
904         // genome position and orientation
905         if (a.contigId < b.contigId) return true;
906         if (a.contigId > b.contigId) return false;
907 
908         if (a.beginPos < b.beginPos) return true;
909         if (a.beginPos > b.beginPos) return false;
910 
911         if (a.orientation == '-') return false;
912         if (b.orientation == '-') return true;
913 
914         if (a.orientation < b.orientation) return true;
915         if (a.orientation > b.orientation) return false;
916 
917         // quality
918         if (a.pairScore > b.pairScore) return true;
919         if (a.pairScore < b.pairScore) return false;
920 
921         if (a.libDiff < b.libDiff) return true;
922         if (a.libDiff > b.libDiff) return false;
923 
924         if (a.endPos > b.endPos) return true;
925 
926         return false;
927     }
928 
929 };
930 
931 // ... to sort matches and remove duplicates with equal gEnd
932 template <typename TReadMatch>
933 struct LessRNoEndPos :
934     public std::binary_function<TReadMatch, TReadMatch, bool>
935 {
936     inline bool operator()(TReadMatch const & a, TReadMatch const & b) const
937     {
938         // read number
939         if (a.readId < b.readId) return true;
940         if (a.readId > b.readId) return false;
941 
942         // genome position and orientation
943         if (a.contigId < b.contigId) return true;
944         if (a.contigId > b.contigId) return false;
945 
946         if (a.endPos   < b.endPos) return true;
947         if (a.endPos   > b.endPos) return false;
948 
949         if (a.orientation == '-') return false;
950         if (b.orientation == '-') return true;
951 
952         if (a.orientation < b.orientation) return true;
953         if (a.orientation > b.orientation) return false;
954 
955         // quality
956         if (a.score > b.score) return true;
957         if (b.score > a.score) return false;
958 
959         if (a.beginPos < b.beginPos) return true;
960 
961         return false;
962     }
963 
964 };
965 
966 template <typename TReadMatch>
967 struct LessRNoEndPosMP :
968     public std::binary_function<TReadMatch, TReadMatch, bool>
969 {
970     int libSize;
971     LessRNoEndPosMP(int _libSize) :
972         libSize(_libSize) {}
973 
974     inline bool operator()(TReadMatch const & a, TReadMatch const & b) const
975     {
976         // read number
977         if (a.readId < b.readId) return true;
978         if (a.readId > b.readId) return false;
979 
980         // genome position and orientation
981         if (a.contigId < b.contigId) return true;
982         if (a.contigId > b.contigId) return false;
983 
984         if (a.endPos   < b.endPos) return true;
985         if (a.endPos   > b.endPos) return false;
986 
987         if (a.orientation == '-') return false;
988         if (b.orientation == '-') return true;
989 
990         if (a.orientation < b.orientation) return true;
991         if (a.orientation > b.orientation) return false;
992 
993         // quality
994         if (a.pairScore > b.pairScore) return true;
995         if (a.pairScore < b.pairScore) return false;
996 
997         if (a.libDiff < b.libDiff) return true;
998         if (a.libDiff > b.libDiff) return false;
999 
1000         if (a.beginPos < b.beginPos) return true;
1001 
1002         return false;
1003     }
1004 
1005 };
1006 
1007 template <typename TReadMatch>
1008 struct LessScoreBackport :
1009     public std::binary_function<TReadMatch, TReadMatch, bool>
1010 {
1011     inline bool operator()(TReadMatch const & a, TReadMatch const & b) const
1012     {
1013         // read number
1014         if (a.readId < b.readId) return true;
1015         if (a.readId > b.readId) return false;
1016 
1017         // quality
1018         if (a.orientation == '-') return false;
1019         if (b.orientation == '-') return true;
1020 
1021         if (a.score > b.score) return true;
1022         if (b.score > a.score) return false;
1023 
1024         // Sort by leftmost begin pos, longest end pos on ties.
1025         if (a.contigId < b.contigId) return true;
1026         if (a.contigId > b.contigId) return false;
1027 
1028         if (a.orientation < b.orientation) return true;
1029         if (a.orientation > b.orientation) return false;
1030 
1031         if (a.beginPos < b.beginPos) return true;
1032         if (a.beginPos > b.beginPos) return false;
1033 
1034         if (a.endPos < b.endPos) return false;
1035         if (a.endPos > b.endPos) return true;
1036 
1037         return false;
1038     }
1039 
1040 };
1041 
1042 // TODO(holtgrew): Merge with above.
1043 
1044 template <typename TReadMatch>
1045 struct LessScoreBackport3Way :
1046     public std::binary_function<TReadMatch, TReadMatch, int>
1047 {
1048     inline int operator()(TReadMatch const & a, TReadMatch const & b) const
1049     {
1050         // read number
1051         if (a.readId < b.readId) return -1;
1052         if (a.readId > b.readId) return 1;
1053 
1054         // quality
1055         if (a.orientation != '-' || b.orientation != '-')
1056         {
1057             if (a.orientation == '-') return -1;
1058             if (b.orientation == '-') return 1;
1059         }
1060 
1061         if (a.score > b.score) return -1;
1062         if (b.score > a.score) return 1;
1063 
1064         // Sort by leftmost begin pos, longest end pos on ties.
1065         if (a.contigId < b.contigId) return -1;
1066         if (a.contigId > b.contigId) return 1;
1067 
1068         if (a.orientation < b.orientation) return -1;
1069         if (a.orientation > b.orientation) return 1;
1070 
1071         if (a.beginPos < b.beginPos) return -1;
1072         if (a.beginPos > b.beginPos) return 1;
1073 
1074         if (a.endPos < b.endPos) return 1;
1075         if (a.endPos > b.endPos) return -1;
1076 
1077         return 0;
1078     }
1079 
1080 };
1081 
1082 
1083 // Comparators for Fragment Store
1084 
1085 template <typename TAlignedReadStore, typename TLessScore>
1086 struct LessRNoGPos :
1087     public std::binary_function<typename Value<TAlignedReadStore>::Type, typename Value<TAlignedReadStore>::Type, bool>
1088 {
1089     typedef typename Value<TAlignedReadStore>::Type TAlignedRead;
1090     TLessScore lessScore;
1091 
1092     LessRNoGPos(TLessScore const & _lessScore) :
1093         lessScore(_lessScore) {}
1094 
1095     inline bool operator()(TAlignedRead const & a, TAlignedRead const & b) const
1096     {
1097         // read number
1098         if (a.readId < b.readId) return true;
1099         if (a.readId > b.readId) return false;
1100 
1101         // contig number
1102         if (a.contigId < b.contigId) return true;
1103         if (a.contigId > b.contigId) return false;
1104 
1105         // beginning position
1106         typename TAlignedRead::TPos ba = _min(a.beginPos, a.endPos);
1107         typename TAlignedRead::TPos bb = _min(b.beginPos, b.endPos);
1108         if (ba < bb) return true;
1109         if (ba > bb) return false;
1110 
1111         // orientation
1112         bool oa = a.beginPos < a.endPos;
1113         bool ob = b.beginPos < b.endPos;
1114         if (oa != ob) return oa;
1115 
1116         int result = lessScore.compare(a, b);
1117         if (result == 0)
1118         {
1119             // prefer reads that support more of the reference
1120             return _max(a.beginPos, a.endPos) > _max(b.beginPos, b.endPos);
1121         }
1122         return result == -1;
1123     }
1124 
1125 };
1126 
1127 // ... to sort matches and remove duplicates with equal gEnd
1128 template <typename TAlignedReadStore, typename TLessScore>
1129 struct LessRNoGEndPos :
1130     public std::binary_function<typename Value<TAlignedReadStore>::Type, typename Value<TAlignedReadStore>::Type, bool>
1131 {
1132     typedef typename Value<TAlignedReadStore>::Type TAlignedRead;
1133     TLessScore lessScore;
1134 
1135     LessRNoGEndPos(TLessScore const & _lessScore) :
1136         lessScore(_lessScore) {}
1137 
1138     inline bool operator()(
1139         typename Value<TAlignedReadStore>::Type const & a,
1140         typename Value<TAlignedReadStore>::Type const & b) const
1141     {
1142         // read number
1143         if (a.readId < b.readId) return true;
1144         if (a.readId > b.readId) return false;
1145 
1146         // contig number
1147         if (a.contigId < b.contigId) return true;
1148         if (a.contigId > b.contigId) return false;
1149 
1150         // end position
1151         typename TAlignedRead::TPos ea = _max(a.beginPos, a.endPos);
1152         typename TAlignedRead::TPos eb = _max(b.beginPos, b.endPos);
1153         if (ea < eb) return true;
1154         if (ea > eb) return false;
1155 
1156         // orientation
1157         bool oa = a.beginPos < a.endPos;
1158         bool ob = b.beginPos < b.endPos;
1159         if (oa != ob) return oa;
1160 
1161         int result = lessScore.compare(a, b);
1162         if (result == 0)
1163         {
1164             // prefer reads that support more of the reference
1165             return _min(a.beginPos, a.endPos) < _min(b.beginPos, b.endPos);
1166         }
1167         return result == -1;
1168     }
1169 
1170 };
1171 
1172 template <typename TAlignedReadStore, typename TAlignedReadQualityStore, typename TRazerSMode>
1173 struct LessScore :
1174     public std::binary_function<typename Value<TAlignedReadStore>::Type, typename Value<TAlignedReadStore>::Type, bool>
1175 {
1176     TAlignedReadQualityStore & qualStore;
1177 
1178     LessScore(TAlignedReadQualityStore & _qualStore) :
1179         qualStore(_qualStore) {}
1180 
1181     inline int compare(
1182         typename Value<TAlignedReadStore>::Type const & a,
1183         typename Value<TAlignedReadStore>::Type const & b) const
1184     {
1185         typedef typename Value<TAlignedReadStore>::Type TAlignedRead;
1186 
1187         // read number
1188         if (a.readId < b.readId) return -1;
1189         if (a.readId > b.readId) return 1;
1190 
1191         // quality
1192         if (a.id == TAlignedRead::INVALID_ID) return 1;
1193         if (b.id == TAlignedRead::INVALID_ID) return -1;
1194 
1195         typename GetValue<TAlignedReadQualityStore>::Type qa = getValue(qualStore, a.id);
1196         typename GetValue<TAlignedReadQualityStore>::Type qb = getValue(qualStore, b.id);
1197         if (qa.pairScore > qb.pairScore) return -1;
1198         if (qa.pairScore < qb.pairScore) return 1;
1199 
1200         if (qa.score > qb.score) return -1;
1201         if (qb.score > qa.score) return 1;
1202 
1203         return 0;
1204     }
1205 
1206     inline bool operator()(
1207         typename Value<TAlignedReadStore>::Type const & a,
1208         typename Value<TAlignedReadStore>::Type const & b) const
1209     {
1210         return compare(a, b) == -1;
1211     }
1212 
1213 };
1214 
1215 // longest prefix mapping
1216 template <typename TAlignedReadStore, typename TAlignedReadQualityStore, typename TGapMode, typename TScoreMode, typename TMatchNPolicy>
1217 struct LessScore<TAlignedReadStore, TAlignedReadQualityStore, RazerSMode<RazerSPrefix, TGapMode, TScoreMode, TMatchNPolicy> >:
1218     public std::binary_function<typename Value<TAlignedReadStore>::Type, typename Value<TAlignedReadStore>::Type, bool>
1219 {
1220     TAlignedReadQualityStore & qualStore;
1221 
1222     LessScore(TAlignedReadQualityStore & _qualStore) :
1223         qualStore(_qualStore) {}
1224 
1225     inline int compare(
1226         typename Value<TAlignedReadStore>::Type const & a,
1227         typename Value<TAlignedReadStore>::Type const & b) const
1228     {
1229         typedef typename Value<TAlignedReadStore>::Type TAlignedRead;
1230 
1231         // read number
1232         if (a.readId < b.readId) return -1;
1233         if (a.readId > b.readId) return 1;
1234 
1235         // quality
1236         if (a.id == TAlignedRead::INVALID_ID) return 1;
1237         if (b.id == TAlignedRead::INVALID_ID) return -1;
1238 
1239         typename GetValue<TAlignedReadQualityStore>::Type qa = getValue(qualStore, a.id);
1240         typename GetValue<TAlignedReadQualityStore>::Type qb = getValue(qualStore, b.id);
1241         if (qa.errors < qb.errors) return -1;
1242         if (qa.errors > qb.errors) return 1;
1243 
1244         if (qa.score > qb.score) return -1;
1245         if (qb.score > qa.score) return 1;
1246 
1247         return 0;
1248     }
1249 
1250     inline bool operator()(
1251         typename Value<TAlignedReadStore>::Type const & a,
1252         typename Value<TAlignedReadStore>::Type const & b) const
1253     {
1254         return compare(a, b) == -1;
1255     }
1256 
1257 };
1258 
1259 //////////////////////////////////////////////////////////////////////////////
1260 
1261 template <typename TAlignedReadQualityStore, typename TRazerSMode>
1262 struct BinFunctorDefault
1263 {
1264     TAlignedReadQualityStore & qualStore;
1265 
1266     BinFunctorDefault(TAlignedReadQualityStore & _qualStore) :
1267         qualStore(_qualStore) {}
1268 
1269     template <typename TAlignedRead>
1270     inline int operator()(TAlignedRead & alignedRead) const
1271     {
1272         return qualStore[alignedRead.id].errors;
1273     }
1274 
1275 };
1276 
1277 
1278 //////////////////////////////////////////////////////////////////////////////
1279 // Mark duplicate matches for deletion
1280 template <typename TMatches, typename TIterator, typename TOptions, typename TRazerSMode>
1281 void maskDuplicates(TMatches &, TIterator const itBegin, TIterator const itEnd, TOptions & options, TRazerSMode)
1282 {
1283     typedef typename Value<TMatches>::Type  TMatch;
1284     typedef typename TMatch::TContigPos     TContigPos;
1285 
1286     TContigPos  beginPos, endPos;
1287     unsigned    contigId, readId;
1288     char        orientation;
1289     unsigned    masked;
1290     TIterator   it;
1291     double      beginTime = sysTime();
1292 
1293     //////////////////////////////////////////////////////////////////////////////
1294     // remove matches with equal ends
1295 
1296     // we can skip one sort step in no-gap mode and with pigeonhole filter
1297     if (IsSameType<typename TRazerSMode::TGapMode, RazerSGapped>::VALUE || options.threshold != 0)
1298     {
1299 #ifdef RAZERS_PROFILE
1300         timelineBeginTask(TASK_SORT);
1301 #endif
1302         if (options.libraryLength >= 0)
1303             std::sort(itBegin, itEnd, LessRNoEndPosMP<TMatch>(options.libraryLength));
1304         else
1305             std::sort(itBegin, itEnd, LessRNoEndPos<TMatch>());
1306 #ifdef RAZERS_PROFILE
1307         timelineEndTask(TASK_SORT);
1308 #endif
1309 
1310         beginPos = -1;
1311         endPos = -1;
1312         contigId = TMatch::INVALID_ID;
1313         readId = TMatch::INVALID_ID;
1314         orientation = '-';
1315         masked = 0;
1316         it = itBegin;
1317 
1318         for (; it != itEnd; ++it)
1319         {
1320             if ((*it).pairMatchId != TMatch::INVALID_ID && (it->readId & 1) != 0)
1321                 continue;                                                                   // remove only single reads or left mates
1322 
1323             TContigPos itEndPos = _max((*it).beginPos, (*it).endPos);
1324             if (endPos == itEndPos && orientation == (*it).orientation &&
1325                 contigId == (*it).contigId && readId == (*it).readId)
1326             {
1327                 (*it).orientation = '-';
1328                 masked += 1;
1329                 continue;
1330             }
1331             readId = (*it).readId;
1332             contigId = (*it).contigId;
1333             endPos = itEndPos;
1334             orientation = (*it).orientation;
1335         }
1336     }
1337 
1338     //////////////////////////////////////////////////////////////////////////////
1339     // remove matches with equal begins
1340 
1341 #ifdef RAZERS_PROFILE
1342     timelineBeginTask(TASK_SORT);
1343 #endif  // #ifdef RAZERS_PROFILE
1344     if (options.libraryLength >= 0)
1345         std::sort(itBegin, itEnd, LessRNoBeginPosMP<TMatch>());
1346     else
1347         std::sort(itBegin, itEnd, LessRNoBeginPos<TMatch>());
1348     // std::cerr << "(SORTING " << itEnd-itBegin << " MATCHES)";
1349     // sortAlignedReads(store.alignedReadStore, TLessBeginPos(TLessScore(store.alignQualityStore)));
1350 #ifdef RAZERS_PROFILE
1351     timelineEndTask(TASK_SORT);
1352 #endif  // #ifdef RAZERS_PROFILE
1353 
1354     beginPos = -1;
1355     endPos = -1;
1356     contigId = TMatch::INVALID_ID;
1357     readId = TMatch::INVALID_ID;
1358     orientation = '-';
1359     masked = 0;
1360     it = itBegin;
1361 
1362     for (; it != itEnd; ++it)
1363     {
1364         if ((*it).orientation == '-')
1365             continue;
1366         if ((*it).pairMatchId != TMatch::INVALID_ID && (it->readId & 1) != 0)
1367             continue;                                                                   // remove only single reads or left mates
1368 
1369         TContigPos itBeginPos = _min((*it).beginPos, (*it).endPos);
1370         if (beginPos == itBeginPos && readId == (*it).readId &&
1371             contigId == (*it).contigId && orientation == ((*it).beginPos < (*it).endPos))
1372         {
1373             (*it).orientation = '-';
1374             masked += 1;
1375             continue;
1376         }
1377         readId = (*it).readId;
1378         contigId = (*it).contigId;
1379         beginPos = itBeginPos;
1380         orientation = (*it).beginPos < (*it).endPos;
1381     }
1382 
1383 #ifdef RAZERS_DEFER_COMPACTION
1384 #ifdef RAZERS_PROFILE
1385     timelineBeginTask(TASK_SORT);
1386 #endif  // #ifdef RAZERS_PROFILE
1387     //////////////////////////////////////////////////////////////////////////////
1388     // sort matches by begin position when using defered compaction
1389     std::sort(itBegin, itEnd, LessBeginPos<TMatch>());
1390 #ifdef RAZERS_PROFILE
1391     timelineEndTask(TASK_SORT);
1392 #endif  // #ifdef RAZERS_PROFILE
1393 #endif  // #ifdef RAZERS_DEFER_COMPACTION
1394 
1395     options.timeMaskDuplicates += sysTime() - beginTime;
1396     if (options._debugLevel >= 2)
1397         fprintf(stderr, " [%u matches masked]", masked);
1398 }
1399 
1400 template <typename TMatches, typename TOptions, typename TRazerSMode>
1401 void maskDuplicates(TMatches & matches, TOptions & options, TRazerSMode const & mode)
1402 {
1403     maskDuplicates(matches, begin(matches, Standard()), end(matches, Standard()), options, mode);
1404 }
1405 
1406 /*
1407 //////////////////////////////////////////////////////////////////////////////
1408 // Count matches for each number of errors
1409 template <typename TFragmentStore, typename TCounts, typename TBinFunctor, typename TRazerSMode>
1410 void countMatches(TFragmentStore &store, TCounts &cnt, TBinFunctor &binF, TRazerSMode)
1411 {
1412     typedef typename TFragmentStore::TAlignedReadStore				TAlignedReadStore;
1413     typedef typename TFragmentStore::TAlignQualityStore				TAlignQualityStore;
1414 
1415     typedef typename Value<TAlignedReadStore>::Type					TAlignedRead;
1416     typedef typename Iterator<TAlignedReadStore, Standard>::Type	TIterator;
1417     typedef typename Value<TCounts>::Type							TRow;
1418     typedef typename Value<TRow>::Type								TValue;
1419 
1420     sortAlignedReads(store.alignedReadStore, LessScore<TAlignedReadStore, TAlignQualityStore, TRazerSMode>(store.alignQualityStore));
1421 
1422     TIterator it = begin(store.alignedReadStore, Standard());
1423     TIterator itEnd = end(store.alignedReadStore, Standard());
1424 
1425     unsigned readId = TAlignedRead::INVALID_ID;
1426     int lastBin = -1;
1427     int64_t count = 0;
1428 
1429     String<TValue> row, empty;
1430     for (; it != itEnd; ++it)
1431     {
1432         if ((*it).id == TAlignedRead::INVALID_ID) continue;
1433         int bin = binF((*it).id);
1434 
1435         if (readId == (*it).readId)
1436         {
1437             if (lastBin == bin)
1438                 ++count;
1439             else
1440             {
1441                 appendValue(row, TValue(bin, count), Generous());
1442                 lastBin = bin;
1443                 count = 1;
1444             }
1445         }
1446         else
1447         {
1448             while (length(cnt) < readId)
1449                 appendValue(cnt, empty, Generous());
1450             appendValue(cnt, row, Generous());
1451             clear(row);
1452             readId = (*it).readId;
1453             lastBin = bin;
1454             count = 1;
1455         }
1456     }
1457     while (length(cnt) < readId)
1458         appendValue(cnt, empty, Generous());
1459     appendValue(cnt, row, Generous());
1460 }
1461 */
1462 //////////////////////////////////////////////////////////////////////////////
1463 // Count matches for each number of errors
1464 template <typename TFragmentStore, typename TCounts, typename TRazerSMode>
1465 void countMatches(TFragmentStore & store, TCounts & cnt, TRazerSMode const &)
1466 {
1467     typedef typename TFragmentStore::TAlignedReadStore              TAlignedReadStore;
1468     typedef typename TFragmentStore::TAlignQualityStore             TAlignQualityStore;
1469 
1470     typedef typename Value<TAlignedReadStore>::Type                 TAlignedRead;
1471     typedef typename Iterator<TAlignedReadStore, Standard>::Type    TIterator;
1472     typedef typename Value<TCounts>::Type                           TRow;
1473     typedef typename Value<TRow>::Type                              TValue;
1474 
1475     TIterator it = begin(store.alignedReadStore, Standard());
1476     TIterator itEnd = end(store.alignedReadStore, Standard());
1477 
1478     unsigned readId = TAlignedRead::INVALID_ID;
1479     short errors = -1;
1480     int64_t count = 0;
1481     int64_t maxVal = std::numeric_limits<TValue>::max();
1482 
1483 #ifdef RAZERS_PROFILE
1484     timelineBeginTask(TASK_SORT);
1485 #endif  // #ifdef RAZERS_PROFILE
1486     std::sort(begin(store.alignedReadStore, Standard()), end(store.alignedReadStore, Standard()), LessScore<TAlignedReadStore, TAlignQualityStore, TRazerSMode>(store.alignQualityStore));
1487     //sortAlignedReads(store.alignedReadStore, LessScore<TAlignedReadStore, TAlignQualityStore, TRazerSMode>(store.alignQualityStore));
1488 #ifdef RAZERS_PROFILE
1489     timelineEndTask(TASK_SORT);
1490 #endif  // #ifdef RAZERS_PROFILE
1491 
1492     for (; it != itEnd; ++it)
1493     {
1494         if ((*it).id == TAlignedRead::INVALID_ID)
1495             continue;
1496         if (readId == (*it).readId && errors == store.alignQualityStore[(*it).id].errors)
1497             ++count;
1498         else
1499         {
1500             if (readId != TAlignedRead::INVALID_ID && (unsigned)errors < length(cnt))
1501                 cnt[errors][readId] = (maxVal < count) ? (TValue)maxVal : (TValue)count;
1502             readId = (*it).readId;
1503             errors = store.alignQualityStore[(*it).id].errors;
1504             count = 1;
1505         }
1506     }
1507     if (readId != TAlignedRead::INVALID_ID && (unsigned)errors < length(cnt))
1508         cnt[errors][readId] = (TValue)count;
1509 }
1510 
1511 //////////////////////////////////////////////////////////////////////////////
1512 
1513 template <typename TFilterPattern, typename TReadNo, typename TMaxErrors>
1514 inline void
1515 setMaxErrors(TFilterPattern &, TReadNo, TMaxErrors)
1516 {}
1517 
1518 template <typename TIndex, typename TPigeonholeSpec, typename TReadNo, typename TMaxErrors>
1519 inline void
1520 setMaxErrors(Pattern<TIndex, Pigeonhole<TPigeonholeSpec> > & filterPattern, TReadNo readNo, TMaxErrors maxErrors)
1521 {
1522     if (maxErrors < 0)
1523         maskPatternSequence(filterPattern, readNo, maxErrors >= 0);
1524 }
1525 
1526 template <typename TIndex, typename TSwiftSpec, typename TReadNo, typename TMaxErrors>
1527 inline void
1528 setMaxErrors(Pattern<TIndex, Swift<TSwiftSpec> > & filterPattern, TReadNo readNo, TMaxErrors maxErrors)
1529 {
1530     // if (readNo==643)
1531     //  std::cout<<"dman"<<std::endl;
1532     int minT = _qgramLemma(filterPattern, readNo, maxErrors);
1533     if (minT > 1)
1534     {
1535 //		std::cout<<" read:"<<readNo<<" newThresh:"<<minT;
1536         if (maxErrors < 0)
1537             minT = std::numeric_limits<int>::max();
1538         setMinThreshold(filterPattern, readNo, (unsigned)minT);
1539     }
1540 }
1541 
1542 //////////////////////////////////////////////////////////////////////////////
1543 // Remove low quality matches
1544 template <
1545     typename TMatches,
1546     typename TCounts,
1547     typename TSpec,
1548     typename TAlignMode,
1549     typename TGapMode,
1550     typename TScoreMode,
1551     typename TSwift,
1552     typename TMatchNPolicy
1553     >
1554 void compactMatches(
1555     TMatches & matches,
1556     TCounts &,
1557     RazerSCoreOptions<TSpec> & options,
1558     RazerSMode<TAlignMode, TGapMode, TScoreMode, TMatchNPolicy> const &,
1559     TSwift & swift,
1560     CompactMatchesMode compactMode)
1561 {
1562     // fprintf(stderr, "[compact]");
1563     double beginTime = sysTime();
1564     typedef typename Value<TMatches>::Type              TMatch;
1565     typedef typename Iterator<TMatches, Standard>::Type TIterator;
1566     //typedef RazerSMode<TAlignMode, TGapMode, TScoreMode, TMatchNPolicy> TRazerSMode;
1567 
1568     unsigned readNo = -1;
1569     unsigned hitCount = 0;
1570     unsigned hitCountCutOff = options.maxHits;
1571     int scoreCutOff = std::numeric_limits<int>::min();
1572     int scoreRangeBest = (IsSameType<TAlignMode, RazerSGlobal>::VALUE && !IsSameType<TScoreMode, RazerSScore>::VALUE) ? -(int)options.scoreDistanceRange : std::numeric_limits<int>::max();
1573     ignoreUnusedVariableWarning(scoreRangeBest);
1574     ignoreUnusedVariableWarning(compactMode);
1575 
1576 #ifdef RAZERS_PROFILE
1577     timelineBeginTask(TASK_SORT);
1578 #endif  // #ifdef RAZERS_PROFILE
1579 #ifdef RAZERS_EXTERNAL_MATCHES
1580     if (compactMode == COMPACT_FINAL_EXTERNAL)
1581     {
1582         typedef Pipe<TMatches, Source<> > TSource;
1583         typedef LessScoreBackport3Way<TMatch> TLess;
1584         typedef Pool<TMatch, SorterSpec<SorterConfigSize<TLess, typename Size<TSource>::Type> > > TSorterPool;
1585 
1586         TSource source(matches);
1587         TSorterPool sorter;
1588         sorter << source;
1589         matches << sorter;
1590 
1591         for (unsigned i = 1; i < length(matches); ++i)
1592             SEQAN_ASSERT_LEQ(TLess() (matches[i - 1], matches[i]), 0);
1593     }
1594     else
1595     {
1596 #endif  // #ifdef RAZERS_EXTERNAL_MATCHES
1597     std::sort(begin(matches, Standard()), end(matches, Standard()), LessScoreBackport<TMatch>());
1598     // sortAlignedReads(store.alignedReadStore, LessScore<TAlignedReadStore, TAlignQualityStore, TRazerSMode>(store.alignQualityStore));
1599 #ifdef RAZERS_EXTERNAL_MATCHES
1600 }
1601 
1602 #endif  // #ifdef RAZERS_EXTERNAL_MATCHES
1603 #ifdef RAZERS_PROFILE
1604     timelineEndTask(TASK_SORT);
1605 #endif  // #ifdef RAZERS_PROFILE
1606 
1607     TIterator it = begin(matches, Standard());
1608     TIterator itEnd = end(matches, Standard());
1609     TIterator dit = it;
1610     TIterator ditBeg = it;
1611     // fprintf(stderr, "[%u matches to compact]", unsigned(itEnd - it));
1612     unsigned disabled = 0;
1613 
1614     for (; it != itEnd; ++it)
1615     {
1616         if ((*it).orientation == '-')
1617             continue;
1618         int score = (*it).score;
1619         int errors = -(*it).score;
1620         ignoreUnusedVariableWarning(errors);
1621 
1622         //if (readNo == 643) std::cerr <<"["<<score<<","<<errors<<"] "<<std::flush;
1623         if (readNo == (*it).readId && (*it).pairMatchId == TMatch::INVALID_ID)  // Only compact unpaired matches.
1624         {
1625             if (score <= scoreCutOff)
1626             {
1627 //    std::cout<<"decreased errCutOff["<<readNo<<"] from "<<(unsigned)options.errorCutOff[readNo];
1628                 options.errorCutOff[readNo] = -scoreCutOff;
1629 //    std::cout<<"to "<<(unsigned)options.errorCutOff[readNo]<<std::endl;
1630                 setMaxErrors(swift, readNo, -scoreCutOff - 1);
1631                 while (it != itEnd && readNo == (*it).readId)
1632                     ++it;
1633                 --it;
1634                 continue;
1635             }
1636             if (++hitCount >= hitCountCutOff)
1637             {
1638 #ifdef RAZERS_MASK_READS
1639                 if (hitCount == hitCountCutOff)
1640                 {
1641                     // we have enough, now look for better matches
1642                     if (options.purgeAmbiguous && (options.scoreDistanceRange == 0 || score > scoreRangeBest))
1643                     {
1644                         // std::cerr << "PURGED " << readNo << std::endl;
1645 //    std::cout<<"decreased errCutOff["<<readNo<<"] from "<<(unsigned)options.errorCutOff[readNo];
1646                         options.errorCutOff[readNo] = 0;
1647 //    std::cout<<"to "<<(unsigned)options.errorCutOff[readNo]<<std::endl;
1648                         setMaxErrors(swift, readNo, -1);
1649                         ++disabled;
1650                         // if (options._debugLevel >= 2)
1651                         //  std::cerr << "(read #" << readNo << " disabled)";
1652                     }
1653                     else
1654                     // we only need better matches
1655                     if (IsSameType<TScoreMode, RazerSErrors>::VALUE)
1656                     {
1657                         // std::cerr << "LIMITED " << readNo << std::endl;
1658 //    std::cout<<"decreased errCutOff["<<readNo<<"] from "<<(unsigned)options.errorCutOff[readNo];
1659                         options.errorCutOff[readNo] = errors;
1660 //    std::cout<<"to "<<(unsigned)options.errorCutOff[readNo]<<std::endl;
1661                         setMaxErrors(swift, readNo, errors - 1);
1662                         if (errors == 0)
1663                             ++disabled;
1664                         // if (errors == 0 && options._debugLevel >= 2)
1665                         //  std::cerr << "(read #" << readNo << " disabled)";
1666                     }
1667 
1668                     if (options.purgeAmbiguous && (compactMode == COMPACT_FINAL || compactMode == COMPACT_FINAL_EXTERNAL))
1669                     {
1670                         if (options.scoreDistanceRange == 0 || score > scoreRangeBest || compactMode == COMPACT_FINAL || compactMode == COMPACT_FINAL_EXTERNAL)
1671                         {
1672                             dit = ditBeg;
1673                         }
1674                         else
1675                         {
1676                             *dit = *it;
1677                             ++dit;
1678                         }
1679                     }
1680                 }
1681 #endif
1682                 continue;
1683             }
1684         }
1685         else
1686         {
1687             readNo = (*it).readId;
1688             hitCount = 0;
1689             if (options.scoreDistanceRange > 0)
1690                 scoreCutOff = score - options.scoreDistanceRange;
1691             ditBeg = dit;
1692         }
1693         *dit = *it;
1694         ++dit;
1695     }
1696     unsigned origSize = length(matches);
1697     resize(matches, dit - begin(matches, Standard()));
1698     // compactAlignedReads(store);
1699     options.timeCompactMatches += sysTime() - beginTime;
1700     // fprintf(stderr, "[compacted in %f s]", endTime - beginTime);
1701     unsigned newSize = length(matches);
1702     if (options._debugLevel >= 2)
1703     {
1704         fprintf(stderr, " [%u matches removed]", unsigned(origSize - newSize));
1705         if (disabled > 0)
1706             fprintf(stderr, " [%u reads disabled]", disabled);
1707     }
1708 }
1709 
1710 //////////////////////////////////////////////////////////////////////////////
1711 // Remove low quality matches
1712 template <
1713     typename TMatches,
1714     typename TCounts,
1715     typename TSpec,
1716     typename TGapMode,
1717     typename TSwift,
1718     typename TMatchNPolicy
1719     >
1720 void compactMatches(
1721     TMatches & matches,
1722     TCounts & cnts,
1723     RazerSCoreOptions<TSpec> &,
1724     RazerSMode<RazerSGlobal, TGapMode, RazerSQuality<RazerSMAQ>, TMatchNPolicy> const &,
1725     TSwift &,
1726     CompactMatchesMode compactMode)
1727 {
1728     typedef typename Value<TMatches>::Type                      TMatch;
1729     typedef typename Iterator<TMatches, Standard>::Type         TIterator;
1730     //typedef RazerSMode<RazerSGlobal, TGapMode, RazerSQuality<RazerSMAQ>, TMatchNPolicy> TRazerSMode;
1731 
1732     unsigned readNo = -1;
1733 
1734 #ifdef RAZERS_PROFILE
1735     timelineBeginTask(TASK_SORT);
1736 #endif  // #ifdef RAZERS_PROFILE
1737     std::sort(
1738         begin(matches, Standard()),
1739         end(matches, Standard()),
1740         LessScoreBackport<TMatch>());
1741     // sortAlignedReads(store.alignedReadStore, LessScore<TAlignedReadStore, TAlignQualityStore, TRazerSMode>(store.alignQualityStore));
1742 #ifdef RAZERS_PROFILE
1743     timelineEndTask(TASK_SORT);
1744 #endif  // #ifdef RAZERS_PROFILE
1745 
1746     TIterator it = begin(matches, Standard());
1747     TIterator itEnd = end(matches, Standard());
1748     TIterator dit = it;
1749 
1750     //number of errors may not exceed 31!
1751     bool second = true;
1752     for (; it != itEnd; ++it)
1753     {
1754         if ((*it).orientation == '-')
1755             continue;
1756         if (readNo == (*it).readId)
1757         {
1758             //second best match
1759             if (second)
1760             {
1761                 second = false;
1762                 if ((cnts[1][(*it).readId] & 31)  > (*it).editDist)
1763                 {
1764                     //this second best match is better than any second best match before
1765                     cnts[1][(*it).readId] = (*it).editDist; // second best dist is this editDist
1766                     // count is 0 (will be updated if countFirstTwo)
1767                 }
1768                 if (compactMode == COMPACT_FINAL)
1769                     if ((cnts[1][(*it).readId] >> 5) != 2047)
1770                         cnts[1][(*it).readId] += 32;
1771             }
1772             else
1773             {
1774                 if ((*it).editDist <= (cnts[0][(*it).readId] & 31))
1775                     if (cnts[0][(*it).readId] >> 5 != 2047)
1776                         cnts[0][(*it).readId] += 32;
1777                 if ((*it).editDist <= (cnts[1][(*it).readId] & 31))
1778                     if ((cnts[1][(*it).readId] >> 5) != 2047)
1779                         cnts[1][(*it).readId] += 32;
1780                 continue;
1781             }
1782         }
1783         else //best match
1784         {
1785             second = true;
1786             readNo = (*it).readId;
1787             //cnts has 16bits, 11:5 for count:dist
1788             if ((cnts[0][(*it).readId] & 31)  > (*it).editDist)
1789             {
1790                 //this match is better than any match before
1791                 cnts[1][(*it).readId] = cnts[0][(*it).readId]; // best before is now second best
1792                 // (count will be updated when match is removed)
1793                 cnts[0][(*it).readId] = (*it).editDist; // best dist is this editDist
1794                 // count is 0 (will be updated if countFirstTwo)
1795             }
1796             if (compactMode == COMPACT_FINAL)
1797                 if ((cnts[0][(*it).readId] >> 5) != 2047)
1798                     cnts[0][(*it).readId] += 32;
1799             // shift 5 to the right, add 1, shift 5 to the left, and keep count
1800         }
1801         *dit = *it;
1802         ++dit;
1803     }
1804 
1805     resize(matches, dit - begin(matches, Standard()));
1806 }
1807 
1808 /* // fallback
1809 template <
1810     typename TFragmentStore,
1811     typename TCounts,
1812     typename TSpec,
1813     typename TAlignMode,
1814     typename TGapMode,
1815     typename TScoreMode,
1816     typename TSwift
1817 >
1818 void compactMatches(
1819     TFragmentStore &,
1820     TCounts &,
1821     RazerSCoreOptions<TSpec> &,
1822     RazerSMode<TAlignMode, TGapMode, TScoreMode> const,
1823     TSwift &,
1824     CompactMatchesMode)
1825 {
1826 }
1827 */
1828 
1829 //////////////////////////////////////////////////////////////////////////////
1830 // Best Hamming prefix verification
1831 template <
1832     typename TMatchVerifier,
1833     typename TGenome,
1834     typename TRead,
1835     typename TMatchNPolicy>
1836 inline bool
1837 matchVerify(
1838     TMatchVerifier & verifier,
1839     Segment<TGenome, InfixSegment> inf,                                 // potential match genome region
1840     unsigned readId,                                                    // read number
1841     TRead const & read,                                                 // read
1842     RazerSMode<RazerSPrefix, RazerSUngapped, RazerSErrors, TMatchNPolicy> const &)  // Hamming only
1843 {
1844     typedef Segment<TGenome, InfixSegment>                  TGenomeInfix;
1845     typedef typename Iterator<TGenomeInfix, Standard>::Type TGenomeIterator;
1846     typedef typename Iterator<TRead const, Standard>::Type  TReadIterator;
1847 
1848 //	unsigned maxErrors = (unsigned)(verifier.options->prefixSeedLength * verifier.options->errorRate);
1849     unsigned maxErrors = verifier.options->errorCutOff[readId];
1850     if (maxErrors == 0)
1851         return false;
1852 
1853     --maxErrors;
1854 
1855     // verify
1856     TReadIterator ritBeg    = begin(read, Standard());
1857     TReadIterator ritEnd    = end(read, Standard());
1858     unsigned ndlLength      = ritEnd - ritBeg;
1859 
1860     if (length(inf) < ndlLength)
1861         return false;
1862 
1863     TGenomeIterator git     = begin(inf, Standard());
1864     TGenomeIterator gitEnd  = end(inf, Standard()) - (ndlLength - 1);
1865 
1866     unsigned errorThresh = (verifier.oneMatchPerBucket) ? std::numeric_limits<unsigned>::max() : maxErrors;
1867     unsigned minErrors = maxErrors + 2;
1868     int bestHitLength = 0;
1869 
1870     for (; git < gitEnd; ++git)
1871     {
1872         unsigned errors = 0;
1873         TReadIterator r = ritBeg;
1874         TGenomeIterator g = git;
1875         for (; r != ritEnd; ++r, ++g)
1876             if ((verifier.options->compMask[ordValue(*g)] & verifier.options->compMask[ordValue(*r)]) == 0)
1877             {
1878                 if (r - ritBeg < (int)verifier.options->prefixSeedLength)   // seed
1879                 {
1880                     if (++errors > maxErrors)               // doesn't work for islands with errorThresh > maxErrors
1881                         break;
1882                 }
1883                 else
1884                     break;
1885             }
1886 
1887         if (errors < minErrors)
1888         {
1889             minErrors = errors;
1890             bestHitLength = r - ritBeg;
1891             verifier.m.beginPos = git - begin(host(inf), Standard());
1892         }
1893         else if (errors == minErrors && bestHitLength < r - ritBeg)
1894         {
1895             bestHitLength = r - ritBeg;
1896             verifier.m.beginPos = git - begin(host(inf), Standard());
1897         }
1898         else if (errorThresh < errors)
1899         {
1900             if (minErrors <= maxErrors)
1901             {
1902                 verifier.m.endPos = verifier.m.beginPos + bestHitLength;
1903                 verifier.q.pairScore = verifier.q.score = bestHitLength;
1904                 verifier.q.errors = minErrors;
1905 
1906                 if (maxErrors > minErrors + verifier.options->dRange)
1907                     maxErrors = minErrors + verifier.options->dRange;
1908                 minErrors = maxErrors + 2;
1909 
1910                 verifier.push();
1911             }
1912         }
1913     }
1914 
1915     if (minErrors <= maxErrors)
1916     {
1917         verifier.m.endPos = verifier.m.beginPos + bestHitLength;
1918         verifier.q.pairScore = verifier.q.score = bestHitLength;
1919         verifier.q.errors = minErrors;
1920         if (!verifier.oneMatchPerBucket)
1921         {
1922             if (maxErrors > minErrors + verifier.options->dRange)
1923                 maxErrors = minErrors + verifier.options->dRange;
1924             verifier.push();
1925             // update maximal errors per read
1926             unsigned cutOff = maxErrors + 1;
1927             if ((unsigned)verifier.options->errorCutOff[readId] > cutOff)
1928                 verifier.options->errorCutOff[readId] = cutOff;
1929         }
1930         return true;
1931     }
1932     return false;
1933 }
1934 
1935 template <typename TRazerSMode>
1936 struct UseQualityValues__
1937 {
1938     enum
1939     {
1940         VALUE = false
1941     };
1942 };
1943 template <typename TAlignMode, typename TGapMode, typename TSpec, typename TMatchNPolicy>
1944 struct UseQualityValues__<RazerSMode<TAlignMode, TGapMode, RazerSQuality<TSpec>, TMatchNPolicy> >
1945 {
1946     enum
1947     {
1948         VALUE = true
1949     };
1950 };
1951 
1952 //////////////////////////////////////////////////////////////////////////////
1953 // Hamming verification
1954 template <
1955     typename TMatchVerifier,
1956     typename TGenome,
1957     typename TRead,
1958     typename TScoreMode,
1959     typename TMatchNPolicy>
1960 inline bool
1961 matchVerify(
1962     TMatchVerifier & verifier,
1963     Segment<TGenome, InfixSegment> inf,                             // potential match genome region
1964     unsigned readId,                                            // read number
1965     TRead const & read,                                             // reads
1966     RazerSMode<RazerSGlobal, RazerSUngapped, TScoreMode, TMatchNPolicy> const &) // Semi-global, no gaps
1967 {
1968     typedef Segment<TGenome, InfixSegment>                          TGenomeInfix;
1969     typedef typename Iterator<TGenomeInfix, Standard>::Type         TGenomeIterator;
1970     typedef typename Iterator<TRead const, Standard>::Type          TReadIterator;
1971     typedef RazerSMode<RazerSGlobal, RazerSUngapped, TScoreMode, TMatchNPolicy> TRazerSMode;
1972 
1973 #ifdef RAZERS_DEBUG
1974     std::cout << "Verify: " << std::endl;
1975     std::cout << "Genome: " << inf << "\t" << beginPosition(inf) << "," << endPosition(inf) << std::endl;
1976     std::cout << "Read:   " << read << std::endl;
1977 #endif
1978 
1979     int mismatchDelta, scoreInit;
1980     int minScore;
1981     if (IsSameType<TScoreMode, RazerSErrors>::VALUE)
1982     {
1983         minScore = verifier.options->errorCutOff[readId];
1984         if (minScore == 0)
1985             return false;
1986 
1987         minScore = -minScore + 1;
1988     }
1989     else if (UseQualityValues__<TRazerSMode>::VALUE)
1990         minScore = -verifier.options->absMaxQualSumErrors;
1991     else if (IsSameType<TScoreMode, RazerSScore>::VALUE)
1992     {
1993         minScore = verifier.options->minScore;
1994         mismatchDelta = scoreMatch(verifier.options->scoringScheme) - scoreMismatch(verifier.options->scoringScheme);
1995         scoreInit = scoreMatch(verifier.options->scoringScheme) * length(read);
1996     }
1997 
1998     // verify
1999     TReadIterator ritBeg    = begin(read, Standard());
2000     TReadIterator ritEnd    = end(read, Standard());
2001     unsigned ndlLength      = ritEnd - ritBeg;
2002 
2003     if (length(inf) < ndlLength)
2004         return false;
2005 
2006     TGenomeIterator git     = begin(inf, Standard());
2007     TGenomeIterator gitEnd  = end(inf, Standard()) - (ndlLength - 1);
2008 
2009     int maxScore = minScore - 1;
2010     int scoreThresh = (verifier.oneMatchPerBucket) ? std::numeric_limits<int>::max() : minScore;
2011     int score, errors;
2012 
2013     for (; git < gitEnd; ++git)
2014     {
2015         if (!IsSameType<TScoreMode, RazerSScore>::VALUE)
2016             score = 0;
2017         else
2018             score = scoreInit;
2019 
2020         if (!IsSameType<TScoreMode, RazerSErrors>::VALUE)
2021             errors = 0;
2022 
2023         TGenomeIterator g = git;
2024         for (TReadIterator r = ritBeg; r != ritEnd; ++r, ++g)
2025             if ((verifier.options->compMask[ordValue(*g)] & verifier.options->compMask[ordValue(*r)]) == 0)
2026             {
2027                 if (IsSameType<TScoreMode, RazerSErrors>::VALUE)
2028                 {
2029                     // A. Count mismatches only
2030                     --score;
2031                 }
2032                 else
2033                 {
2034                     ++errors;
2035                     if (UseQualityValues__<TRazerSMode>::VALUE)
2036                         // B. Count mismatches and mismatch qualities
2037                         score -= getQualityValue(*g);
2038                     else if (IsSameType<TScoreMode, RazerSScore>::VALUE)
2039                         // C. Count mismatches and alignment score
2040                         score -= mismatchDelta;
2041                     else
2042                         SEQAN_FAIL("Unsupported score mode!");
2043                 }
2044                 if (score < minScore)   // doesn't work for islands with errorThresh > maxErrors
2045                     break;
2046             }
2047 
2048         if (score > maxScore)
2049         {
2050             maxScore = score;
2051             if (IsSameType<TScoreMode, RazerSErrors>::VALUE)
2052                 verifier.m.score = score;
2053             else
2054                 verifier.m.score = errors;
2055             verifier.m.beginPos = git - begin(host(inf), Standard());
2056         }
2057 #ifdef RAZERS_ISLAND_CRITERION
2058         else if (scoreThresh > score)
2059         {
2060             if (maxScore >= minScore)
2061             {
2062                 // for RazerSErrors bestErrors == -maxScore
2063                 verifier.m.endPos = verifier.m.beginPos + ndlLength;
2064                 verifier.m.pairScore = verifier.m.score = maxScore;
2065                 if (!verifier.oneMatchPerBucket)
2066                 {
2067                     if (minScore < maxScore - verifier.options->dRange)
2068                         minScore = maxScore - verifier.options->dRange;
2069                     verifier.push();
2070                 }
2071                 maxScore = minScore - 1;
2072             }
2073         }
2074 #else
2075         (void)scoreThresh;
2076 #endif
2077     }
2078 
2079     if (maxScore >= minScore)
2080     {
2081         verifier.m.endPos = verifier.m.beginPos + ndlLength;
2082         verifier.m.pairScore = verifier.m.score = maxScore;
2083         if (!verifier.oneMatchPerBucket)
2084         {
2085             if (minScore < maxScore - verifier.options->dRange)
2086                 minScore = maxScore - verifier.options->dRange;
2087             verifier.push();
2088 
2089             // update maximal errors per read
2090             unsigned cutOff = -minScore + 1;
2091             if ((unsigned)verifier.options->errorCutOff[readId] > cutOff)
2092                 verifier.options->errorCutOff[readId] = cutOff;
2093         }
2094         return true;
2095     }
2096     return false;
2097 }
2098 
2099 //////////////////////////////////////////////////////////////////////////////
2100 // Edit distance verification
2101 template <
2102     typename TMatchVerifier,
2103     typename TGenome,
2104     typename TRead,
2105     typename TMatchNPolicy>
2106 inline bool
2107 matchVerify(
2108     TMatchVerifier & verifier,
2109     Segment<TGenome, InfixSegment> inf,                                 // potential match genome region
2110     unsigned readId,                                                    // read number
2111     TRead const & read,                                                 // reads
2112     RazerSMode<RazerSGlobal, RazerSGapped, RazerSErrors, TMatchNPolicy> const &) // Mismatches and Indels
2113 {
2114     if (empty(inf))
2115         return false;
2116 
2117     typedef Segment<TGenome, InfixSegment>                  TGenomeInfix;
2118     typedef typename Prefix<TRead const>::Type              TReadPrefix SEQAN_UNUSED_TYPEDEF;
2119     typedef typename Position<TGenomeInfix>::Type           TPosition;
2120     typedef typename MakeSigned_<TPosition>::Type           TDistance;
2121 
2122     // find read match end
2123     typedef Finder<TGenomeInfix>                            TMyersFinder;
2124     typedef typename TMatchVerifier::TOptions::TMyersPattern TMyersPattern SEQAN_UNUSED_TYPEDEF;
2125     typedef typename TMatchVerifier::TPatternState          TPatternState;
2126 
2127     // find read match begin
2128     // TODO(holtgrew): Use reverse-search here, as well!
2129     typedef ModifiedString<TGenomeInfix, ModReverse>        TGenomeInfixRev;
2130     typedef Finder<TGenomeInfixRev>                         TMyersFinderRev;
2131 
2132 #ifdef RAZERS_NOOUTERREADGAPS
2133     typedef ModifiedString<TReadPrefix, ModReverse>         TReadRev;
2134 #else
2135     typedef ModifiedString<TRead const, ModReverse>         TReadRev;
2136 #endif
2137     typedef Pattern<TReadRev, MyersUkkonenGlobal>           TMyersPatternRev;
2138 
2139     unsigned ndlLength = length(read);
2140     int maxScore = std::numeric_limits<int>::min();
2141     int minScore = verifier.options->errorCutOff[readId];
2142     if (minScore == 0)
2143         return false;
2144 
2145     minScore = -minScore + 1;
2146 
2147     TDistance minSinkDistance = std::numeric_limits<TDistance>::max();
2148     TPosition maxPos = 0;
2149     TPosition lastPos = length(inf);
2150 #ifdef RAZERS_ISLAND_CRITERION
2151     unsigned minDistance = (verifier.oneMatchPerBucket) ? lastPos : 1;
2152 #endif
2153 
2154 #ifdef RAZERS_NOOUTERREADGAPS
2155     TGenomeInfix origInf(inf);
2156     setEndPosition(inf, endPosition(inf) - 1);
2157     --ndlLength;
2158     TReadPrefix readPrefix = prefix(read, ndlLength);
2159 #else
2160     TRead readPrefix(read);  // here only infixes (no sequence) is copied
2161 #endif
2162 
2163     TMyersFinder myersFinder(inf);
2164 #ifndef RAZERS_BANDED_MYERS
2165     TMyersPattern & myersPattern = verifier.options->forwardPatterns[readId];
2166 #endif  // #ifdef RAZERS_BANDED_MYERS
2167     TPatternState & state = verifier.patternState;
2168 
2169 #ifdef RAZERS_DEBUG
2170     std::cout << "Verify: " << std::endl;
2171     std::cout << "Genome: " << inf << "\t" << beginPosition(inf) << "," << endPosition(inf) << std::endl;
2172     std::cout << "Read:   " << read << "(id: " << readId << ")" << std::endl;
2173 #endif
2174 
2175     // find end of best semi-global alignment
2176 #ifdef RAZERS_BANDED_MYERS
2177     while (find(myersFinder, readPrefix, state, minScore))
2178 #else  // #ifdef RAZERS_BANDED_MYERS
2179     while (find(myersFinder, myersPattern, state, minScore))
2180 #endif  // #ifdef RAZERS_BANDED_MYERS
2181     {
2182         TPosition const pos = position(hostIterator(myersFinder));
2183         int score = getScore(state);
2184 
2185 #ifdef RAZERS_NOOUTERREADGAPS
2186         // Manually align the last base of the read
2187         //
2188         // In this case myersPattern contains the whole read without the
2189         // last base. We compare the bases and adjust the score.
2190         // We also have to adjust inf and remove the last base of the
2191         // genomic region that has to be verified.
2192         SEQAN_ASSERT_LT(pos + 1, length(origInf));
2193         if ((verifier.options->compMask[ordValue(origInf[pos + 1])] & verifier.options->compMask[ordValue(back(read))]) == 0)
2194             if (--score < minScore)
2195                 continue;
2196 #endif
2197 #ifdef RAZERS_ISLAND_CRITERION
2198         if (lastPos + minDistance < pos)
2199         {
2200             if (minScore <= maxScore)
2201             {
2202                 verifier.m.endPos = beginPosition(inf) + maxPos + 1;
2203                 verifier.m.pairScore = verifier.m.score = maxScore;
2204 //              verifier.m.errors = -maxScore;
2205 
2206                 if (maxScore == 0)
2207                     verifier.m.beginPos = verifier.m.endPos - ndlLength;
2208                 else
2209                 {
2210                     // find beginning of best semi-global alignment
2211                     TPosition infBeginPos = beginPosition(inf);
2212                     TPosition infEndPos = endPosition(inf);
2213                     TPosition newInfEndPos = verifier.m.endPos;
2214 
2215 #ifdef RAZERS_BANDED_MYERS
2216                     verifier.revPatternState.leftClip = infEndPos - newInfEndPos + verifier.rightClip;
2217 #endif
2218                     setEndPosition(inf, newInfEndPos);
2219 
2220 //					// limit the beginning to needle length plus errors (== -maxScore)
2221 //					if (length(inf) > ndlLength - maxScore)
2222 //						setBeginPosition(inf, endPosition(inf) - ndlLength + maxScore);
2223 
2224                     // we eventually have to search before the beginning of our parallelogram
2225                     // otherwise alignments of an island in the previous parallelogram
2226                     // could be cut and prevent that an island in this parallelgram is found
2227                     if (endPosition(inf) > (unsigned)(ndlLength - maxScore))
2228                         setBeginPosition(inf, endPosition(inf) - ndlLength + maxScore);
2229                     else
2230                         setBeginPosition(inf, 0);
2231 
2232 #ifdef RAZERS_NOOUTERREADGAPS
2233                     // The best score must be corrected to hold the score of the prefix w/o the last read base
2234                     if ((verifier.options->compMask[ordValue(origInf[maxPos + 1])] & verifier.options->compMask[ordValue(back(read))]) == 0)
2235                         ++maxScore;
2236 #endif
2237 
2238                     TReadRev            readRev(readPrefix);
2239                     TGenomeInfixRev     infRev(inf);
2240                     TMyersPatternRev    myersPatternRev(readRev);
2241                     TMyersFinderRev     myersFinderRev(infRev);
2242 
2243                     verifier.m.beginPos = verifier.m.endPos;
2244 #ifdef RAZERS_BANDED_MYERS
2245                     while (find(myersFinderRev, readRev, verifier.revPatternState, maxScore))
2246                     {
2247                         if (maxScore <= getScore(verifier.revPatternState))
2248                         {
2249                             maxScore = getScore(verifier.revPatternState);
2250                             verifier.m.beginPos = verifier.m.endPos - (position(myersFinderRev) + 1);
2251                         }
2252                     }
2253 #else
2254                     _patternMatchNOfPattern(myersPatternRev, verifier.options->matchN);
2255                     _patternMatchNOfFinder(myersPatternRev, verifier.options->matchN);
2256                     while (find(myersFinderRev, myersPatternRev, maxScore))
2257                     {
2258                         if (maxScore <= getScore(myersPatternRev))
2259                         {
2260                             maxScore = getScore(myersPatternRev);
2261                             verifier.m.beginPos = verifier.m.endPos - (position(myersFinderRev) + 1);
2262                         }
2263                     }
2264 #endif
2265                     setBeginPosition(inf, infBeginPos);
2266                     setEndPosition(inf, infEndPos);
2267 #ifdef RAZERS_BANDED_MYERS
2268                     if (verifier.m.beginPos == verifier.m.endPos)
2269                         continue;
2270 #endif
2271                 }
2272                 // minDistance implicitly forbids to get here with verifier.oneMatchPerBucket == true
2273                 SEQAN_ASSERT_NOT(verifier.oneMatchPerBucket);
2274                 SEQAN_ASSERT_LT(verifier.m.beginPos, verifier.m.endPos);
2275 
2276 #ifdef RAZERS_NOOUTERREADGAPS
2277                 // The match end position must be increased by the omitted base.
2278                 ++verifier.m.endPos;
2279 #endif
2280                 if (minScore < verifier.m.score - verifier.options->dRange)
2281                     minScore = verifier.m.score - verifier.options->dRange;
2282 
2283                 verifier.push();
2284                 maxScore = minScore - 1;
2285                 minSinkDistance = std::numeric_limits<TDistance>::max();
2286             }
2287         }
2288 #endif  // #ifdef RAZERS_ISLAND_CRITERION
2289 
2290         // minimize distance between sink and estimated match begin
2291         TDistance sinkDistance = verifier.sinkPos - ((TDistance)(beginPosition(inf) + pos) - (TDistance)ndlLength);
2292 //        sinkDistance = maxValue(sinkDistance);
2293         if (sinkDistance < (TDistance)0)
2294             sinkDistance = -sinkDistance;
2295 
2296         if (score > maxScore || (score == maxScore && sinkDistance <= minSinkDistance))
2297         {
2298             maxScore = score;
2299             minSinkDistance = sinkDistance;
2300             maxPos = pos;
2301         }
2302         lastPos = pos;
2303     }
2304 
2305     if (minScore <= maxScore)
2306     {
2307         verifier.m.endPos = beginPosition(inf) + maxPos + 1;
2308         verifier.m.pairScore = verifier.m.score = maxScore;
2309 //		verifier.m.errors = -maxScore;
2310 
2311         if (maxScore == 0)
2312             verifier.m.beginPos = verifier.m.endPos - ndlLength;
2313         else
2314         {
2315             // find beginning of best semi-global alignment
2316             TPosition newInfEndPos = verifier.m.endPos;
2317 
2318 #ifdef RAZERS_BANDED_MYERS
2319             verifier.revPatternState.leftClip = endPosition(inf) - newInfEndPos + verifier.rightClip;
2320 #endif
2321             setEndPosition(inf, newInfEndPos);
2322 
2323 //					// limit the beginning to needle length plus errors (== -maxScore)
2324 //					if (length(inf) > ndlLength - maxScore)
2325 //						setBeginPosition(inf, endPosition(inf) - ndlLength + maxScore);
2326 
2327             // we eventually have to search before the beginning of our parallelogram
2328             // otherwise alignments of an island in the previous parallelogram
2329             // could be cut and prevent that an island in this parallelgram is found
2330             if (endPosition(inf) > (unsigned)(ndlLength - maxScore))
2331                 setBeginPosition(inf, endPosition(inf) - ndlLength + maxScore);
2332             else
2333                 setBeginPosition(inf, 0);
2334 
2335 #ifdef RAZERS_NOOUTERREADGAPS
2336             // The best score must be corrected to hold the score of the prefix w/o the last read base
2337             if ((verifier.options->compMask[ordValue(origInf[maxPos + 1])] & verifier.options->compMask[ordValue(back(read))]) == 0)
2338                 ++maxScore;
2339 #endif
2340 
2341             TReadRev            readRev(readPrefix);
2342             TGenomeInfixRev     infRev(inf);
2343             TMyersPatternRev    myersPatternRev(readRev);
2344             TMyersFinderRev     myersFinderRev(infRev);
2345 
2346             verifier.m.beginPos = verifier.m.endPos;
2347 #ifdef RAZERS_BANDED_MYERS
2348             while (find(myersFinderRev, readRev, verifier.revPatternState, maxScore))
2349             {
2350                 if (maxScore <= getScore(verifier.revPatternState))
2351                 {
2352                     maxScore = getScore(verifier.revPatternState);
2353                     verifier.m.beginPos = verifier.m.endPos - (position(myersFinderRev) + 1);
2354                 }
2355             }
2356 #else
2357             _patternMatchNOfPattern(myersPatternRev, verifier.options->matchN);
2358             _patternMatchNOfFinder(myersPatternRev, verifier.options->matchN);
2359             while (find(myersFinderRev, myersPatternRev, maxScore))
2360             {
2361                 if (maxScore <= getScore(myersPatternRev))
2362                 {
2363                     maxScore = getScore(myersPatternRev);
2364                     verifier.m.beginPos = verifier.m.endPos - (position(myersFinderRev) + 1);
2365                 }
2366             }
2367 #endif
2368 #ifdef RAZERS_BANDED_MYERS
2369             if (verifier.m.beginPos == verifier.m.endPos)
2370             {
2371 #ifdef RAZERS_DEBUG
2372                 std::cout << "FAILED2" << std::endl;
2373 #endif
2374                 return false;
2375             }
2376 #endif // RAZERS_BANDED_MYERS
2377         }
2378         SEQAN_ASSERT_LT(verifier.m.beginPos, verifier.m.endPos);
2379 
2380 #ifdef RAZERS_NOOUTERREADGAPS
2381         // The match end position must be increased by the omitted base.
2382         ++verifier.m.endPos;
2383 #endif
2384 
2385         if (!verifier.oneMatchPerBucket)
2386         {
2387             if (minScore < verifier.m.score - verifier.options->dRange)
2388                 minScore = verifier.m.score - verifier.options->dRange;
2389             verifier.push();
2390 
2391             // update maximal errors per read
2392             unsigned cutOff = -minScore + 1;
2393             if ((unsigned)verifier.options->errorCutOff[readId] > cutOff)
2394             {
2395 //    std::cout<<"maxScore="<<verifier.m.score  << " minScore=" << minScore<<std::endl;
2396 //    std::cout<<"decreased errCutOff["<<readId<<"] from "<<(unsigned)verifier.options->errorCutOff[readId];
2397                 verifier.options->errorCutOff[readId] = cutOff;
2398 //    std::cout<<"to "<<(unsigned)verifier.options->errorCutOff[readId]<<std::endl;
2399             }
2400         }
2401 
2402 #ifdef RAZERS_DEBUG
2403         std::cout << "OK" << std::endl;
2404 #endif
2405         return true;
2406     }
2407 #ifdef RAZERS_DEBUG
2408     std::cout << "FAILED3" << std::endl;
2409 #endif
2410     return false;
2411 }
2412 
2413 template <
2414     typename TMatchVerifier,
2415     typename TGenome,
2416     typename TRead,
2417     typename TAlignMode,
2418     typename TGapMode,
2419     typename TScoreMode,
2420     typename TMatchNPolicy>
2421 inline bool
2422 matchVerify(
2423     TMatchVerifier &,
2424     Segment<TGenome, InfixSegment>,                             // potential match genome region
2425     unsigned,                                                   // read number
2426     TRead const &,                                              // read
2427     RazerSMode<TAlignMode, TGapMode, TScoreMode, TMatchNPolicy> const &)
2428 {
2429     SEQAN_FAIL("Verification not implemented!");
2430     return false;
2431 }
2432 
2433 template <typename TOptions>
2434 inline void
2435 estimateErrorDistributionFromQualities(TOptions & options)
2436 {
2437     typedef typename TOptions::TProb TFloat;
2438 
2439     resize(options.errorProb, length(options.avrgQuality));
2440 //	std::cout<< "ERROR PROBS:"<<std::endl;
2441     for (unsigned i = 0; i < length(options.avrgQuality); ++i)
2442     {
2443         //    qual = -10 log_10 p
2444         //         = -10 log p / log 10
2445         //       p = exp^(qual * log 10 / -10)
2446         //   log p = qual * log 10 / -10;
2447         double e = options.avrgQuality[i] * log(10.0) / -10.0;
2448         TFloat sequencingError;
2449         sequencingError.data_value = e;
2450 //		sequencingError = exp(e);
2451         options.errorProb[i] = (TFloat)1.0 - ((TFloat)1.0 - sequencingError) * ((TFloat)1.0 - options.mutationRate);
2452 //		std::cout<<e<<':'<<options.errorProb[i]<<'\t';
2453     }
2454 //	std::cout<<std::endl<<std::endl;
2455 }
2456 
2457 //////////////////////////////////////////////////////////////////////////////
2458 // Customize filter
2459 template <typename TDelta, typename TOptions>
2460 void computeQGramLengths(TDelta & minDelta, TOptions const & options)
2461 {
2462     const unsigned maxLength1 = length(options.readLengths);
2463 //    const unsigned maxLength = maxLength1 - 1;
2464 //    const unsigned maxErrors = (unsigned) floor(options.errorRate * maxLength);
2465 //    const unsigned maxErrors1 = maxErrors + 1;
2466 
2467     unsigned seqCount = 0;
2468     String<unsigned> maxDelta;
2469     resize(minDelta, options.maxOverlap + 1, std::numeric_limits<unsigned>::max());
2470     resize(maxDelta, options.maxOverlap + 1, 3);
2471 
2472     // compute delta (==stepSize) for different overlaps
2473     for (unsigned len = 0; len < maxLength1; ++len)
2474     {
2475         if (options.readLengths[len] == 0)
2476             continue;
2477 
2478         seqCount += options.readLengths[len];
2479         for (unsigned ol = 0; ol <= options.maxOverlap; ++ol)
2480         {
2481             // sequence must have sufficient length
2482             if (len <= ol)
2483                 continue;
2484 
2485             // cut overlap many characters from the end
2486             unsigned errors = (unsigned) floor(options.errorRate * len);
2487             unsigned delta = (len - ol) / (errors + 1);
2488 
2489             // ignore too short delta-grams
2490             if (delta < 3)
2491                 continue;
2492             if (minDelta[ol] > delta)
2493                 minDelta[ol] = delta;
2494             if (maxDelta[ol] < delta)
2495                 maxDelta[ol] = delta;
2496         }
2497     }
2498 
2499 //	std::cout<< "Deltas:"<<std::endl;
2500 //    for (unsigned ol = 0; ol < length(minDelta); ++ol)
2501 //	{
2502 //        if (minDelta[ol] < 3) minDelta[ol] = maxDelta[ol];
2503 //		std::cout<<minDelta[ol]+ol<<'\t';
2504 //	}
2505 //	std::cout<<std::endl<<std::endl;
2506 }
2507 
2508 template <typename TEstLosses, typename TDelta, typename TOptions>
2509 unsigned estimatePigeonholeLosses(TEstLosses & estLosses, TDelta const & delta, TOptions const & options)
2510 {
2511     typedef typename Value<TEstLosses>::Type TFloat;
2512 
2513     const unsigned maxLength1 = length(options.readLengths);
2514     const unsigned maxLength = maxLength1 - 1;
2515     const unsigned maxErrors = (unsigned) floor(options.errorRate * maxLength);
2516     const unsigned maxErrors1 = maxErrors + 1;
2517 
2518     // ------------------------------------------------------------------------------------------------
2519     // compute probs to have 0,1,...,maxErrors errors in the prefix of length 1,...,maxLength
2520     // ------------------------------------------------------------------------------------------------
2521     String<TFloat> p_prefix;
2522     resize(p_prefix, maxLength * maxErrors1);
2523     p_prefix[0] = (TFloat)1.0 - options.errorProb[0];   // 0 error
2524     p_prefix[1] = options.errorProb[0];                 // 1 error
2525     for (unsigned e = 2; e <= maxErrors; ++e)           // 2 errors are not possible in a sequence of length 1
2526         p_prefix[e] = 0.0;
2527     for (unsigned i = 1, idx = maxErrors1; i < maxLength; ++i)
2528     {
2529         p_prefix[idx] = p_prefix[idx - maxErrors1] * ((TFloat)1.0 - options.errorProb[i]);
2530         ++idx;
2531         for (unsigned e = 1; e <= maxErrors; ++e, ++idx)
2532             p_prefix[idx] = p_prefix[idx - maxErrors1] * ((TFloat)1.0 - options.errorProb[i]) + p_prefix[idx - maxErrors1 - 1] * options.errorProb[i];
2533     }
2534 
2535     // ------------------------------------------------------------------------------------------------
2536     // compute expected numbers of reads with 0,1,...,maxErrors errors
2537     // ------------------------------------------------------------------------------------------------
2538 
2539     clear(estLosses);
2540     resize(estLosses, maxErrors1 + 2, 0.0);
2541     double maxLoss = 0.0;
2542 //		std::cout << "len:"<<0<<"  "<<options.readLengths[0]<<std::endl;
2543     for (unsigned len = 1; len <= maxLength; ++len)
2544     {
2545         if (options.readLengths[len] == 0)
2546             continue;
2547 //		std::cout << "len:"<<len<<"  "<<options.readLengths[len]<<'\t'<<(TFloat)options.readLengths[len]<<'\t'<<(double)(TFloat)options.readLengths[len]<<'\t'<<std::endl;
2548 
2549         unsigned errors = (unsigned) floor(options.errorRate * len);
2550         TFloat sum = 0.0;
2551         for (unsigned e = 0; e <= errors; ++e)
2552         {
2553             sum += p_prefix[(len - 1) * maxErrors1 + e];
2554             estLosses[2 + e] += p_prefix[(len - 1) * maxErrors1 + e] * (TFloat)options.readLengths[len];
2555         }
2556         maxLoss += (double)sum * options.readLengths[len];
2557     }
2558     maxLoss *= options.lossRate;
2559 
2560     unsigned overlap = 0;
2561 
2562     // ------------------------------------------------------------------------------------------------
2563     // compute loss for each overlap and select best the shape
2564     // ------------------------------------------------------------------------------------------------
2565     for (unsigned ol = 0; ol < length(delta); ++ol)
2566     {
2567         unsigned stepSize = delta[ol];
2568         unsigned q = stepSize + ol;
2569 
2570         // if we had the same q-gram before
2571         if (ol > 0 && delta[ol - 1] + ol - 1 == q)
2572             continue;
2573 
2574         // don't allow more than two overlapping q-grams
2575         if (2 * ol > stepSize)
2576             break;
2577 
2578         // ------------------------------------------------------------------------------------------------
2579         // compute probs to have 0,1,...,maxErrors errors in a segment
2580         // ------------------------------------------------------------------------------------------------
2581 
2582         String<TFloat> p;
2583         resize(p, maxLength * maxErrors1);
2584         for (unsigned i = 0, idx = 0; i < maxLength; ++i)
2585         {
2586             if (i % stepSize == 0 || (i >= stepSize && i % stepSize == ol))
2587             {
2588                 // initialization
2589                 p[idx++] = (TFloat)1.0 - options.errorProb[i];      // 0 error
2590                 p[idx++] = options.errorProb[i];                    // 1 error
2591                 for (unsigned e = 2; e <= maxErrors; ++e, ++idx)    // 2 errors are not possible in a sequence of length 1
2592                     p[idx] = 0.0;
2593             }
2594             else
2595             {
2596                 p[idx] = p[idx - maxErrors1] * ((TFloat)1.0 - options.errorProb[i]);
2597                 ++idx;
2598                 for (unsigned e = 1; e <= maxErrors; ++e, ++idx)
2599                 {
2600                     // a sequence of length i with e errors has either:
2601                     //   - e errors in prefix i-1 and no error at position i
2602                     //   - e-1 errors in prefix i-1 and an error at position i
2603                     p[idx] = p[idx - maxErrors1] * ((TFloat)1.0 - options.errorProb[i]) + p[idx - maxErrors1 - 1] * options.errorProb[i];
2604                 }
2605             }
2606         }
2607 
2608         String<TFloat> p_last;
2609         resize(p_last, maxLength * maxErrors1);
2610         for (unsigned i = 0, idx = 0; i < maxLength; ++i)
2611         {
2612             if (i == 0 || (i >= stepSize && i % stepSize == ol))
2613             {
2614                 // initialization
2615                 p_last[idx++] = (TFloat)1.0 - options.errorProb[i]; // 0 error
2616                 p_last[idx++] = options.errorProb[i];               // 1 error
2617                 for (unsigned e = 2; e <= maxErrors; ++e, ++idx)    // 2 errors are not possible in a sequence of length 1
2618                     p_last[idx] = 0.0;
2619             }
2620             else
2621             {
2622                 p_last[idx] = p_last[idx - maxErrors1] * ((TFloat)1.0 - options.errorProb[i]);
2623                 ++idx;
2624                 for (unsigned e = 1; e <= maxErrors; ++e, ++idx)
2625                     p_last[idx] = p_last[idx - maxErrors1] * ((TFloat)1.0 - options.errorProb[i]) + p_last[idx - maxErrors1 - 1] * options.errorProb[i];
2626             }
2627         }
2628 
2629         // ------------------------------------------------------------------------------------------------
2630         // compute probs to lose every prefix of pigeonhole segments (q-grams) with 0,...,maxErrors errors
2631         // ------------------------------------------------------------------------------------------------
2632         unsigned maxXErrors1 = _min(maxErrors, ol) + 1;
2633         unsigned maxSegments = (maxLength - ol) / stepSize;
2634         String<TFloat> M;
2635         resize(M, maxSegments * maxErrors1 * maxXErrors1, 0.0);
2636 
2637         unsigned idx = maxXErrors1;
2638         for (unsigned e0 = 1; e0 <= maxErrors; ++e0, idx += maxXErrors1)
2639         {
2640             if (q != stepSize)
2641             {
2642                 unsigned maxX = _min(e0, ol);
2643                 for (unsigned x0 = 0; x0 <= maxX; ++x0)
2644                 {
2645                     M[idx + x0] = p[(stepSize - 1) * maxErrors1 + (e0 - x0)] * p[(q - 1) * maxErrors1 + x0];
2646 //                  std::cout<<"M[0,"<<e0<<','<<x0<<"]="<<M[idx + x0]<<std::endl;
2647                 }
2648             }
2649             else
2650                 M[idx] = p[(stepSize - 1) * maxErrors1 + e0];
2651 //		std::cout<<"p(C_"<<0<<'='<<e0<<")="<<p[( (stepSize - 1)) * maxErrors1 + e0]<<std::endl;
2652 //		std::cout<<"p(X_"<<0<<'='<<e0<<")="<<p[( (q - 1)) * maxErrors1 + e0]<<std::endl;
2653         }
2654 /*
2655     SEQAN_OMP_PRAGMA(critical)
2656     {
2657         for (unsigned e0 = 1; e0 <= maxErrors; ++e0)
2658             for (unsigned i = 0; i < maxLength; ++i)
2659                 std::cerr<<"p("<<i<<'='<<e0<<")="<<p[i * maxErrors1 + e0]<<std::endl;
2660         for (unsigned e0 = 1; e0 <= maxErrors; ++e0)
2661             for (unsigned i = 0; i < maxLength; ++i)
2662                 std::cerr<<"p_last("<<i<<'='<<e0<<")="<<p_last[i * maxErrors1 + e0]<<std::endl;
2663     }
2664 */
2665 //		std::cout<<"=============================="<<std::endl;
2666 
2667         // M[s,e,x] at position ((s * maxErrors1) + e) * maxErrors1 + x
2668         for (unsigned s = 1; s < maxSegments; ++s)
2669             for (unsigned e = 0; e <= maxErrors; ++e, idx += maxXErrors1)
2670             {
2671                 if (ol != 0)
2672                 {
2673                     unsigned maxX = _min(e, ol);
2674                     for (unsigned x = 0; x <= maxX; ++x)
2675                     {
2676                         TFloat sum = 0.0;
2677                         for (unsigned _e = 0; _e <= e - x; ++_e)
2678                         {
2679                             unsigned max_X = _min(_e, ol);
2680                             for (unsigned _x = 0; _x <= max_X; ++_x)
2681                                 if (_e - _x < e)    // at least one error in seed X_{s-1} C_s X_s
2682                                     sum +=   M[((s - 1) * maxErrors1 + _e) * maxXErrors1 + _x]
2683                                            * p[(stepSize * s + (stepSize - 1)) * maxErrors1 + (e - _e - x)]
2684                                            * p[(stepSize * s + (q - 1)) * maxErrors1 + x];
2685                         }
2686 
2687                         M[idx + x] = sum;
2688 //                        std::cerr<<"M["<<s<<','<<e<<','<<x<<"]="<<M[idx + x]<<std::endl;
2689                     }
2690 //                  std::cerr<<"p(C_"<<s<<'='<<e<<")="<<p[(stepSize * s + (stepSize - 1)) * maxErrors1 + e]<<std::endl;
2691 //                  std::cerr<<"p(X_"<<s<<'='<<e<<")="<<p[(stepSize * s + (q - 1)) * maxErrors1 + e]<<std::endl;
2692                 }
2693                 else
2694                 {
2695                     TFloat sum = 0.0;
2696                     for (unsigned _e = 0; _e < e; ++_e)
2697                         sum +=   M[((s - 1) * maxErrors1 + _e) * maxXErrors1]
2698                                * p[(stepSize * s + (stepSize - 1)) * maxErrors1 + (e - _e)];
2699 
2700                     M[idx] = sum;
2701 //                    std::cerr<<"M["<<s<<','<<e<<",0]="<<M[idx]<<std::endl;
2702                 }
2703             }
2704 
2705 //		// no overlap => no loss => no estimation required
2706 //		if (ol == 0) continue;
2707 
2708         // ------------------------------------------------------------------------------------------------
2709         // sum up the expected numbers of lost reads for every read length 0,...,maxLength
2710         // ------------------------------------------------------------------------------------------------
2711         double loss = 0.0;
2712 //		std::cout<< "LOSSES FOR OVERLAP " << ol <<":"<<std::endl;
2713 
2714         appendValue(estLosses, (double)ol);
2715         appendValue(estLosses, (double)q);
2716         resize(estLosses, length(estLosses) + maxErrors1, 0.0);
2717         for (unsigned len = 1; len <= maxLength; ++len)
2718         {
2719             if (options.readLengths[len] == 0)
2720                 continue;
2721             if (len < q)
2722                 continue;
2723 
2724             unsigned errors = (unsigned) floor(options.errorRate * len);
2725             unsigned segments = (len - ol) / stepSize;
2726 
2727 //			TFloat divider = p_prefix[(len - 1) * maxErrors1];
2728 //			std::cout<<"DIVIDER"<<0<< ':'<<p_prefix[(len - 1) * maxErrors1]<<std::endl;
2729 //			for (unsigned e = 1; e <= errors; ++e)
2730 //			{
2731 //				divider += p_prefix[(len - 1) * maxErrors1 + e];
2732 //				std::cout<<"DIVIDER"<<e<< ':'<<p_prefix[(len - 1) * maxErrors1 + e]<<std::endl;
2733 //			}
2734 //			std::cout<<"DIVIDERSUM:"<<divider<<std::endl;
2735 
2736             TFloat lossPerLength = 0.0;
2737             unsigned segmentedLen = segments * stepSize + ol;
2738             for (unsigned e1 = 0; e1 <= errors; ++e1)
2739             {
2740                 unsigned maxX = _min(e1, ol);
2741                 for (unsigned x = 0; x <= maxX; ++x)
2742                 {
2743                     TFloat sum = 0.0;
2744                     if (segmentedLen < len)
2745                     {
2746                         unsigned maxE2 = _min(errors - e1, len - segmentedLen);
2747                         for (unsigned e2 = 0; e2 <= maxE2; ++e2)
2748                         {
2749                             sum += p_last[(len - 1) * maxErrors1 + e2];
2750                             estLosses[length(estLosses) - maxErrors1 + e1 + e2] += (M[((segments - 1) * maxErrors1 + e1) * maxXErrors1 + x] * p_last[(len - 1) * maxErrors1 + e2] /* / divider */) * options.readLengths[len];
2751                         }
2752                     }
2753                     else
2754                     {
2755                         sum = 1.0;
2756                         estLosses[length(estLosses) - maxErrors1 + e1] += (M[((segments - 1) * maxErrors1 + e1) * maxXErrors1 + x] /* / divider */) * options.readLengths[len];
2757                     }
2758                     lossPerLength += M[((segments - 1) * maxErrors1 + e1) * maxXErrors1 + x] * sum;
2759                 }
2760             }
2761 
2762 //			lossPerLength /= divider;
2763             loss += options.readLengths[len] * (double)lossPerLength;
2764 //			std::cout<<len<<':'<<options.readLengths[len] * (double)lossPerLength<<'\t';
2765 
2766 //			if (len >= delta[ol] * (errors + 2) + ol) continue;
2767 //			loss += options.readLengths[len] * (q0[len * options.maxOverlap + ol - 1] / p[len * maxErrors1 + errors]);
2768         }
2769         if (loss > maxLoss)
2770             break;
2771 //		std::cout<<std::endl<<std::endl;
2772 
2773         overlap = ol;
2774     }
2775     return overlap;
2776 }
2777 
2778 template <typename TIndex, typename TPigeonholeSpec, typename TOptions>
2779 void _applyFilterOptions(Pattern<TIndex, Pigeonhole<TPigeonholeSpec> > & filterPattern, TOptions const & options)
2780 {
2781     if (options.lossRate == 0.0)
2782     {
2783         filterPattern.params.delta = options.delta;
2784         filterPattern.params.overlap = options.overlap;
2785         _patternInit(filterPattern, options.errorRate);
2786 
2787         if (options._debugLevel >= 2)
2788         {
2789             SEQAN_OMP_PRAGMA(critical)
2790             {
2791                 CharString str;
2792                 shapeToString(str, filterPattern.shape);
2793                 std::cout << std::endl << "Pigeonhole settings:" << std::endl;
2794                 std::cout << "  shape:    " << length(str) << '\t' << str << std::endl;
2795                 std::cout << "  stepsize: " << getStepSize(host(filterPattern)) << std::endl;
2796             }
2797             return;
2798         }
2799     }
2800 
2801     typedef typename TOptions::TProb TFloat;
2802 
2803     String<TFloat> estLosses;
2804     const unsigned maxErrors = (unsigned) floor(options.errorRate * length(options.readLengths));
2805     const unsigned maxErrors1 = maxErrors + 1;
2806 
2807     String<unsigned> delta;
2808     computeQGramLengths(delta, options);
2809     unsigned maxWeight = _pigeonholeMaxShapeWeight(indexShape(host(filterPattern)));
2810 
2811     if (delta[0] < maxWeight && options.lossRate != 0.0)
2812     {
2813         // lossy filtration
2814 
2815         // we can stop if the proposed weight exceeds or reaches the maximum
2816         for (unsigned ol = 0; ol < length(delta); ++ol)
2817             if (delta[ol] + ol > maxWeight)
2818             {
2819                 resize(delta, ol);
2820                 break;
2821             }
2822 
2823         filterPattern.params.overlap = estimatePigeonholeLosses(estLosses, delta, options);
2824         _patternInit(filterPattern, options.errorRate);
2825 
2826         if (options._debugLevel >= 2)
2827         {
2828             SEQAN_OMP_PRAGMA(critical)
2829             {
2830                 std::cout << "     e | e error reads | loss ol =" << std::setw(2) << estLosses[maxErrors1 + 2];
2831                 for (unsigned i = 2 * (maxErrors1 + 2); i < length(estLosses); i += maxErrors1 + 2)
2832                     std::cout << " |      ol =" << std::setw(2) << estLosses[i];
2833                 std::cout << std::endl;
2834                 std::cout << "       |               |       q =" << std::setw(2) << estLosses[maxErrors1 + 3];
2835                 for (unsigned i = 2 * (maxErrors1 + 2); i < length(estLosses); i += maxErrors1 + 2)
2836                     std::cout << " |       q =" << std::setw(2) << estLosses[i + 1];
2837                 std::cout << std::endl;
2838                 std::cout << " ------+---------------";
2839                 for (unsigned i = maxErrors1 + 2; i < length(estLosses); i += maxErrors1 + 2)
2840                     std::cout << "+-------------";
2841                 std::cout << std::endl;
2842                 std::cout.setf(std::ios::fixed);
2843                 TFloat sumReads;
2844                 for (unsigned e = 0; e <= maxErrors; ++e)
2845                 {
2846                     std::cout.fill(' ');
2847                     std::cout << std::setprecision(0);
2848                     std::cout << std::setw(6) << e;
2849                     std::cout << " |";
2850                     std::cout << std::setw(14) << (unsigned)estLosses[e + 2];
2851                     std::cout << std::setprecision(2);
2852                     for (unsigned i = maxErrors1 + 2 + e + 2; i < length(estLosses); i += maxErrors1 + 2)
2853                         std::cout << " |" << std::setw(12) << estLosses[i];
2854                     std::cout << std::endl;
2855                     sumReads += estLosses[e + 2];
2856                 }
2857                 std::cout << " ------+---------------";
2858                 for (unsigned i = maxErrors1 + 2; i < length(estLosses); i += maxErrors1 + 2)
2859                     std::cout << "+-------------";
2860                 std::cout << std::endl;
2861 
2862                 std::cout << std::setprecision(0);
2863                 std::cout << " total |" << std::setw(14) << (unsigned)sumReads << ' ';
2864                 std::cout << std::setprecision(2);
2865                 for (unsigned i = maxErrors1 + 4; i < length(estLosses); i += maxErrors1 + 2)
2866                 {
2867                     sumReads = 0.0;
2868                     for (unsigned e = 0; e <= maxErrors; ++e)
2869                         sumReads += estLosses[i + e];
2870                     std::cout << '|' << std::setw(12) << sumReads;
2871                     std::cout << ((filterPattern.params.overlap == (unsigned)estLosses[i - 2]) ? '*' : ' ');
2872                 }
2873                 std::cout << std::endl;
2874 
2875                 CharString str;
2876                 shapeToString(str, filterPattern.shape);
2877                 std::cout << std::endl << "Pigeonhole settings:" << std::endl;
2878                 std::cout << "  shape:    " << length(str) << '\t' << str << std::endl;
2879                 std::cout << "  stepsize: " << getStepSize(host(filterPattern)) << std::endl;
2880             }
2881         }
2882     }
2883     else
2884     {
2885         // lossless filtration
2886         filterPattern.params.overlap = 0;
2887     }
2888 
2889 }
2890 
2891 template <typename TIndex, typename TSwiftSpec, typename TOptions>
2892 void _applyFilterOptions(Pattern<TIndex, Swift<TSwiftSpec> > & filterPattern, TOptions const & options)
2893 {
2894     filterPattern.params.minThreshold = options.threshold;
2895     filterPattern.params.tabooLength = options.tabooLength;
2896     _patternInit(filterPattern, options.errorRate, 0);
2897 }
2898 
2899 //////////////////////////////////////////////////////////////////////////////
2900 // Find read matches in a single genome sequence
2901 template <
2902     typename TMatches,
2903     typename TFragmentStore,
2904     typename TReadIndex,
2905     typename TFilterSpec,
2906     typename TCounts,
2907     typename TRazerSOptions,
2908     typename TRazerSMode>
2909 void _mapSingleReadsToContig(
2910     TMatches & matches,
2911     TFragmentStore & store,
2912     unsigned                                  contigId,             // ... and its sequence number
2913     Pattern<TReadIndex, TFilterSpec> & filterPattern,
2914     TCounts & cnts,
2915     char                                      orientation,              // q-gram index of reads
2916     TRazerSOptions & options,
2917     TRazerSMode                       const & mode)
2918 {
2919     // FILTRATION
2920     typedef typename TFragmentStore::TContigSeq             TContigSeq;
2921     typedef Finder<TContigSeq, TFilterSpec>                 TFilterFinder;
2922     typedef Pattern<TReadIndex, TFilterSpec>                TFilterPattern;
2923 
2924     // VERIFICATION
2925     typedef MatchVerifier<
2926         TFragmentStore,
2927         TMatches,
2928         TRazerSOptions,
2929         TRazerSMode,
2930         TFilterPattern,
2931         TCounts>                                            TVerifier;
2932     typedef typename Fibre<TReadIndex, FibreText>::Type     TReadSet;
2933 
2934     // iterate all genomic sequences
2935     if (options._debugLevel >= 1)
2936     {
2937         std::cerr << std::endl << "Process genome seq #" << contigId;
2938         if (orientation == 'F')
2939             std::cerr << "[fwd]";
2940         else
2941             std::cerr << "[rev]";
2942     }
2943     lockContig(store, contigId);
2944     TContigSeq & contigSeq = store.contigStore[contigId].seq;
2945     if (orientation == 'R')
2946         reverseComplement(contigSeq);
2947 
2948     TReadSet & readSet = indexText(host(filterPattern));
2949     TFilterFinder   filterFinder(contigSeq, options.repeatLength, 1);
2950     TVerifier       verifier(matches, options, filterPattern, cnts);
2951 
2952 #ifdef RAZERS_BANDED_MYERS
2953     typename Size<TContigSeq>::Type contigLength = length(contigSeq);
2954 #endif
2955 
2956     // initialize verifier
2957     verifier.onReverseComplement = (orientation == 'R');
2958     verifier.genomeLength = length(contigSeq);
2959     verifier.m.contigId = contigId;
2960 
2961     double beginTime = sysTime();
2962     // Build q-gram index separately, so we can better compute the time for it.
2963     indexRequire(host(filterPattern), QGramSADir());
2964     options.timeBuildQGramIndex += sysTime() - beginTime;
2965 
2966     // iterate all verification regions returned by SWIFT
2967     while (find(filterFinder, filterPattern, options.errorRate))
2968     {
2969         // std::cout << "read id = " << (*filterFinder.curHit).ndlSeqNo << ", " << beginPosition(filterFinder) << std::endl;
2970 
2971 //        if (length(infix(filterFinder)) < length(readSet[(*filterFinder.curHit).ndlSeqNo]))
2972 //            continue;  // Skip if hit length < read length.  TODO(holtgrew): David has to fix something in banded myers to make this work.
2973 #ifdef RAZERS_BANDED_MYERS
2974         verifier.patternState.leftClip = (beginPosition(filterFinder) >= 0) ? 0 : -beginPosition(filterFinder);           // left clip if match begins left of the genome
2975         verifier.rightClip = (endPosition(filterFinder) <= contigLength) ? 0 : endPosition(filterFinder) - contigLength;  // right clip if match end right of the genome
2976 #endif
2977         verifier.m.readId = (*filterFinder.curHit).ndlSeqNo;
2978         if (!options.spec.DONT_VERIFY)
2979             matchVerify(verifier, infix(filterFinder), verifier.m.readId, readSet[verifier.m.readId], mode);
2980         ++options.countFiltration;
2981     }
2982     if (!unlockAndFreeContig(store, contigId))                          // if the contig is still used
2983         if (orientation == 'R')
2984             reverseComplement(contigSeq);
2985     // we have to restore original orientation
2986 }
2987 
2988 //////////////////////////////////////////////////////////////////////////////
2989 // Find read matches in many genome sequences
2990 template <
2991     typename TFSSpec,
2992     typename TFSConfig,
2993     typename TCounts,
2994     typename TSpec,
2995     typename TAlignMode,
2996     typename TGapMode,
2997     typename TScoreMode,
2998     typename TReadIndex,
2999     typename TMatchNPolicy,
3000     typename TFilterSpec>
3001 int _mapSingleReads(
3002     FragmentStore<TFSSpec, TFSConfig> & store,
3003     TCounts & cnts,
3004     RazerSCoreOptions<TSpec> & options,
3005     RazerSMode<TAlignMode, TGapMode, TScoreMode, TMatchNPolicy>  const & mode,
3006     TReadIndex & readIndex,
3007     TFilterSpec)
3008 {
3009     typedef FragmentStore<TFSSpec, TFSConfig>           TFragmentStore;
3010     //typedef typename TFragmentStore::TReadSeqStore      TReadSeqStore;
3011     typedef Pattern<TReadIndex, TFilterSpec>            TFilterPattern; // filter
3012 
3013     //typedef typename Value<TReadSeqStore>::Type const   TRead;
3014     //typedef Pattern<TRead, MyersUkkonen>                TMyersPattern;  // verifier
3015     // typedef Pattern<TRead, Myers<FindInfix, False, void> >	TMyersPattern;	// verifier
3016 
3017     typedef typename TFragmentStore::TContigSeq TContigSeq;
3018     typedef typename Position<TContigSeq>::Type TContigPos;
3019     typedef MatchRecord<TContigPos> TMatchRecord;
3020 
3021     // configure Swift pattern
3022     TFilterPattern filterPattern(readIndex);
3023 
3024     // configure filter pattern
3025     // (if this is a pigeonhole filter, all sequences must be appended first)
3026     _applyFilterOptions(filterPattern, options);
3027     filterPattern.params.printDots = options._debugLevel > 0;
3028 
3029     // clear stats
3030     options.countFiltration = 0;
3031     options.countVerification = 0;
3032     options.timeMapReads = 0;
3033     options.timeDumpResults = 0;
3034     options.timeBuildQGramIndex = 0;
3035     options.timeCompactMatches = 0;
3036     options.timeMaskDuplicates = 0;
3037     options.timeFsCopy = 0;
3038     options.timeFiltration = 0;
3039     options.timeVerification = 0;
3040     SEQAN_PROTIMESTART(find_time);
3041 
3042     // We collect the matches in a more compact data structure than the
3043     // AlignedReadStoreElement from FragmentStore.
3044     String<TMatchRecord> matches;
3045 
3046     // iterate over genome sequences
3047     for (unsigned contigId = 0; contigId < length(store.contigStore); ++contigId)
3048     {
3049         // lock to prevent releasing and loading the same contig twice
3050         // (once per _mapSingleReadsToContig call)
3051         lockContig(store, contigId);
3052 #ifndef RAZERS_WINDOW
3053         if (options.forward)
3054             _mapSingleReadsToContig(matches, store, contigId, filterPattern, cnts, 'F', options, mode);
3055         if (options.reverse)
3056             _mapSingleReadsToContig(matches, store, contigId, filterPattern, cnts, 'R', options, mode);
3057 #else
3058         if (options.forward)
3059             _mapSingleReadsToContigWindow(store, contigId, filterPattern, cnts, 'F', options, mode);
3060         if (options.reverse)
3061             _mapSingleReadsToContigWindow(store, contigId, filterPattern, cnts, 'R', options, mode);
3062 #endif
3063         unlockAndFreeContig(store, contigId);
3064     }
3065 
3066     options.timeMapReads = SEQAN_PROTIMEDIFF(find_time);
3067     if (options._debugLevel >= 1)
3068         std::cerr << std::endl << "Finding reads took               \t" << options.timeMapReads << " seconds" << std::endl;
3069 
3070     double beginCopyTime = sysTime();
3071     // Final mask duplicates and compact matches.
3072     Nothing nothing;
3073     if (IsSameType<TGapMode, RazerSGapped>::VALUE || options.threshold == 0)
3074         maskDuplicates(matches, options, mode);
3075     compactMatches(matches, cnts, options, mode, nothing, COMPACT_FINAL);
3076     // Write back to store.
3077     reserve(store.alignedReadStore, length(matches), Exact());
3078     reserve(store.alignQualityStore, length(matches), Exact());
3079     typedef typename Iterator<String<TMatchRecord>, Standard>::Type TIterator;
3080     typedef typename Value<typename TFragmentStore::TAlignedReadStore>::Type TAlignedReadStoreElem;
3081     typedef typename Value<typename TFragmentStore::TAlignQualityStore>::Type TAlignedQualStoreElem;
3082     for (TIterator it = begin(matches), itEnd = end(matches); it != itEnd; ++it)
3083     {
3084         SEQAN_ASSERT_NEQ(it->orientation, '-');
3085         SEQAN_ASSERT_LEQ(it->beginPos, it->endPos);
3086         if (it->orientation == 'R')
3087             std::swap(it->beginPos, it->endPos);
3088         appendValue(store.alignedReadStore, TAlignedReadStoreElem(length(store.alignQualityStore), it->readId, it->contigId, it->beginPos, it->endPos));
3089         appendValue(store.alignQualityStore, TAlignedQualStoreElem(it->pairScore, it->score, -it->score));
3090     }
3091     options.timeFsCopy = sysTime() - beginCopyTime;
3092 
3093     if (options._debugLevel >= 1)
3094     {
3095         std::cerr << "Masking duplicates took          \t" << options.timeMaskDuplicates << " seconds" << std::endl;
3096         std::cerr << "Compacting matches took          \t" << options.timeCompactMatches << " seconds" << std::endl;
3097         std::cerr << "Building q-gram index took       \t" << options.timeBuildQGramIndex << " seconds" << std::endl;
3098         std::cerr << "Time for copying back            \t" << options.timeFsCopy << " seconds" << std::endl;
3099         std::cerr << "Time for filtration              \t" << options.timeFiltration << " seconds" << std::endl;
3100         std::cerr << "Time for verifications           \t" << options.timeVerification << " seconds" << std::endl;
3101         std::cerr << std::endl;
3102         std::cerr << "___FILTRATION_STATS____" << std::endl;
3103         std::cerr << "Filtration counter:      " << options.countFiltration << std::endl;
3104         std::cerr << "Successful verifications: " << options.countVerification << std::endl;
3105     }
3106     return 0;
3107 }
3108 
3109 //////////////////////////////////////////////////////////////////////////////
3110 // Wrapper for SWIFT (default)
3111 template <
3112     typename TFSSpec,
3113     typename TFSConfig,
3114     typename TCounts,
3115     typename TSpec,
3116     typename TShape,
3117     typename TAlignMode,
3118     typename TGapMode,
3119     typename TScoreMode,
3120     typename TMatchNPolicy,
3121     typename TFilterSpec>
3122 int _mapSingleReads(
3123     FragmentStore<TFSSpec, TFSConfig> & store,
3124     TCounts & cnts,
3125     RazerSCoreOptions<TSpec> & options,
3126     TShape const & shape,
3127     RazerSMode<TAlignMode, TGapMode, TScoreMode, TMatchNPolicy> const & mode,
3128     TFilterSpec)
3129 {
3130     typedef FragmentStore<TFSSpec, TFSConfig>           TFragmentStore;
3131     typedef typename TFragmentStore::TReadSeqStore      TReadSeqStore;
3132 #ifndef RAZERS_OPENADDRESSING
3133     typedef Index<TReadSeqStore, IndexQGram<TShape> >   TIndex;         // q-gram index
3134 #else
3135     typedef Index<TReadSeqStore, IndexQGram<TShape, OpenAddressing> >   TIndex;
3136 #endif
3137 
3138     // configure q-gram index
3139     TIndex swiftIndex(store.readSeqStore, shape);
3140 #ifdef RAZERS_OPENADDRESSING
3141     swiftIndex.alpha = options.loadFactor;
3142 #endif
3143     cargo(swiftIndex).abundanceCut = options.abundanceCut;
3144     cargo(swiftIndex)._debugLevel = options._debugLevel;
3145 
3146     return _mapSingleReads(store, cnts, options, mode, swiftIndex, TFilterSpec());
3147 }
3148 
3149 //////////////////////////////////////////////////////////////////////////////
3150 // Wrapper for SWIFT with Micro RNA
3151 template <
3152     typename TFSSpec,
3153     typename TFSConfig,
3154     typename TCounts,
3155     typename TSpec,
3156     typename TShape,
3157     typename TGapMode,
3158     typename TScoreMode,
3159     typename TMatchNPolicy,
3160     typename TFilterSpec>
3161 int _mapSingleReads(
3162     FragmentStore<TFSSpec, TFSConfig> & store,
3163     TCounts & cnts,
3164     RazerSCoreOptions<TSpec> & options,
3165     TShape const & shape,
3166     RazerSMode<RazerSPrefix, TGapMode, TScoreMode, TMatchNPolicy> const & mode,
3167     TFilterSpec)
3168 {
3169     typedef FragmentStore<TFSSpec, TFSConfig>               TFragmentStore;
3170     typedef typename TFragmentStore::TReadSeqStore          TReadSeqStore;
3171 
3172 //	typedef typename Value<TReadSeqStore>::Type				TRead;
3173 //	typedef typename Infix<TRead>::Type						TReadInfix;
3174 //	typedef StringSet<TReadInfix>							TReadSet;
3175     typedef TReadSeqStore                                   TReadSet;
3176     typedef Index<TReadSet, IndexQGram<TShape> >            TIndex;         // q-gram index
3177 
3178 //	TReadSet readSet;
3179 //	unsigned readCount = length(store.readSeqStore);
3180 //	resize(readSet, readCount, Exact());
3181 //
3182 //	for (unsigned i = 0; i < readCount; ++i)
3183 //		assign(readSet[i], prefix(store.readSeqStore[i], _min(length(store.readSeqStore[i]), options.prefixSeedLength)));
3184 //
3185 //	// configure q-gram index
3186 //	TIndex swiftIndex(readSet, shape);
3187     TIndex swiftIndex(store.readSeqStore, shape);
3188     cargo(swiftIndex).abundanceCut = options.abundanceCut;
3189     cargo(swiftIndex)._debugLevel = options._debugLevel;
3190 
3191     return _mapSingleReads(store, cnts, options, mode, swiftIndex, TFilterSpec());
3192 }
3193 
3194 //////////////////////////////////////////////////////////////////////////////
3195 // Wrapper for different filters specs
3196 template <
3197     typename TFSSpec,
3198     typename TFSConfig,
3199     typename TCounts,
3200     typename TSpec,
3201     typename TShape,
3202     typename TAlignMode,
3203     typename TGapMode,
3204     typename TScoreMode,
3205     typename TMatchNPolicy>
3206 int _mapSingleReads(
3207     FragmentStore<TFSSpec, TFSConfig> & store,
3208     TCounts & cnts,
3209     RazerSCoreOptions<TSpec> & options,
3210     TShape const & shape,
3211     RazerSMode<TAlignMode, TGapMode, TScoreMode, TMatchNPolicy> const & mode)
3212 {
3213     if (options.threshold > 0)
3214     {
3215         typedef typename If<IsSameType<TGapMode, RazerSGapped>, SwiftSemiGlobal, SwiftSemiGlobalHamming>::Type TSwiftSpec;
3216         return _mapSingleReads(store, cnts, options, shape, mode, Swift<TSwiftSpec>());
3217     }
3218     else
3219     {
3220         typedef typename If<IsSameType<TGapMode, RazerSGapped>, void, Hamming_>::Type TPigeonholeSpec;
3221         return _mapSingleReads(store, cnts, options, Shape<Dna, OneGappedShape>(), mode, Pigeonhole<TPigeonholeSpec>());
3222     }
3223 }
3224 
3225 //////////////////////////////////////////////////////////////////////////////
3226 // Wrapper for different filters specs
3227 template <
3228     typename TFSSpec,
3229     typename TFSConfig,
3230     typename TCounts,
3231     typename TSpec,
3232     typename TShape,
3233     typename TAlignMode,
3234     typename TGapMode,
3235     typename TScoreMode,
3236     typename TMatchNPolicy>
3237 int _mapMatePairReads(
3238     FragmentStore<TFSSpec, TFSConfig> & store,
3239     TCounts & cnts,
3240     RazerSCoreOptions<TSpec> & options,
3241     TShape const & shape,
3242     RazerSMode<TAlignMode, TGapMode, TScoreMode, TMatchNPolicy> const & mode)
3243 {
3244     if (options.threshold > 0)
3245     {
3246         typedef typename If<IsSameType<TGapMode, RazerSGapped>, SwiftSemiGlobal, SwiftSemiGlobalHamming>::Type TSwiftSpec;
3247         return _mapMatePairReads(store, cnts, options, shape, mode, Swift<TSwiftSpec>());
3248     }
3249     else
3250     {
3251         typedef typename If<IsSameType<TGapMode, RazerSGapped>, void, Hamming_>::Type TPigeonholeSpec;
3252         return _mapMatePairReads(store, cnts, options, Shape<Dna, OneGappedShape>(), mode, Pigeonhole<TPigeonholeSpec>());
3253     }
3254 }
3255 
3256 //////////////////////////////////////////////////////////////////////////////
3257 // Wrapper for single/mate-pair mapping
3258 template <
3259     typename TFSSpec,
3260     typename TFSConfig,
3261     typename TCounts,
3262     typename TSpec,
3263     typename TShape,
3264     typename TRazerSMode>
3265 int _mapReads(
3266     FragmentStore<TFSSpec, TFSConfig> & store,
3267     TCounts & cnts,
3268     RazerSCoreOptions<TSpec> & options,
3269     TShape const & shape,
3270     TRazerSMode                       const & mode)
3271 {
3272     //typedef FragmentStore<TFSSpec, TFSConfig>           TFragmentStore;
3273     //typedef typename TFragmentStore::TReadSeqStore      TReadSeqStore;
3274     //typedef typename Value<TReadSeqStore>::Type         TRead;
3275     //typedef Pattern<TRead const, MyersUkkonen>          TMyersPattern;  // verifier
3276 
3277 
3278     options.dRange = options.scoreDistanceRange;
3279     if (options.dRange == 0)
3280         options.dRange = 1 << 30;
3281     if (options.maxHits == 1 && !options.purgeAmbiguous)
3282         options.dRange = -1;
3283     else
3284         --options.dRange;
3285 
3286 
3287     int readCount = length(store.readSeqStore);
3288     resize(options.errorCutOff, readCount, Exact());
3289 
3290 #ifndef RAZERS_BANDED_MYERS
3291     if (options.gapMode == RAZERS_GAPPED)
3292         resize(options.forwardPatterns, readCount, Exact());
3293 #endif
3294 
3295     // set absolute error cut-offs
3296     Dna5String tmp;
3297 
3298     SEQAN_OMP_PRAGMA(parallel for private(tmp))
3299     for (int i = 0; i < readCount; ++i)
3300     {
3301         unsigned err = (unsigned)(length(store.readSeqStore[i]) * options.errorRate);
3302         options.errorCutOff[i] = (err < 255) ? err + 1 : 255;
3303 
3304         // optionally compute preprocessing information
3305 #ifndef RAZERS_BANDED_MYERS
3306         if (!empty(store.readSeqStore[i]))
3307             continue;
3308         _patternMatchNOfPattern(options.forwardPatterns[i], options.matchN);
3309         _patternMatchNOfFinder(options.forwardPatterns[i], options.matchN);
3310 
3311 #ifdef RAZERS_NOOUTERREADGAPS
3312         if (options.libraryLength >= 0 && (i & 1) == 1)
3313         {
3314             tmp = store.readSeqStore[i];
3315             reverseComplement(tmp);
3316             setHost(options.forwardPatterns[i], prefix(tmp, length(tmp) - 1));
3317         }
3318         else
3319             setHost(options.forwardPatterns[i], prefix(store.readSeqStore[i], length(store.readSeqStore[i]) - 1));
3320 #else
3321         if (options.libraryLength >= 0 && (i & 1) == 1)
3322         {
3323             tmp = store.readSeqStore[i];
3324             reverseComplement(tmp);
3325             setHost(options.forwardPatterns[i], tmp);
3326         }
3327         else
3328             setHost(options.forwardPatterns[i], store.readSeqStore[i]);
3329 #endif
3330 #endif  // #ifdef RAZERS_BANDED_MYERS
3331     }
3332 
3333 #ifdef _OPENMP
3334     if (options.threadCount == 0 || length(store.readNameStore) < MIN_PARALLEL_WORK)
3335 #endif
3336     {
3337         // Sequential RazerS
3338         #ifdef RAZERS_MATEPAIRS
3339         if (options.libraryLength >= 0)
3340             return _mapMatePairReads(store, cnts, options, shape, mode);
3341         else
3342         #endif  // #ifndef RAZERS_MATEPAIRS
3343         return _mapSingleReads(store, cnts, options, shape, mode);
3344     }
3345 #ifdef _OPENMP
3346     else
3347     {
3348         // Parallel RazerS
3349         #ifdef RAZERS_MATEPAIRS
3350         if (options.libraryLength >= 0)
3351             return _mapMatePairReadsParallel(store, cnts, options, shape, mode);
3352         else
3353         #endif  // #ifndef RAZERS_MATEPAIRS
3354         return _mapSingleReadsParallel(store, cnts, options, shape, mode);
3355     }
3356 #endif
3357 }
3358 
3359 //////////////////////////////////////////////////////////////////////////////
3360 // Wrapper for different shapes
3361 template <typename TFSSpec, typename TFSConfig, typename TCounts, typename TSpec, typename TRazersMode>
3362 int _mapReads(
3363     FragmentStore<TFSSpec, TFSConfig> & store,
3364     TCounts & cnts,
3365     RazerSCoreOptions<TSpec> & options,
3366     TRazersMode                       const & mode)
3367 {
3368     Shape<Dna, SimpleShape>     ungapped;
3369     Shape<Dna, OneGappedShape>  onegapped;
3370     Shape<Dna, GenericShape>    gapped;
3371 
3372     // 2x3 SPECIALIZATION
3373 
3374     // select best-fitting shape
3375     if (stringToShape(ungapped, options.shape))
3376         return _mapReads(store, cnts, options, ungapped, mode);
3377 
3378 //	if (stringToShape(onegapped, options.shape))
3379 //		return _mapReads(store, cnts, options, onegapped, mode);
3380     if (stringToShape(gapped, options.shape))
3381         return _mapReads(store, cnts, options, gapped, mode);
3382 
3383     return RAZERS_INVALID_SHAPE;
3384 }
3385 
3386 //////////////////////////////////////////////////////////////////////////////
3387 // Wrapper for different score modes
3388 template <typename TFSSpec, typename TFSConfig, typename TCounts, typename TSpec, typename TAlignMode, typename TGapMode, typename TMatchNPolicy>
3389 int _mapReads(
3390     FragmentStore<TFSSpec, TFSConfig> & store,
3391     TCounts & cnts,
3392     RazerSCoreOptions<TSpec> & options,
3393     RazerSMode<TAlignMode, TGapMode, Nothing, TMatchNPolicy> const)
3394 {
3395     if (options.scoreMode == RAZERS_ERRORS)
3396         return _mapReads(store, cnts, options, RazerSMode<TAlignMode, TGapMode, RazerSErrors, TMatchNPolicy>());
3397 
3398 /*	if (options.scoreMode == RAZERS_SCORE)
3399         return _mapReads(store, cnts, options, RazerSMode<TAlignMode, TGapMode, RazerSScore, TMatchNPolicy>());
3400     if (options.scoreMode == RAZERS_QUALITY)
3401         return _mapReads(store, cnts, options, RazerSMode<TAlignMode, TGapMode, RazerSQuality<>, TMatchNPolicy>());
3402 */  return RAZERS_INVALID_OPTIONS;
3403 }
3404 
3405 //////////////////////////////////////////////////////////////////////////////
3406 // Wrapper for different gap and align modes
3407 template <typename TFSSpec, typename TFSConfig, typename TCounts, typename TSpec>
3408 int _mapReads(
3409     FragmentStore<TFSSpec, TFSConfig> & store,
3410     TCounts & cnts,
3411     RazerSCoreOptions<TSpec> & options)
3412 {
3413     if (options.matchN)
3414     {
3415         if (options.gapMode == RAZERS_GAPPED)
3416         {
3417 //             if (options.alignMode == RAZERS_LOCAL)
3418 //                 return _mapReads(store, cnts, options, RazerSMode<RazerSLocal, RazerSGapped, Nothing, NMatchesAll_>());
3419 //             if (options.alignMode == RAZERS_PREFIX)
3420 //                 return _mapReads(store, cnts, options, RazerSMode<RazerSPrefix, RazerSGapped, Nothing, NMatchesAll_>());
3421             if (options.alignMode == RAZERS_GLOBAL)
3422                 return _mapReads(store, cnts, options, RazerSMode<RazerSGlobal, RazerSGapped, Nothing, NMatchesAll_>());
3423         }
3424         else
3425         {
3426 //             if (options.alignMode == RAZERS_LOCAL)
3427 //                 return _mapReads(store, cnts, options, RazerSMode<RazerSLocal, RazerSUngapped, Nothing, NMatchesAll_>());
3428 //             if (options.alignMode == RAZERS_PREFIX)
3429 //                 return _mapReads(store, cnts, options, RazerSMode<RazerSPrefix, RazerSUngapped, Nothing, NMatchesAll_>());
3430             if (options.alignMode == RAZERS_GLOBAL)
3431                 return _mapReads(store, cnts, options, RazerSMode<RazerSGlobal, RazerSUngapped, Nothing, NMatchesAll_>());
3432         }
3433     }
3434     else
3435     {
3436         if (options.gapMode == RAZERS_GAPPED)
3437         {
3438             //     if (options.alignMode == RAZERS_LOCAL)
3439             //         return _mapReads(store, cnts, options, RazerSMode<RazerSLocal, RazerSGapped, Nothing, NMatchesNone_>());
3440             //     if (options.alignMode == RAZERS_PREFIX)
3441             //         return _mapReads(store, cnts, options, RazerSMode<RazerSPrefix, RazerSGapped, Nothing, NMatchesNone_>());
3442             if (options.alignMode == RAZERS_GLOBAL)
3443                 return _mapReads(store, cnts, options, RazerSMode<RazerSGlobal, RazerSGapped, Nothing, NMatchesNone_>());
3444         }
3445         else
3446         {
3447             // if (options.alignMode == RAZERS_LOCAL)
3448             //     return _mapReads(store, cnts, options, RazerSMode<RazerSLocal, RazerSUngapped, Nothing, NMatchesNone_>());
3449             // if (options.alignMode == RAZERS_PREFIX)
3450             //     return _mapReads(store, cnts, options, RazerSMode<RazerSPrefix, RazerSUngapped, Nothing, NMatchesNone_>());
3451             if (options.alignMode == RAZERS_GLOBAL)
3452                 return _mapReads(store, cnts, options, RazerSMode<RazerSGlobal, RazerSUngapped, Nothing, NMatchesNone_>());
3453         }
3454     }
3455     return RAZERS_INVALID_OPTIONS;
3456 }
3457 
3458 }
3459 
3460 #endif
3461