1  /*==========================================================================
2              RazerS - Fast Read Mapping with Controlled Loss Rate
3                    http://www.seqan.de/projects/razers.html
4 
5  ============================================================================
6   Copyright (C) 2008 by David Weese
7 
8   This program is free software; you can redistribute it and/or
9   modify it under the terms of the GNU Lesser General Public
10   License as published by the Free Software Foundation; either
11   version 3 of the License, or (at your options) any later version.
12 
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17 
18   You should have received a copy of the GNU General Public License
19   along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  ==========================================================================*/
21 
22 #ifndef SEQAN_HEADER_OUTPUT_FORMAT_H
23 #define SEQAN_HEADER_OUTPUT_FORMAT_H
24 
25 #include <iostream>
26 #include <fstream>
27 #include <sstream>
28 
29 #include "razers.h"
30 #include <seqan/align.h>
31 
32 namespace seqan
33 {
34 
35 //////////////////////////////////////////////////////////////////////////////
36 // Quality-based score
37 
38 	template <typename TQualityString = CharString>
39 	struct Quality;
40 
41 	template <typename TValue, typename TQualityString>
42 	class Score<TValue, Quality<TQualityString> >
43 	{
44 	public:
45 		TValue data_match;
46 		TValue data_mismatch;
47 		TValue data_gap_extend;
48 		TValue data_gap_open;
49 
50 		TQualityString const *data_qual;
51 
52 	public:
Score()53 		Score():
54 			data_match(0),
55 			data_mismatch(-1),
56 			data_gap_extend(-1),
57 			data_gap_open(-1),
58 			data_qual(NULL)
59 		{
60 		}
Score(TValue _match,TValue _mismatch,TValue _gap)61 		Score(TValue _match, TValue _mismatch, TValue _gap):
62 			data_match(_match),
63 			data_mismatch(_mismatch),
64 			data_gap_extend(_gap),
65 			data_gap_open(_gap),
66 			data_qual(NULL)
67 		{
68 		}
Score(TValue _match,TValue _mismatch,TValue _gap_extend,TValue _gap_open,TQualityString const & _qual)69 		Score(TValue _match, TValue _mismatch, TValue _gap_extend, TValue _gap_open, TQualityString const &_qual):
70 			data_match(_match),
71 			data_mismatch(_mismatch),
72 			data_gap_extend(_gap_extend),
73 			data_gap_open(_gap_open),
74 			data_qual(&_qual)
75 		{
76 		}
77 
Score(Score const & other)78 		Score(Score const & other):
79 			data_match(other.data_match),
80 			data_mismatch(other.data_mismatch),
81 			data_gap_extend(other.data_gap_extend),
82 			data_gap_open(other.data_gap_open),
83 			data_qual(other.data_qual)
84 		{
85 		}
~Score()86 		~Score()
87 		{
88 		}
89 
90 		Score & operator = (Score const & other)
91 		{
92 			data_match = other.data_match;
93 			data_mismatch = other.data_mismatch;
94 			data_gap_extend = other.data_gap_extend;
95 			data_gap_open = other.data_gap_open;
96 			data_qual = other.data_qual;
97 			return *this;
98 		}
99 	};
100 
101 //////////////////////////////////////////////////////////////////////////////
102 
103 template <typename TValue, typename TQualityString, typename TPos1, typename TPos2, typename TSeq1, typename TSeq2>
104 inline TValue
score(Score<TValue,Quality<TQualityString>> const & me,TPos1 pos1,TPos2 pos2,TSeq1 const & seq1,TSeq2 const & seq2)105 score(Score<TValue, Quality<TQualityString> > const & me,
106 	  TPos1 pos1,
107 	  TPos2 pos2,
108 	  TSeq1 const &seq1,
109 	  TSeq2 const &seq2)
110 {
111 	if (seq1[pos1] != seq2[pos2])
112 		if (me.data_qual)
113 			return (*me.data_qual)[pos2];
114 		else
115 			return scoreMismatch(me);
116 	else
117 		return scoreMatch(me);
118 }
119 
120 
121 //////////////////////////////////////////////////////////////////////////////
122 // Less-operators ...
123 
124 	// ... to sort matches and remove duplicates with equal gBegin
125 	template <typename TReadMatch>
126 	struct LessGPosRNo : public ::std::binary_function < TReadMatch, TReadMatch, bool >
127 	{
operatorLessGPosRNo128 		inline bool operator() (TReadMatch const &a, TReadMatch const &b) const
129 		{
130 			// genome position and orientation
131 			if (a.gseqNo < b.gseqNo) return true;
132 			if (a.gseqNo > b.gseqNo) return false;
133 			if (a.gBegin < b.gBegin) return true;
134 			if (a.gBegin > b.gBegin) return false;
135 			if (a.orientation < b.orientation) return true;
136 			if (a.orientation > b.orientation) return false;
137 
138 			// read number
139 			if (a.rseqNo < b.rseqNo) return true;
140 			if (a.rseqNo > b.rseqNo) return false;
141 
142 			// quality
143 			return a.editDist < b.editDist;
144 		}
145 	};
146 
147 //////////////////////////////////////////////////////////////////////////////
148 // Determine error distribution
149 template <typename TErrDistr, typename TMatches, typename TReads, typename TGenomes, typename TOptions>
150 inline unsigned
getErrorDistribution(TErrDistr & posError,TMatches & matches,TReads & reads,TGenomes & genomes,TOptions & options)151 getErrorDistribution(
152 	TErrDistr &posError,
153 	TMatches &matches,
154 	TReads &reads,
155 	TGenomes &genomes,
156 	TOptions &options)
157 {
158 	typename Iterator<TMatches, Standard>::Type	it = begin(matches, Standard());
159 	typename Iterator<TMatches, Standard>::Type	itEnd = end(matches, Standard());
160 
161 	Dna5String genome;
162 	unsigned unique = 0;
163 	for (; it != itEnd; ++it)
164 	{
165 		if ((*it).orientation == '-') continue;
166 
167 		Dna5String const &read = reads[(*it).rseqNo];
168 		genome = infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd);
169 		if ((*it).orientation == 'R')
170 			reverseComplement(genome);
171 		for (unsigned i = 0; i < length(posError) && i < length(read); ++i)
172 			if ((options.compMask[ordValue(genome[i])] & options.compMask[ordValue(read[i])]) == 0)
173 				++posError[i];
174 		++unique;
175 	}
176 	return unique;
177 }
178 
179 template <typename TErrDistr, typename TCount1, typename TCount2, typename TMatches, typename TReads, typename TGenomes, typename TSpec>
180 inline unsigned
getErrorDistribution(TErrDistr & posError,TCount1 & insertions,TCount2 & deletions,TMatches & matches,TReads & reads,TGenomes & genomes,RazerSOptions<TSpec> & options)181 getErrorDistribution(
182 	TErrDistr &posError,
183 	TCount1 &insertions,
184 	TCount2 &deletions,
185 	TMatches &matches,
186 	TReads &reads,
187 	TGenomes &genomes,
188 	RazerSOptions<TSpec> &options)
189 {
190 	typedef Align<String<Dna5>, ArrayGaps> TAlign;
191 	typedef typename Row<TAlign>::Type TRow;
192 	typedef typename Iterator<TRow>::Type TIter;
193 
194 	//typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
195 	typedef typename Position<TAlign>::Type TPosition;
196 
197 	typename Iterator<TMatches, Standard>::Type	it = begin(matches, Standard());
198 	typename Iterator<TMatches, Standard>::Type	itEnd = end(matches, Standard());
199 
200 	Align<Dna5String, ArrayGaps> align;
201 	Score<int> scoreType = Score<int>(0, -999, -1001, -1000);	// levenshtein-score (match, mismatch, gapOpen, gapExtend)
202 	if (options.hammingOnly)
203 		scoreType.data_mismatch = -1;
204 	resize(rows(align), 2);
205 
206 	unsigned unique = 0;
207 	for (; it != itEnd; ++it)
208 	{
209 		if ((*it).orientation == '-') continue;
210 
211 		assignSource(row(align, 0), reads[(*it).rseqNo]);
212 		assignSource(row(align, 1), infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd));
213 		if ((*it).orientation == 'R')
214 			reverseComplement(source(row(align, 1)));
215 		globalAlignment(align, scoreType);
216 
217 		TRow& row0 = row(align, 0);
218 		TRow& row1 = row(align, 1);
219 
220 		TPosition begin = beginPosition(cols(align));
221 		TPosition end = endPosition(cols(align));
222 
223 		TIter it0 = iter(row0, begin);
224 		TIter it1 = iter(row1, begin);
225 		TIter end0 = iter(row0, end);
226 
227 		unsigned pos = 0;
228 		for (; it0 != end0 && pos < length(posError); ++it0, ++it1)
229 		{
230 			if (isGap(it0))
231 				++insertions;
232 			else
233 			{
234 				if (isGap(it1))
235 					++deletions;
236 				else
237 					if ((options.compMask[ordValue(getValue(it0))] & options.compMask[ordValue(getValue(it1))]) == 0)
238 						++posError[pos];
239 				++pos;
240 			}
241 		}
242 		++unique;
243 	}
244 	return unique;
245 }
246 
247 
248 //////////////////////////////////////////////////////////////////////////////
249 template <typename TFile, typename TSource, typename TSpec, typename TPosition>
250 inline void
dumpAlignment(TFile & target,Align<TSource,TSpec> const & source,TPosition begin_,TPosition end_)251 dumpAlignment(TFile & target, Align<TSource, TSpec> const & source, TPosition begin_, TPosition end_)
252 {
253 	typedef Align<TSource, TSpec> const TAlign;
254 	typedef typename Row<TAlign>::Type TRow;
255 	typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
256 
257 	TRowsPosition row_count = length(rows(source));
258 
259 	// Print sequences
260 	for (TRowsPosition i = 0; i < row_count; ++i)
261 	{
262 		if (i == 0)
263 			target << "#Read:   ";
264 		else
265 			target << "#Genome: ";
266 		TRow& row_ = row(source, i);
267 		typedef typename Iterator<typename Row<TAlign>::Type const>::Type TIter;
268 		TIter begin1_ = iter(row_, begin_);
269 		TIter end1_ = iter(row_, end_);
270 		for (; begin1_ != end1_; ++begin1_) {
271 			if (isGap(begin1_))
272 			    target << gapValue<char>();
273 			else
274 			    target << *begin1_;
275 		}
276 		target << '\n';
277 	}
278 }
279 
280 template<typename TMatches, typename TCounts, typename TOptions>
281 void
countCoocurrences(TMatches & matches,TCounts & cooc,TOptions & options)282 countCoocurrences(TMatches & matches, TCounts & cooc, TOptions & options)
283 {
284 	clear(cooc);
285 	int maxSeedErrors = (int)(options.errorRate * options.artSeedLength) + 1;
286 	resize(cooc,maxSeedErrors+1,0);
287 	for (int i = 0; i < maxSeedErrors+1; ++i)
288 		cooc[i] = 1;
289 
290 	int count = 0;
291 	unsigned readNo = -1;
292 	int preEditDist = -1;
293 	typename Iterator<TMatches>::Type it = begin(matches,Standard());
294 	typename Iterator<TMatches>::Type itEnd = end(matches,Standard());
295 
296 	for(; it != itEnd; ++it)
297 	{
298 		if ((*it).rseqNo == readNo)
299 		{
300 			if(preEditDist > 1) continue;// || dist > options.errorRate * maxReadLength + 1) continue;
301 			int dist = (*it).seedEditDist - preEditDist;
302 			if(dist > maxSeedErrors) continue;
303 			if(dist < 0) ++cooc[0];
304 			else ++cooc[dist];
305 		}
306 		else
307 		{
308 			readNo = (*it).rseqNo;
309 			preEditDist = (*it).seedEditDist;
310 			if(preEditDist <= 1) ++count;
311 		}
312 	}
313 	for (unsigned i = 0; i < length(cooc); ++i)
314 	{
315 		cooc[i] = (int)(-4.343 * log((double)cooc[i]/count) );
316 		if (cooc[i] < 0) cooc[i] = 0;
317 	}
318 	if(options._debugLevel > 1)
319 	{
320 		::std::cerr << "[mapping_count] ";
321 		for(unsigned j = 0; j < length(cooc); ++j)
322 			::std::cerr << cooc[j] << " ";
323 		::std::cerr << ::std::endl;
324 	}
325 
326 }
327 
328 
329 template<typename TAlign, typename TString, typename TIter>
330 void
getCigarLine(TAlign & align,TString & cigar,TString & mutations,TIter ali_it0,TIter ali_it0_stop,TIter ali_it1,TIter ali_it1_stop)331 getCigarLine(TAlign & align, TString & cigar, TString & mutations, TIter ali_it0, TIter ali_it0_stop, TIter ali_it1, TIter ali_it1_stop)
332 {
333 
334 	typedef typename Source<TAlign>::Type TSource;
335 	typedef typename Iterator<TSource, Rooted>::Type TStringIterator;
336 
337 // 	typedef typename Row<TAlign>::Type TRow;
338 // 	typedef typename Iterator<TRow, Rooted>::Type TAlignIterator;
339 
340 	TStringIterator readBase = begin(source(row(align,0)));
341 
342 	int readPos = 0;
343 	bool first = true;
344 	int inserted = 0;
345 	int deleted = 0;
346 	while(ali_it0 != ali_it0_stop && ali_it1 != ali_it1_stop)
347 	{
348 		inserted = 0;
349 		deleted = 0;
350 		int matched = 0;
351 		while(ali_it0!=ali_it0_stop && ali_it1!=ali_it1_stop && !isGap(ali_it0)&& !isGap(ali_it1))
352 		{
353 			++readPos;
354 			if(*ali_it1 != *ali_it0)
355 			{
356 				if(first) first = false;
357 				else mutations << ",";
358 				mutations << readPos <<*readBase;
359 			}
360 			++readBase;
361 			++ali_it0;
362 			++ali_it1;
363 			++matched;
364 		}
365 		if(matched>0) cigar << matched<< "M" ;
366 		while(ali_it0!=ali_it0_stop && isGap(ali_it0))
367 		{
368 			++ali_it0;
369 			++ali_it1;
370 			++deleted;
371 		}
372 		if(deleted>0) cigar << deleted << "D";
373 		while(isGap(ali_it1)&& ali_it1!=ali_it1_stop)
374 		{
375 			++ali_it0;
376 			++ali_it1;
377 			++readPos;
378 			if(first) first = false;
379 			else mutations << ",";
380 			mutations << readPos << *readBase;
381 			++readBase;
382 			++inserted;
383 		}
384 		if(inserted>0) cigar << inserted << "I";
385 	}
386 	// end gaps can happen in split mapping
387 	while(ali_it0!=ali_it0_stop)
388 	{
389 		++ali_it0;
390 		++deleted;
391 	}
392 	if(deleted>0) cigar << deleted << "D";
393 	while(ali_it1 != ali_it1_stop)
394 	{
395 		++ali_it1;
396 		++readPos;
397 		if(first) first = false;
398 		else mutations << ",";
399 		mutations << readPos << *readBase;
400 		++inserted;
401 	}
402 	if(inserted>0) cigar << inserted << "I";
403 
404 }
405 
406 template<typename TAlign, typename TIter>
407 void
getCigarLine(TAlign & align,String<Pair<char,int>> & cigar,String<Pair<Dna5,int>> & mutations,int offset,TIter ali_it0,TIter ali_it0_stop,TIter ali_it1,TIter ali_it1_stop)408 getCigarLine(TAlign & align,
409 		String<Pair<char,int> > & cigar,
410 		String<Pair<Dna5,int> > & mutations,
411 		int offset,
412 		TIter ali_it0, TIter ali_it0_stop, TIter ali_it1, TIter ali_it1_stop)
413 {
414 
415 	typedef typename Source<TAlign>::Type TSource;
416 	typedef typename Iterator<TSource, Rooted>::Type TStringIterator;
417 
418 // 	typedef typename Row<TAlign>::Type TRow;
419 // 	typedef typename Iterator<TRow, Rooted>::Type TAlignIterator;
420 
421 	TStringIterator readBase = begin(source(row(align,0)));
422 
423 	int readPos = 0;
424 	int inserted = 0;
425 	int deleted = 0;
426  	while(ali_it0 != ali_it0_stop && ali_it1 != ali_it1_stop)
427 	{
428 		inserted = 0;
429 		deleted = 0;
430 		int matched = 0;
431 		while(ali_it0!=ali_it0_stop && ali_it1!=ali_it1_stop && !isGap(ali_it0)&& !isGap(ali_it1))
432 		{
433 			++readPos;
434 			if(*ali_it1 != *ali_it0)
435 				appendValue(mutations,Pair<Dna5,int>((Dna5)(*readBase),readPos + offset));
436 			++readBase;
437 			++ali_it0;
438 			++ali_it1;
439 			++matched;
440 		}
441 		if(matched>0) appendValue(cigar,Pair<char,int>('M',matched));
442 		while(ali_it0!=ali_it0_stop && isGap(ali_it0))
443 		{
444 			++ali_it0;
445 			++ali_it1;
446 			++deleted;
447 		}
448 		if(deleted>0) appendValue(cigar,Pair<char,int>('D',deleted));
449 		while(isGap(ali_it1)&& ali_it1!=ali_it1_stop)
450 		{
451 			++ali_it0;
452 			++ali_it1;
453 			++readPos;
454 			appendValue(mutations,Pair<Dna5,int>((Dna5)(*readBase),readPos + offset));
455 			++readBase;
456 			++inserted;
457 		}
458 		if(inserted>0) appendValue(cigar,Pair<char,int>('I',inserted));
459 	}
460 	// end gaps can happen in split mapping
461 	while(ali_it0!=ali_it0_stop)
462 	{
463 		++ali_it0;
464 		++deleted;
465 	}
466 	if(deleted>0) appendValue(cigar,Pair<char,int>('D',deleted));
467 	while(ali_it1 != ali_it1_stop)
468 	{
469 		++ali_it1;
470 		++readPos;
471 		appendValue(mutations,Pair<Dna5,int>((Dna5)(*readBase),readPos + offset));
472 		++inserted;
473 	}
474 	if(inserted>0) appendValue(cigar,Pair<char,int>('I',inserted + offset));
475 
476 }
477 
478 
479 
480 #ifdef RAZERS_DIRECT_MAQ_MAPPING
481 //////////////////////////////////////////////////////////////////////////////
482 // Assign mapping quality and remove suboptimal matches
483 template < typename TMatches, typename TReads, typename TCooc, typename TCounts, typename TSpec >
assignMappingQuality(TMatches & matches,TReads & reads,TCooc & cooc,TCounts & cnts,RazerSOptions<TSpec> & options)484 void assignMappingQuality(TMatches &matches, TReads & reads, TCooc & cooc, TCounts &cnts, RazerSOptions<TSpec> & options)
485 {
486 	typedef typename Value<TMatches>::Type				TMatch;
487 	typedef typename Iterator<TMatches, Standard>::Type		TIterator;
488 
489 	//matches are already sorted
490 	//::std::sort(
491 	//	begin(matches, Standard()),
492 	//	end(matches, Standard()),
493 	//	LessRNoMQ<TMatch>());
494 
495 
496 	int maxSeedErrors = (int)(options.errorRate*options.artSeedLength)+1;
497 	unsigned readNo = -1;
498 
499 	TIterator it = begin(matches, Standard());
500 	TIterator itEnd = end(matches, Standard());
501 	TIterator dit = it;
502 
503 	int bestQualSum, secondBestQualSum;
504 	int secondBestDist = -1 ,secondBestMatches = -1;
505 	for (; it != itEnd; ++it)
506 	{
507 		if ((*it).orientation == '-') continue;
508 		bool mappingQualityFound = false;
509 		int mappingQuality = 0;
510 		int qualTerm1,qualTerm2;
511 
512 		readNo = (*it).rseqNo;
513 		bestQualSum = (*it).mScore;
514 
515 		if(++it!=itEnd && (*it).rseqNo==readNo)
516 		{
517 			secondBestQualSum = (*it).mScore;
518 			secondBestDist = (*it).editDist;
519 			secondBestDist = (*it).editDist;
520 			secondBestMatches = cnts[1][readNo] >> 5;
521 //CHECKcnts		secondBestMatches = cnts[secondBestDist][readNo];
522 //			secondBestMatches = cnts[secondBestDist][readNo];
523 			(*it).orientation = '-';
524 		//	if(secondBestDist<=bestDist) unique=0;
525 		}
526 		else secondBestQualSum = -1000;
527 		--it; //it is now at best match of current rseqNo
528 
529 		int bestDist = (*it).editDist;
530 		int kPrime = (*it).seedEditDist;
531 		if((bestQualSum==secondBestQualSum) || (kPrime>maxSeedErrors))
532 			mappingQualityFound = true;   //mq=0
533 		else{
534 			if(secondBestQualSum == -1000) qualTerm1 = 99;
535 			else
536 			{
537 				qualTerm1 = (int)(secondBestQualSum - bestQualSum - 4.343 * log((double)secondBestMatches));
538 				//if (secondBestKPrime - kPrime <= 1 && qualTerm1 > options.mutationRateQual) qualTerm1 = options.mutationRateQual; //TODO abchecken was mehr sinn macht
539 				if (secondBestDist - bestDist <= 1 && qualTerm1 > options.mutationRateQual) qualTerm1 = options.mutationRateQual;
540 			}
541 			float avgSeedQual = 0.0;
542 			if(!mappingQualityFound)
543 			{
544 				//TODO!!! generalize and adapt to razers lossrates
545 				// lossrate 0.42 => -10 log10(0.42) = 4
546 				int kPrimeLoss = 4; // options.kPrimeLoss; // bezieht sich auf 3 fehler in 28bp
547 				qualTerm2 = kPrimeLoss + cooc[maxSeedErrors-kPrime];
548 
549 				for(unsigned j = 0; j<options.artSeedLength; ++j)
550 				{
551 					int q = getQualityValue(reads[readNo][j]);//(int)((unsigned char)(reads[readNo][j])>>3);
552 					if(q>options.mutationRateQual) q = options.mutationRateQual;
553 					avgSeedQual+=q;
554 				}
555 				avgSeedQual/=options.artSeedLength;
556 				//-10 log10(28-2) = 14;
557 				//generalize to -10 log10(artSeedLength - maxSeedErrors +1 ) // 14 fits for seedlength 28 to 32 with 2 errors
558 				if(avgSeedQual>14) qualTerm2 += (int)((maxSeedErrors-kPrime)*(avgSeedQual-14));
559 			}
560 		}
561 		if (!mappingQualityFound) mappingQuality = (qualTerm1<qualTerm2) ? qualTerm1:qualTerm2;
562 		if (mappingQuality < 0) mappingQuality = 0;
563 		(*it).mScore = mappingQuality;
564 
565 		*dit = *it;
566 	//	if(secondBestQualSum != -1000) ++it;
567 		++dit;
568 	}
569 	resize(matches, dit - begin(matches, Standard()));
570 }
571 #endif
572 
573 
574 //////////////////////////////////////////////////////////////////////////////
575 // Dump an alignment
576 template <typename TFile, typename TSource, typename TSpec>
577 inline void
dumpAlignment(TFile & target,Align<TSource,TSpec> const & source)578 dumpAlignment(TFile & target, Align<TSource, TSpec> const & source)
579 {
580 	typedef Align<TSource, TSpec> const TAlign;
581 	typedef typename Row<TAlign>::Type TRow;
582 	typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
583 	typedef typename Position<TAlign>::Type TPosition;
584 
585 	TRowsPosition row_count = length(rows(source));
586 	TPosition begin_ = beginPosition(cols(source));
587 	TPosition end_ = endPosition(cols(source));
588 
589 	// Print sequences
590 	for(TRowsPosition i=0;i<row_count;++i) {
591 		if (i == 0)
592 			streamPut(target, "#Read:   ");
593 		else
594 			streamPut(target, "#Genome: ");
595 		TRow& row_ = row(source, i);
596 		typedef typename Iterator<typename Row<TAlign>::Type const>::Type TIter;
597 		TIter begin1_ = iter(row_, begin_);
598 		TIter end1_ = iter(row_, end_);
599 		for (; begin1_ != end1_; ++begin1_) {
600 			if (isGap(begin1_))
601 			    streamPut(target, gapValue<char>());
602 			else
603 			    streamPut(target, *begin1_);
604 		}
605 		streamPut(target, '\n');
606 	}
607 }
608 
609 template<typename TMatches, typename TReads, typename TReadNames, typename TSpec>
610 void
dumpUnmappedReads(TMatches & matches,TReads const & reads,TReadNames const & readIDs,RazerSOptions<TSpec> & options)611 dumpUnmappedReads(TMatches				&matches,
612 					TReads const		&reads,
613 					TReadNames const	&readIDs,
614 					RazerSOptions<TSpec> &options)
615 {
616 	typedef typename Iterator<TMatches>::Type TIterator;
617 
618 	::std::ofstream file;
619 	CharString fileName = options.outputUnmapped;
620 	if (empty(fileName))
621 		return;
622 
623 	file.open(toCString(fileName), ::std::ios_base::out | ::std::ios_base::trunc);
624 	if (!file.is_open()) {
625 		::std::cerr << "Failed to open output file" << ::std::endl;
626 		return;
627 	}
628 
629 	char headerChar = '>';
630 	if(options.readsWithQualities || options.fastaIdQual)
631 		headerChar = '@';
632 
633 	// sort according to read number
634 	::std::sort(begin(matches, Standard()), end(matches, Standard()), LessRNoGPos<TMatch>());
635 
636 	TIterator mIt = begin(matches,Standard());
637 	TIterator mEnd = end(matches,Standard());
638 
639 	unsigned readNo = 0;
640 	// output reads without any matches
641 	while(mIt != mEnd && readNo < length(reads))
642 	{
643 		while((*mIt).rseqNo > readNo)
644 		{
645 //			std::cout << "readNo=" << readNo << " + ";
646 			file << headerChar << readIDs[readNo] << std::endl << reads[readNo] << std::endl;
647 			if(options.readsWithQualities || options.fastaIdQual)
648 			{
649 				file << '+' << std::endl;
650 				for(unsigned j=0;j<length(reads[readNo]);++j)
651 					file << (char)(getQualityValue(reads[readNo][j])+ 33);
652 				file << std::endl;
653 			}
654 			++readNo;
655 		}
656 		while((*mIt).rseqNo == readNo && mIt != mEnd)
657 		{
658 			++mIt;
659 		}
660 		++readNo;
661 	}
662 //	std::cout << std::endl << "readNoInBetween=" << readNo << " + " << std::endl;
663 
664 	while(readNo < length(reads))
665 	{
666 //		std::cout << "readNo=" << readNo << " + ";
667 		file << headerChar << readIDs[readNo] << std::endl << reads[readNo] << std::endl;
668 		if(options.readsWithQualities || options.fastaIdQual)
669 		{
670 			file << '+' << std::endl;
671 			for(unsigned j=0;j<length(reads[readNo]);++j)
672 				file << (char)(getQualityValue(reads[readNo][j])+ 33);
673 			file << std::endl;
674 		}
675 		++readNo;
676 	}
677 	return;
678 
679 
680 }
681 
682 
683 //////////////////////////////////////////////////////////////////////////////
684 // Output matches
685 template <
686 	typename TMatches,
687 	typename TGenomeNames,
688 	typename TReads,
689 	typename TReadRegions,
690 	typename TReadNames,
691 	typename TCounts,
692 	typename TSpec
693 >
dumpMatches(TMatches & matches,TGenomeNames const & genomeIDs,StringSet<CharString> & genomeFileNameList,::std::map<unsigned,::std::pair<::std::string,unsigned>> & gnoToFileMap,TReads const & reads,TReadRegions const & readRegions,TCounts & stats,TReadNames const & readIDs,CharString readFName,CharString errorPrbFileName,RazerSOptions<TSpec> & options)694 void dumpMatches(
695 	TMatches &matches,							// forward/reverse matches
696 	TGenomeNames const &genomeIDs,				// Genome names (read from Fasta file, currently unused)
697 	StringSet<CharString> &genomeFileNameList,	// list of genome names (e.g. {"hs_ref_chr1.fa","hs_ref_chr2.fa"})
698 	::std::map<unsigned,::std::pair< ::std::string,unsigned> > &gnoToFileMap, //map to retrieve genome filename and sequence number within that file
699 	TReads const &reads,
700 	TReadRegions const &readRegions,
701 	TCounts & stats,							// Match statistics (possibly empty)
702 	TReadNames const &readIDs,					// Read names (read from Fasta file, currently unused)
703 	CharString readFName,					// read name (e.g. "reads.fa"), used for file/read naming
704 	CharString errorPrbFileName,
705 	RazerSOptions<TSpec> &options)
706 {
707 	typedef typename Value<TMatches>::Type		TMatch;
708 	//typedef typename Value<TReads>::Type		TRead;
709 	//typedef typename Value<TGenomeSet>::Type	TGenome;
710 	typedef typename TMatch::TGPos				TGPos;
711 
712 	//dump umapped reads in a separate file if filename was specified
713 	dumpUnmappedReads(matches, reads, readIDs, options);
714 
715 	if (options.minMatchLen > 0 && options.outputFormat != 4) options.outputFormat = 33;
716 
717 	if (options.outputFormat == 2)
718 	{
719 		options.sortOrder = 0;		// ... to count multiple matches
720 	}
721 
722 	if (options.outputFormat == 2)
723 	{
724 		options.maxHits = 1;		// Eland outputs at most one match
725 		options.sortOrder = 0;		// read numbers are increasing
726 		options.positionFormat = 1;	// bases in file are numbered starting at 1
727 		options.dumpAlignment = options.hammingOnly;
728 	}
729 #ifdef RAZERS_DIRECT_MAQ_MAPPING
730 	if (options.maqMapping) options.outputFormat = 3;
731 	int maxSeedErrors = (int)(options.errorRate * options.artSeedLength); //without + 1 here, used to check whether match should be supported if noBelowIdentity option is switched on
732 #endif
733 
734 	if (options.outputFormat == 3)
735 	{
736 		options.sortOrder = 1;		//  sort according to gPos
737 		options.positionFormat = 1;	// bases in file are numbered starting at 1
738 		//options.dumpAlignment = false;
739 	}
740 	if (options.outputFormat == 33 || options.outputFormat == 4)
741 	{
742 		options.positionFormat = 1;	// bases in file are numbered starting at 1
743 		options.dumpAlignment = false;
744 		options.sortOrder = 1;
745 	}
746 
747 
748 	// error profile
749 	unsigned maxReadLength = options.maxReadLength;
750 
751 	SEQAN_PROTIMESTART(dump_time);
752 
753 	// load Genome sequences for alignment dumps
754 	TGenomeSet genomes;
755 	if ((options.outputFormat == 0 && !options.hammingOnly) || options.dumpAlignment || !empty(errorPrbFileName) || options.outputFormat == 33 || options.outputFormat == 4)
756 		if (!loadGenomes(genomes, genomeFileNameList)) {
757 			::std::cerr << "Failed to load genomes" << ::std::endl;
758 			options.dumpAlignment = false;
759 		}
760 
761 	// how many 0's should be padded?
762 	int pzeros = 0;
763 	for (unsigned l = length(reads); l > 9; l = l / 10)
764 		++pzeros;
765 
766 	int gzeros = 0;
767 	for (unsigned l = length(genomes); l > 9; l = l / 10)
768 		++gzeros;
769 
770 	// remove the directory prefix of readFName
771 	::std::string _readName;
772 	assign(_readName, readFName);
773 	size_t lastPos = _readName.find_last_of('/') + 1;
774 	if (lastPos == _readName.npos) lastPos = _readName.find_last_of('\\') + 1;
775 	if (lastPos == _readName.npos) lastPos = 0;
776 	CharString readName = _readName.substr(lastPos);
777 
778 
779 	typedef Align<String<Dna5>, ArrayGaps> TAlign;
780 	TAlign align, alignL, alignR;
781 	Score<int> scoreType = Score<int>(0, -999, -1001, -1000);	// levenshtein-score (match, mismatch, gapOpen, gapExtend)
782 //	Score<int> scoreType(0,-1,-1,-1);
783 
784 	if (options.hammingOnly)
785 		scoreType.data_mismatch = -1;
786 	resize(rows(align), 2);
787 	resize(rows(alignL), 2);
788 	resize(rows(alignR), 2);
789 
790     VirtualStream<char, Output> file;
791     bool success;
792     if (!isEqual(options.output, "-"))
793         success = open(file, toCString(options.output));
794     else
795         success = open(file, std::cout, Nothing());
796 
797     if (!success) {
798         ::std::cerr << "Failed to open output file" << ::std::endl;
799         return;
800     }
801 
802 
803 	String<short> ambiStates;
804 	if(options.minMatchLen == 0)
805 		maskDuplicates(matches);
806 	if (options.outputFormat > 0
807 #ifdef RAZERS_DIRECT_MAQ_MAPPING
808 	 && !options.maqMapping
809 #endif
810 	)
811 	{
812 		// match statistics
813 		unsigned maxErrors = (int)(options.errorRate * maxReadLength);
814 		//if (maxErrors > 10) maxErrors = 10;
815 		resize(stats, maxErrors + 1);
816 		for (unsigned i = 0; i <= maxErrors; ++i)
817 			resize(stats[i], length(reads), 0);
818 		countMatches(matches, stats);
819 	}
820 
821 
822 
823 	Nothing nothing;
824 	unsigned currSeqNo = 0;
825 #ifdef RAZERS_DIRECT_MAQ_MAPPING
826 	if(options.maqMapping)
827 	{
828 		String<int> cooc;
829 		compactMatches(matches, stats, options, true, nothing, false); //only best two matches per read are kept
830 		countCoocurrences(matches,cooc,options);	//coocurrence statistics are filled
831 		assignMappingQuality(matches,reads,cooc,stats,options);//mapping qualities are assigned and only one match per read is kept
832 	}
833 	else
834 #endif
835 
836 #ifdef RAZERS_MICRO_RNA
837 	if(options.microRNA)purgeAmbiguousRnaMatches(matches,options);
838 	else
839 #endif
840 	{
841 		if(options.minMatchLen>0)
842 			compactAndCountSplicedMatches(matches, ambiStates, options, true);
843 		else
844 			compactMatches(matches, stats, options, true, nothing);
845 	}
846         int jj = 0;
847         if(!(options.minMatchLen > 0 ))
848 	  switch (options.sortOrder) {
849 		case 0:
850 			::std::sort(
851 				begin(matches, Standard()),
852 				end(matches, Standard()),
853 				LessRNoGPos<TMatch>());
854 			break;
855 
856 		case 1:
857 			::std::sort(
858 				begin(matches, Standard()),
859 				end(matches, Standard()),
860 				LessGPosRNo<TMatch>());
861 			break;
862 
863 	}
864 
865 	typename Iterator<TMatches, Standard>::Type	it = begin(matches, Standard());
866 	typename Iterator<TMatches, Standard>::Type	itEnd = end(matches, Standard());
867 
868 
869 	Dna5String gInf, gInfL, gInfR;
870 	char _sep_ = '\t';
871 	unsigned viewPosReadFirst = 0;
872 	unsigned viewPosReadLast  = 0;
873 
874 	switch (options.outputFormat)
875 	{
876 		case 0:	// Razer Format
877 //			_sep_ = ',';
878 			for(; it != itEnd; ++it)
879 			{
880 				unsigned	readLen = length(reads[(*it).rseqNo]);
881 				double		percId = 100.0 * (1.0 - (double)(*it).editDist / (double)readLen);
882 #ifdef RAZERS_MICRO_RNA
883 				percId = 100.0 * (1.0 - (double)(*it).editDist / (double) ((*it).mScore));
884 #endif
885 				switch (options.readNaming)
886 				{
887 					// 0..filename is the read's Fasta id
888 					case 0:
889 						file << readIDs[(*it).rseqNo];
890 						break;
891 
892 					// 1..filename is the read filename + seqNo
893 					case 1:
894 						file.fill('0');
895 						file << readName << '#' << ::std::setw(pzeros) << (*it).rseqNo + 1;
896 						break;
897 
898 					// 2..filename is the read sequence itself
899 					case 2:
900 						file << reads[(*it).rseqNo];
901 				}
902 
903 				file << _sep_ << options.positionFormat << _sep_ << readLen << _sep_ << (*it).orientation << _sep_;
904 
905 				switch (options.genomeNaming)
906 				{
907 					// 0..filename is the read's Fasta id
908 					case 0:
909 						file << genomeIDs[(*it).gseqNo];
910 						break;
911 
912 					// 1..filename is the read filename + seqNo
913 					case 1:
914 						file.fill('0');
915 						file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1;
916 				}
917 
918 				// get alignment to dump or to fix end coordinates
919 				if (options.dumpAlignment || !options.hammingOnly)
920 				{
921 #ifdef RAZERS_MICRO_RNA
922 					if(options.microRNA)
923 						assignSource(row(align, 0), prefix(reads[(*it).rseqNo],(*it).mScore));
924 					else
925 #endif
926 					assignSource(row(align, 0), reads[(*it).rseqNo]);
927 					assignSource(row(align, 1), infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd));
928 #ifdef RAZERS_MATEPAIRS
929 					if ((*it).pairId != 0 && ((*it).rseqNo & 1))
930 						reverseComplement(source(row(align, 0)));
931 #endif
932 					if ((*it).orientation == 'R')
933 						reverseComplement(source(row(align, 1)));
934 
935 					globalAlignment(align, scoreType, AlignConfig<false,true,true,false>(), Gotoh());
936 
937 					// transform first and last read character to genomic positions
938 					viewPosReadFirst = toViewPosition(row(align, 0), 0);
939 					viewPosReadLast  = toViewPosition(row(align, 0), readLen - 1);
940 					unsigned genomePosReadFirst = toSourcePosition(row(align, 1), viewPosReadFirst);
941 					unsigned genomePosReadLast  = toSourcePosition(row(align, 1), viewPosReadLast);
942 
943 					if ((*it).orientation == 'R')
944 					{
945 						(*it).gBegin = (*it).gEnd - (genomePosReadLast + 1);
946 						(*it).gEnd -= genomePosReadFirst;
947 					}
948 					else
949 					{
950 						(*it).gEnd = (*it).gBegin + (genomePosReadLast + 1);
951 						(*it).gBegin += genomePosReadFirst;
952 					}
953 				}
954 
955 				file << _sep_ << ((*it).gBegin + options.positionFormat) << _sep_ << (*it).gEnd << _sep_ << ::std::setprecision(5) << percId;
956 #ifdef RAZERS_MICRO_RNA
957 				if(options.microRNA) file << _sep_ << (*it).mScore;
958 #endif
959 
960 #ifdef RAZERS_MATEPAIRS
961 				if ((*it).pairId != 0)
962 					file << _sep_ << (*it).pairId << _sep_ << (*it).pairScore << _sep_ << (*it).mateDelta;
963 #endif
964 				file << ::std::endl;
965 
966 				if (options.dumpAlignment)
967 					dumpAlignment(file, align, viewPosReadFirst, viewPosReadLast + 1);
968 			}
969 			break;
970 
971 
972 		case 1:	// Enhanced Fasta Format
973 			_sep_ = ',';
974 			for(unsigned matchReadNo = -1, matchReadCount = 0; it != itEnd; ++it)
975 			{
976 				unsigned	readLen = length(reads[(*it).rseqNo]);
977 				double		percId = 100.0 * (1.0 - (double)(*it).editDist / (double)readLen);
978 
979 				if (matchReadNo != (*it).rseqNo)
980 				{
981 					matchReadNo = (*it).rseqNo;
982 					matchReadCount = 0;
983 				} else
984 					++matchReadCount;
985 
986 				::std::string fastaID;
987 				assign(fastaID, readIDs[(*it).rseqNo]);
988 
989 				::std::string id = fastaID;
990 				int fragId = (*it).rseqNo;
991 				bool appendMatchId = options.maxHits > 1;
992 
993 				size_t left = fastaID.find_first_of('[');
994 				size_t right = fastaID.find_last_of(']');
995 				if (left != fastaID.npos && right != fastaID.npos && left < right)
996 				{
997 					fastaID.erase(right);
998 					fastaID.erase(0, left + 1);
999 					replace(fastaID.begin(), fastaID.end(), ',', ' ');
1000 					size_t pos = fastaID.find("id=");
1001 					if (pos != fastaID.npos) {
1002 						::std::istringstream iss(fastaID.substr(pos + 3));
1003 						iss >> id;
1004 //						appendMatchId = false;
1005 					}
1006 					pos = fastaID.find("fragId=");
1007 					if (pos != fastaID.npos) {
1008 						::std::istringstream iss(fastaID.substr(pos + 7));
1009 						iss >> fragId;
1010 					}
1011 				}
1012 
1013 				if ((*it).orientation == 'F')
1014 					// forward strand
1015 					file << '>' << ((*it).gBegin + options.positionFormat) << _sep_ << (*it).gEnd;
1016 				else
1017 					// reverse strand (switch begin and end)
1018 					file << '>' << (*it).gEnd << _sep_ << ((*it).gBegin + options.positionFormat);
1019 
1020 				unsigned ambig = 0;
1021 				for (unsigned i = 0; i <= (*it).editDist && i < length(stats); ++i)
1022 					ambig += stats[i][(*it).rseqNo];
1023 
1024 				file << "[id=" << id;
1025 				if (appendMatchId) file << "_" << matchReadCount;
1026 				file << ",fragId=" << fragId;
1027 				file << ",contigId=" << genomeIDs[(*it).gseqNo];
1028 				file << ",errors=" << (*it).editDist << ",percId=" << ::std::setprecision(5) << percId;
1029 				file << ",ambiguity=" << ambig << ']' << ::std::endl;
1030 
1031 				file << reads[(*it).rseqNo] << ::std::endl;
1032 			}
1033 			break;
1034 
1035 
1036 		case 2:	// Eland Format
1037 			_sep_ = '\t';
1038 			for(unsigned readNo = 0; readNo < length(reads); ++readNo)
1039 			{
1040 				switch (options.readNaming)
1041 				{
1042 					// 0..filename is the read's Fasta id
1043 					case 0:
1044 						file << '>' << readIDs[readNo] << _sep_;
1045 						break;
1046 
1047 					// 1..filename is the read filename + seqNo
1048 					case 1:
1049 						file.fill('0');
1050 						file << readName << '#' << ::std::setw(pzeros) << readNo + 1  << _sep_;
1051 						break;
1052 				}
1053 
1054 				if (it == itEnd || readNo < (*it).rseqNo)
1055 				{
1056 					if (!empty(reads[readNo]))
1057 						file << reads[readNo] << _sep_ << "NM" << _sep_ << '0' << _sep_ << '0' << _sep_ << '0' << ::std::endl;
1058 					else
1059 					{
1060 						for (unsigned i = 0; i < maxReadLength; ++i)
1061 							file << '.';
1062 						file << _sep_ << "QC" << _sep_ << '0' << _sep_ << '0' << _sep_ << '0' << ::std::endl;
1063 					}
1064 				} else
1065 				{
1066 					file << reads[readNo] << _sep_;
1067 					unsigned bestMatches = 1;
1068 					if ((unsigned)(*it).editDist < length(stats))
1069 						bestMatches = stats[(*it).editDist][readNo];
1070 
1071 					if (bestMatches == 0) file << '?';	// impossible
1072 					if (bestMatches == 1) file << 'U';	// unique best match
1073 					if (bestMatches >  1) file << 'R';	// non-unique best matches
1074 
1075 					file << (*it).editDist << _sep_ << stats[0][readNo] << _sep_ << stats[1][readNo] << _sep_ << stats[2][readNo];
1076 
1077 					if (bestMatches == 1)
1078 					{
1079 						file << _sep_;
1080 						switch (options.genomeNaming)
1081 						{
1082 							// 0..filename is the read's Fasta id
1083 							case 0:
1084 								file << genomeIDs[(*it).gseqNo];
1085 								break;
1086 
1087 							// 1..filename is the read filename + seqNo
1088 							case 1:
1089 								file.fill('0');
1090 								file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1;
1091 						}
1092 
1093 						if ((*it).orientation == 'F')
1094 							file << _sep_ << ((*it).gBegin + options.positionFormat) << _sep_ << 'F' << _sep_ << "..";
1095 						else
1096 							file << _sep_ << (*it).gEnd << _sep_ << 'R' << _sep_ << "..";
1097 
1098 						if ((*it).editDist > 0 && options.dumpAlignment && options.hammingOnly)
1099 						{
1100 							gInf = infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd);
1101 							if ((*it).orientation == 'R')
1102 								reverseComplement(gInf);
1103 							for (unsigned i = 0; i < length(gInf); ++i)
1104 								if ((options.compMask[ordValue(reads[readNo][i])] &
1105 									options.compMask[ordValue(gInf[i])]) == 0)
1106 									file << _sep_ << i + 1 << gInf[i];
1107 						}
1108 					}
1109 					file << ::std::endl;
1110 					++it;
1111 				}
1112 			}
1113 			break;
1114 		case 3: // Gff:  printf "$chr $name_$format read $pos %ld . $dir . ID=$col[0]$unique$rest\n",$pos+$len-1;
1115 			// NOTE(marehr): filecount+=2 might be a potential bug [https://github.com/seqan/seqan/issues/2165]
1116 			// In revision 4dbf27b55 and before, filecount was incremented twice at the
1117 			// end of the for loop, which caused a compiler warning (once in the body
1118 			// and once in the iteration_expression of the for loop). We kept this
1119 			// behaviour, because we have no active maintainer for this app.
1120 			for (unsigned filecount = 0; filecount < length(genomeFileNameList); filecount+=2)
1121 			{
1122 				// open genome file
1123 				SeqFileIn gFile;
1124 				if (!open(gFile, toCString(genomeFileNameList[filecount])))
1125 				{
1126 					std::cerr << "Couldn't open genome file." << std::endl;
1127 					break;
1128 				}
1129 
1130                 CharString currId;
1131 				Dna5String currGenome;
1132 
1133 				// iterate over genome sequences
1134 				for(; !atEnd(gFile); ++currSeqNo)
1135 				{
1136 					readRecord(currId, currGenome, gFile);			// read Fasta sequence
1137 					while(it != itEnd && (*it).gseqNo == currSeqNo)
1138 					{
1139 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1140 						if(options.maqMapping && options.noBelowIdentity && (*it).seedEditDist > maxSeedErrors)
1141 						{
1142 							++it;
1143 							continue;
1144 						}
1145 #endif
1146 
1147 						unsigned currReadNo = (*it).rseqNo;
1148 						int unique = 1;
1149 						unsigned bestMatches = 0;
1150 
1151 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1152 						if(options.maqMapping)
1153 							bestMatches = stats[0][currReadNo] >> 5;
1154 						else
1155 #endif
1156 						{
1157 							if (bestMatches == 0 && (unsigned)(*it).editDist < length(stats))
1158 							bestMatches = stats[(*it).editDist][currReadNo];
1159 
1160 						}
1161 
1162 						bool suboptimal = false;
1163 						if (
1164 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1165 							!options.maqMapping &&
1166 #endif
1167 							(unsigned)(*it).editDist > 0 )
1168 						{
1169 							for(unsigned d = 0; d < (unsigned)(*it).editDist; ++d)
1170 								if (stats[d][currReadNo]>0) suboptimal=true;
1171 						}
1172 
1173 						if (bestMatches !=  1)
1174 						{
1175 							unique = 0;
1176 							if(options.purgeAmbiguous)
1177 							{
1178 								++it;
1179 								continue;
1180 							}
1181 
1182 //							if((*it).mScore > 0) std::cout << (*it).mScore << "<-non uniq but score > 0\n";
1183 //							++it;
1184 //							continue; // TODO: output non-unique matches (concerns maq mapping only)
1185 						}
1186 						unsigned readLen = length(reads[currReadNo]);
1187 
1188 						String<Dna5Q> readInf = infix(reads[currReadNo],0,readLen);
1189 						switch (options.genomeNaming)
1190 						{
1191 							// 0..filename is the read's Fasta id
1192 							case 0:
1193 								file << genomeIDs[(*it).gseqNo] <<'\t';
1194 								break;
1195 							// 1..filename is the read filename + seqNo
1196 							case 1:
1197 								file.fill('0');
1198 								file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1 << '\t';
1199 								break;
1200 						}
1201 
1202 						// get alignment to dump or to fix end coordinates
1203 						if (!options.hammingOnly)
1204 						{
1205 							assignSource(row(align, 0), readInf);
1206 							//std::cout << "begin=" << (*it).gBegin <<" end="<< (*it).gEnd << std::endl;
1207 							assignSource(row(align, 1), infix(currGenome, (*it).gBegin, (*it).gEnd));
1208 							if ((*it).orientation == 'R')
1209 								reverseComplement(source(row(align, 1)));
1210 
1211 							globalAlignment(align, scoreType, AlignConfig<false,true,true,false>(), Gotoh());
1212 
1213 							//std::cout << align << std::endl;
1214 							//dumpAlignment(::std::cout, align);
1215 
1216 							// transform first and last read character to genomic positions
1217 							viewPosReadFirst  = toViewPosition(row(align, 0), 0);
1218 							viewPosReadLast   = toViewPosition(row(align, 0), readLen - 1);
1219 							TGPos genomePosReadFirst = toSourcePosition(row(align, 1), viewPosReadFirst);
1220 							TGPos genomePosReadLast  = toSourcePosition(row(align, 1), viewPosReadLast);
1221 							if ((*it).orientation == 'R')
1222 							{
1223 								// watch out at chromosome borders
1224 								(*it).gBegin = (*it).gEnd - _min((TGPos)(genomePosReadLast + 1),(*it).gEnd);
1225 								(*it).gEnd -= genomePosReadFirst;
1226 							}
1227 							else
1228 							{
1229 								(*it).gEnd = (*it).gBegin + _min(genomePosReadLast + 1, static_cast<TGPos>(length(currGenome) - (*it).gBegin));
1230 								(*it).gBegin += genomePosReadFirst;
1231 							}
1232 
1233 							clear(row(align,1));
1234 							clear(row(align,0));
1235 							assignSource(row(align, 0), readInf);
1236 							//std::cout << "begin=" << (*it).gBegin <<" end="<< (*it).gEnd << std::endl;
1237 							assignSource(row(align, 1), infix(currGenome, (*it).gBegin, (*it).gEnd));
1238 							if ((*it).orientation == 'R')
1239 								reverseComplement(source(row(align, 1)));
1240 
1241 							globalAlignment(align, scoreType, AlignConfig<false,true,true,false>(), Gotoh());
1242 
1243 							//std::cout << align << std::endl;
1244 							//dumpAlignment(::std::cout, align);
1245 
1246 							// transform first and last read character to genomic positions
1247 							viewPosReadFirst  = toViewPosition(row(align, 0), 0);
1248 							viewPosReadLast   = toViewPosition(row(align, 0), readLen - 1);
1249 
1250 
1251 						}
1252 
1253 						//file <<  options.runID << "_razers\tread";
1254 						file << "razers\tread";
1255 						file << '\t' << ((*it).gBegin + options.positionFormat) << '\t' << (*it).gEnd << '\t';
1256 						double percId = 100.0 * (1.0 - (double)(*it).editDist / (double)readLen);
1257 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1258 						if(options.maqMapping)
1259 							file << (*it).mScore << "\t";
1260 						else
1261 #endif
1262 							file << percId << "\t";
1263 
1264 						if ((*it).orientation == 'F')
1265 							file << '+' << '\t' << '.' <<'\t';
1266 						else
1267 							file << '-' << '\t' << '.' <<'\t';
1268 
1269 						switch (options.readNaming)
1270 						{
1271 							// 0..filename is the read's Fasta id
1272 							case 0:
1273 								file << "ID=" <<readIDs[currReadNo];
1274 								break;
1275 
1276 							// 1..filename is the read filename + seqNo
1277 							case 1:
1278 								file.fill('0');
1279 								file << "ID=" << readName << '#' << ::std::setw(pzeros) << currReadNo + 1;
1280 								break;
1281 						}
1282 						if(suboptimal) file << ";suboptimal";
1283 						else
1284 						{
1285 							if(unique==1) file << ";unique";
1286 							if(unique==0) file << ";multi";
1287 						}
1288 
1289 
1290 						if ((*it).editDist > 0)
1291 						{
1292 							if (options.hammingOnly)
1293 							{
1294 								gInf = infix(currGenome, (*it).gBegin, (*it).gEnd);
1295 								if ((*it).orientation == 'R')
1296 									reverseComplement(gInf);
1297 								bool first = true;
1298 								file << ";cigar=" << readLen << "M";
1299 								file << ";mutations=";
1300 								for (unsigned i = 0; i < length(gInf); ++i)
1301 									if ((options.compMask[ordValue(readInf[i])] &
1302 										options.compMask[ordValue(gInf[i])]) == 0)
1303 									{
1304 										if(first){ file << i + 1 << (Dna5)readInf[i]; first = false;}
1305 										else file <<','<< i + 1 << (Dna5)readInf[i];
1306 									}
1307 
1308 							}
1309 							else
1310 							{
1311 								std::stringstream cigar, mutations;
1312 								typedef typename Row<TAlign>::Type TRow;
1313 								typedef typename Iterator<TRow, Rooted>::Type TAlignIterator;
1314 								TAlignIterator ali_it0 = iter(row(align,0),viewPosReadFirst);
1315 								TAlignIterator ali_it1 = iter(row(align,1),viewPosReadFirst);
1316 								TAlignIterator ali_it0stop = iter(row(align,0),viewPosReadLast + 1);
1317 								TAlignIterator ali_it1stop = iter(row(align,1),viewPosReadLast + 1);
1318 
1319 								getCigarLine(align,cigar,mutations,ali_it0,ali_it0stop,ali_it1,ali_it1stop);
1320 								file << ";cigar="<<cigar.str();
1321 								if(length(mutations.str())>0)
1322 									file << ";mutations=" << mutations.str();
1323 							}
1324 						}
1325 
1326 						if(
1327 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1328 						options.maqMapping ||
1329 #endif
1330 						options.fastaIdQual || options.readsWithQualities)
1331 						{
1332 		//					file << ";read=";
1333 		//					for(unsigned j=0;j<length(reads[currReadNo]);++j)
1334 		//					{
1335 		//						file << (Dna5)reads[currReadNo][j];
1336 		//					}
1337 							file << ";quality=";
1338 							for(unsigned j=0;j<readLen;++j)
1339 							{
1340 								file << (char)(getQualityValue(readInf[j])+ 33);
1341 							}
1342 						}
1343 
1344 						file << ::std::endl;
1345 						if(options.dumpAlignment)
1346 						{
1347 							if((*it).editDist > 0 && !options.hammingOnly)
1348 								file << align;
1349 							else
1350 							{
1351 								clear(row(align,1));
1352 								clear(row(align,0));
1353 								assignSource(row(align, 0), readInf);
1354 								assignSource(row(align, 1), infix(currGenome, (*it).gBegin, (*it).gEnd));
1355 								if ((*it).orientation == 'R')
1356 									reverseComplement(source(row(align, 1)));
1357 								globalAlignment(align, scoreType, AlignConfig<false,true,true,false>(), Gotoh());
1358 								file << align;
1359 
1360 							}
1361 						}
1362 						++it;
1363 					}
1364 				}
1365 				close(gFile);
1366 			}
1367 			break;
1368 		case 4: // SAM output for splitMatches
1369 			while(it != itEnd)// && (*it).gseqNo == currSeqNo)
1370 			{
1371 				unsigned currReadNo = (*it).rseqNo;
1372 				Dna5String &currGenome = genomes[(*it).gseqNo];
1373 				unsigned readLen = length(reads[currReadNo]);
1374 				TMatch& mL = *it;
1375 				++it;
1376 				TMatch& mR = *it;
1377 				unsigned readLenL = mL.mScore;
1378 				unsigned readLenR = mR.mScore;
1379 				String<Dna5Q> readInfL = infix(reads[currReadNo],0,readLenL);
1380 				String<Dna5Q> readInfR = infix(reads[currReadNo],length(reads[currReadNo])-readLenR,length(reads[currReadNo]));
1381 				int offsetL = 0;
1382 				int offsetR = readLen - readLenR;
1383 				if(mL.orientation == 'R')
1384 				{
1385 					offsetR = 0;
1386 					offsetL = readLen - readLenL;
1387 					readInfL = infix(reads[currReadNo],length(reads[currReadNo])-readLenL,length(reads[currReadNo]));
1388 					readInfR = infix(reads[currReadNo],0,readLenR);
1389 				}
1390 
1391 				// get alignment to dump and get cigar string
1392 				if (!options.hammingOnly)
1393 				{
1394 					assignSource(row(alignL, 0), readInfL);
1395 					assignSource(row(alignL, 1), infix(currGenome, mL.gBegin, mL.gEnd));
1396 					if (mL.orientation == 'R')
1397 						reverseComplement(source(row(alignL, 1)));
1398 
1399 					globalAlignment(alignL, scoreType, AlignConfig<false,false,false,false>(), Gotoh());
1400 
1401 					assignSource(row(alignR, 0), readInfR);
1402 					assignSource(row(alignR, 1), infix(currGenome, mR.gBegin, mR.gEnd));
1403 					if (mR.orientation == 'R')
1404 						reverseComplement(source(row(alignR, 1)));
1405 
1406 					globalAlignment(alignR, scoreType, AlignConfig<false,false,false,false>(), Gotoh());
1407 				}
1408 
1409 				switch (options.readNaming)
1410 				{
1411 					// 0..filename is the read's Fasta id
1412 					case 0:
1413 						file << "" <<readIDs[currReadNo];
1414 						break;
1415 					// 1..filename is the read filename + seqNo
1416 					case 1:
1417 						file.fill('0');
1418 						file << "" << readName << '#' << ::std::setw(pzeros) << currReadNo + 1;
1419 						break;
1420 				}
1421 				int samFlag = 0;
1422 				if(ambiStates[jj] == 2) samFlag |= 0x100; //suboptimal
1423 				if (mR.orientation == 'R') samFlag |= 0x10;
1424 				if(!empty(readRegions) && options.anchored)
1425 				{
1426 					samFlag |= 1;
1427 					if(readRegions[currReadNo].i2.i1 > 1) samFlag |= 0x10;
1428 					else samFlag |= 0x20;
1429 					if((readRegions[currReadNo].i2.i1 & 1) == 1) samFlag |= 0x80;
1430 					else samFlag |= 0x40;
1431 				}
1432 				file << '\t' << samFlag << '\t';
1433 
1434 				switch (options.genomeNaming)
1435 				{
1436 					// 0..filename is the read's Fasta id
1437 					case 0:
1438 						file << genomeIDs[(*it).gseqNo] <<'\t';
1439 						break;
1440 					// 1..filename is the read filename + seqNo
1441 					case 1:
1442 						file.fill('0');
1443 						file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1 << '\t';
1444 						break;
1445 				}
1446 
1447 				file << (mL.gBegin + 1) << '\t' ;
1448 
1449 				//double percId = 100.0 * (1.0 - (double)(mL.editDist + mR.editDist) /(double)(mL.mScore+mR.mScore));
1450 
1451 				if(ambiStates[jj] == 0) file << 255 << '\t'; //unique
1452 				if(ambiStates[jj] > 0) file << 0 << '\t'; //multi or suboptimal
1453 				++jj;
1454 
1455 				SEQAN_ASSERT(mL.pairId == mR.pairId);
1456 
1457 				int indelLen = mR.mScore + mL.mScore - readLen;
1458 				if(mL.gEnd != mR.gBegin) indelLen = mR.gBegin - mL.gEnd;
1459 
1460 				String<Pair<Dna5,int> > mutationsStr, mutStrMid, mutStrL, mutStrR;
1461 				String<Pair<char,int> > cigarStr, cigarMid, cigarL, cigarR;
1462 				if(indelLen < 0)
1463 				{
1464 					int offset = readLenL;
1465 					if(mL.orientation == 'R')
1466 						offset = readLenR;
1467 					String<Dna5Q> readInfMid = infix(reads[currReadNo],offset,offset - indelLen);
1468 					for (int i = 0; i < -indelLen; ++i)
1469 						appendValue(mutStrMid, Pair<Dna5,int>(readInfMid[i],i+offset+1));
1470 					appendValue(cigarMid, Pair<char,int>('I',-indelLen));
1471 
1472 				}
1473 				if(indelLen > 0) appendValue(cigarMid, Pair<char,int>('N',indelLen));
1474 
1475 				if (options.hammingOnly)
1476 				{
1477 					if(indelLen == 0)
1478 						appendValue(cigarL, Pair<char,int>('M',readLen));
1479 					else
1480 					{
1481 						appendValue(cigarL, Pair<char,int>('M',readLenL));
1482 						appendValue(cigarR, Pair<char,int>('M',readLenR));
1483 					}
1484 
1485 					//make left part of mutation string
1486 					if (mL.editDist > 0)
1487 					{
1488 						gInfL = infix(currGenome, mL.gBegin, mL.gEnd);
1489 						if (mL.orientation == 'R')
1490 							reverseComplement(gInfL);
1491 						for (unsigned i = 0; i < length(gInfL); ++i)
1492 							if ((options.compMask[ordValue(readInfL[i])] & options.compMask[ordValue(gInfL[i])]) == 0)
1493 								appendValue(mutStrL, Pair<Dna5,int>(readInfL[i],i+offsetL+1));
1494 					}
1495 
1496 					//make right part of mutation string
1497 					if (mR.editDist > 0)
1498 					{
1499 						gInfR = infix(currGenome, mR.gBegin, mR.gEnd);
1500 						if (mR.orientation == 'R')
1501 							reverseComplement(gInfR);
1502 						for (unsigned i = 0; i < length(gInfR); ++i)
1503 							if ((options.compMask[ordValue(readInfR[i])] & options.compMask[ordValue(gInfR[i])]) == 0)
1504 								appendValue(mutStrR, Pair<Dna5,int>(readInfR[i],i+offsetR+1));
1505 					}
1506 				}
1507 				else
1508 				{
1509 					typedef typename Row<TAlign>::Type TRow;
1510 					typedef typename Iterator<TRow, Rooted>::Type TAlignIterator;
1511 
1512 					//left business
1513 					TAlignIterator aliL_it0 = begin(row(alignL,0));
1514 					TAlignIterator aliL_it1 = begin(row(alignL,1));
1515 					TAlignIterator aliL_it0stop = end(row(alignL,0));
1516 					TAlignIterator aliL_it1stop = end(row(alignL,1));
1517 					getCigarLine(alignL,cigarL,mutStrL,offsetL,aliL_it0,aliL_it0stop,aliL_it1,aliL_it1stop);
1518 
1519 					//right business
1520 					TAlignIterator aliR_it0 = begin(row(alignR,0));
1521 					TAlignIterator aliR_it1 = begin(row(alignR,1));
1522 					TAlignIterator aliR_it0stop = end(row(alignR,0));
1523 					TAlignIterator aliR_it1stop = end(row(alignR,1));
1524 					getCigarLine(alignR,cigarR,mutStrR,offsetR,aliR_it0,aliR_it0stop,aliR_it1,aliR_it1stop);
1525 				}
1526 				//now plug together the parts: cigar
1527 				if (mL.orientation == 'F')
1528 				{
1529 					append(cigarStr,cigarL);
1530 					append(cigarStr,cigarMid);
1531 					append(cigarStr,cigarR);
1532 					append(mutationsStr,mutStrL);
1533 					append(mutationsStr,mutStrMid);
1534 					append(mutationsStr,mutStrR);
1535 
1536 					// print cigar string
1537 					for (unsigned i = 0; i < length(cigarStr); ++i)
1538 						file << cigarStr[i].i2 << cigarStr[i].i1;
1539 				}
1540 				else
1541 				{
1542 					append(cigarStr,cigarR);
1543 					append(cigarStr,cigarMid);
1544 					append(cigarStr,cigarL);
1545 					append(mutationsStr,mutStrR);
1546 					append(mutationsStr,mutStrMid);
1547 					append(mutationsStr,mutStrL);
1548 
1549 					// print cigar string
1550 					for (unsigned i = length(cigarStr); i != 0; )
1551 					{
1552 						--i;
1553 						file << cigarStr[i].i2 << cigarStr[i].i1;
1554 					}
1555 				}
1556 
1557 				file << '\t';
1558 
1559 				if(!empty(readRegions) && options.anchored)
1560 				{
1561 
1562 					switch (options.genomeNaming)
1563 					{
1564 						// 0..filename is the read's Fasta id
1565 						case 0:
1566 							file << genomeIDs[(*it).gseqNo] <<'\t';
1567 							break;
1568 						// 1..filename is the read filename + seqNo
1569 						case 1:
1570 							file.fill('0');
1571 							file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1 << '\t';
1572 							break;
1573 					}
1574 
1575 					file << readRegions[currReadNo].i2.i2 << '\t' ;
1576 					//int libraryError;
1577 					int templateLen = 0;
1578 					if(readRegions[currReadNo].i2.i1 < 2)
1579 						templateLen = readRegions[currReadNo].i2.i2 + readLen -(int) mL.gBegin;
1580 						//libraryError = (int) mR.gEnd + readRegions[currReadNo].i2.i2 - options.libraryLength + 2*readLen;
1581 					else
1582 						templateLen = (int) mR.gEnd - readRegions[currReadNo].i2.i2;
1583 						//libraryError = (int) mL.gBegin - readRegions[currReadNo].i2.i2 + options.libraryLength - readLen ;
1584 					file << templateLen << '\t';
1585 				}
1586 				else file << "*\t*\t*\t";
1587 
1588                 if (mL.orientation == 'F')
1589 				    file << reads[currReadNo] << "\t*\t";
1590                 else
1591 				    file << reverseComplementString(reads[currReadNo]) << "\t*\t";
1592 				// quality
1593 				if(options.fastaIdQual)
1594 				{
1595 					for(unsigned j=0;j<length(reads[currReadNo]);++j)
1596 					{
1597 						file << (char)(getQualityValue(reads[currReadNo][j])+ 33);
1598 					}
1599 				}
1600 				file << "AS:i:" << mR.pairScore;
1601 
1602 				file << ::std::endl;
1603 				++it;
1604 			}
1605 			break;
1606 		case 33: // special Gff for split reads
1607 			while(it != itEnd)// && (*it).gseqNo == currSeqNo)
1608 			{
1609 				unsigned currReadNo = (*it).rseqNo;
1610 				Dna5String &currGenome = genomes[(*it).gseqNo];
1611 				unsigned readLen = length(reads[currReadNo]);
1612 				TMatch& mL = *it;
1613 				++it;
1614 				TMatch& mR = *it;
1615 				unsigned readLenL = mL.mScore;
1616 				unsigned readLenR = mR.mScore;
1617 				SEQAN_ASSERT_LEQ(mL.mScore+mR.mScore,(int)readLen);
1618 				String<Dna5Q> readInfL = infix(reads[currReadNo],0,readLenL);
1619 				String<Dna5Q> readInfR = infix(reads[currReadNo],length(reads[currReadNo])-readLenR,length(reads[currReadNo]));
1620 				int offsetL = 0;
1621 				int offsetR = readLen - readLenR;
1622 				if(mL.orientation == 'R')
1623 				{
1624 					offsetR = 0;
1625 					offsetL = readLen - readLenL;
1626 					readInfL = infix(reads[currReadNo],length(reads[currReadNo])-readLenL,length(reads[currReadNo]));
1627 					readInfR = infix(reads[currReadNo],0,readLenR);
1628 				}
1629 
1630 				switch (options.genomeNaming)
1631 				{
1632 					// 0..filename is the read's Fasta id
1633 					case 0:
1634 						file << genomeIDs[(*it).gseqNo] <<'\t';
1635 						break;
1636 					// 1..filename is the read filename + seqNo
1637 					case 1:
1638 						file.fill('0');
1639 						file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1 << '\t';
1640 						break;
1641 				}
1642 
1643 				// get alignment to dump and get cigar string
1644 				if (!options.hammingOnly)
1645 				{
1646 					assignSource(row(alignL, 0), readInfL);
1647 					assignSource(row(alignL, 1), infix(currGenome, mL.gBegin, mL.gEnd));
1648 					if (mL.orientation == 'R')
1649 						reverseComplement(source(row(alignL, 1)));
1650 
1651 					globalAlignment(alignL, scoreType, AlignConfig<false,false,false,false>(), Gotoh());
1652 
1653 					assignSource(row(alignR, 0), readInfR);
1654 					assignSource(row(alignR, 1), infix(currGenome, mR.gBegin, mR.gEnd));
1655 					if (mR.orientation == 'R')
1656 						reverseComplement(source(row(alignR, 1)));
1657 
1658 					globalAlignment(alignR, scoreType, AlignConfig<false,false,false,false>(), Gotoh());
1659 				}
1660 
1661 				//file <<  options.runID << "_razers\tread";
1662 				file << "razers\tread";
1663 				file << '\t' << (mL.gBegin + options.positionFormat) << '\t' << mR.gEnd << '\t';
1664 //?//			double percId = 100.0 * (1.0 + (double)(mL.pairScore-mL.mScore-mR.mScore) / (double)readLen);
1665 				double percId = 100.0 * (1.0 - (double)(mL.editDist + mR.editDist) /(double)(mL.mScore+mR.mScore));
1666 				file << percId << "\t";
1667 //				if(!empty(readRegions) && options.anchored)
1668 //				{
1669 //					if(readRegions[currReadNo].i2 < 0)
1670 //						file << '-' << '\t' << '.' <<'\t';
1671 //					else
1672 //						file << '+' << '\t' << '.' <<'\t';
1673 //				}
1674 //				else
1675 				{
1676 					if (mL.orientation == 'F')
1677 						file << '+' << '\t' << '.' <<'\t';
1678 					else
1679 						file << '-' << '\t' << '.' <<'\t';
1680 				}
1681 
1682 				switch (options.readNaming)
1683 				{
1684 					// 0..filename is the read's Fasta id
1685 					case 0:
1686 						file << "ID=" <<readIDs[currReadNo];
1687 						break;
1688 					// 1..filename is the read filename + seqNo
1689 					case 1:
1690 						file.fill('0');
1691 						file << "ID=" << readName << '#' << ::std::setw(pzeros) << currReadNo + 1;
1692 						break;
1693 				}
1694 				if(ambiStates[jj] == 0) file << ";unique";
1695 				if(ambiStates[jj] == 1) file << ";multi";
1696 				if(ambiStates[jj] == 2) file << ";suboptimal";
1697 				++jj;
1698 
1699 				SEQAN_ASSERT(mL.pairId == mR.pairId);
1700 
1701 				int indelLen = mR.mScore + mL.mScore - readLen;
1702 				if(mL.gEnd != mR.gBegin) indelLen = mR.gBegin - mL.gEnd;
1703 
1704 				file << ";pairScore=" << (unsigned int) mR.pairScore;
1705 				if(!empty(readRegions) && options.anchored)
1706 				{
1707 					if(readRegions[currReadNo].i2.i1 < 2)
1708 						file << ";libraryError=" << (int) readRegions[currReadNo].i2.i2 - mR.gEnd - (int) options.libraryLength + 2*readLen;
1709                     else {
1710                         int tmp = (int)mL.gBegin + readLen - (int)options.libraryLength - readRegions[currReadNo].i2.i2;
1711 						file << ";libraryError=" <<  tmp ;
1712                     }
1713 
1714 				}
1715 
1716                 if(mL.gBegin > mR.gBegin) std::cout << "falsch\n";
1717 //				if(mR.traceExtension >= abs(indelLen))
1718 //					file << ";traceExt=" << (unsigned int) mR.traceExtension;
1719 
1720 				String<Pair<Dna5,int> > mutationsStr, mutStrMid, mutStrL, mutStrR;
1721 				String<Pair<char,int> > cigarStr, cigarMid, cigarL, cigarR;
1722 				if(indelLen < 0)
1723 				{
1724 					int offset = readLenL;
1725 					if(mL.orientation == 'R')
1726 						offset = readLenR;
1727 					String<Dna5Q> readInfMid = infix(reads[currReadNo],offset,offset - indelLen);
1728 					for (int i = 0; i < -indelLen; ++i)
1729 						appendValue(mutStrMid, Pair<Dna5,int>(readInfMid[i],i+offset+1));
1730 					appendValue(cigarMid, Pair<char,int>('I',-indelLen));
1731 
1732 				}
1733 				if(indelLen > 0) appendValue(cigarMid, Pair<char,int>('D',indelLen));
1734 				if(indelLen != 0)
1735 					file << ";split";
1736 
1737 				if (options.hammingOnly)
1738 				{
1739 					if(indelLen == 0)
1740 						appendValue(cigarL, Pair<char,int>('M',readLen));
1741 					else
1742 					{
1743 						appendValue(cigarL, Pair<char,int>('M',readLenL));
1744 						appendValue(cigarR, Pair<char,int>('M',readLenR));
1745 					}
1746 
1747 					//make left part of mutation string
1748 					if (mL.editDist > 0)
1749 					{
1750 						gInfL = infix(currGenome, mL.gBegin, mL.gEnd);
1751 						if (mL.orientation == 'R')
1752 							reverseComplement(gInfL);
1753 						for (unsigned i = 0; i < length(gInfL); ++i)
1754 							if ((options.compMask[ordValue(readInfL[i])] & options.compMask[ordValue(gInfL[i])]) == 0)
1755 								appendValue(mutStrL, Pair<Dna5,int>(readInfL[i],i+offsetL+1));
1756 					}
1757 
1758 					//make right part of mutation string
1759 					if (mR.editDist > 0)
1760 					{
1761 						gInfR = infix(currGenome, mR.gBegin, mR.gEnd);
1762 						if (mR.orientation == 'R')
1763 							reverseComplement(gInfR);
1764 						for (unsigned i = 0; i < length(gInfR); ++i)
1765 							if ((options.compMask[ordValue(readInfR[i])] & options.compMask[ordValue(gInfR[i])]) == 0)
1766 								appendValue(mutStrR, Pair<Dna5,int>(readInfR[i],i+offsetR+1));
1767 					}
1768 				}
1769 				else
1770 				{
1771 					typedef typename Row<TAlign>::Type TRow;
1772 					typedef typename Iterator<TRow, Rooted>::Type TAlignIterator;
1773 
1774 					//left business
1775 					TAlignIterator aliL_it0 = begin(row(alignL,0));
1776 					TAlignIterator aliL_it1 = begin(row(alignL,1));
1777 					TAlignIterator aliL_it0stop = end(row(alignL,0));
1778 					TAlignIterator aliL_it1stop = end(row(alignL,1));
1779 					getCigarLine(alignL,cigarL,mutStrL,offsetL,aliL_it0,aliL_it0stop,aliL_it1,aliL_it1stop);
1780 
1781 					//right business
1782 					TAlignIterator aliR_it0 = begin(row(alignR,0));
1783 					TAlignIterator aliR_it1 = begin(row(alignR,1));
1784 					TAlignIterator aliR_it0stop = end(row(alignR,0));
1785 					TAlignIterator aliR_it1stop = end(row(alignR,1));
1786 					getCigarLine(alignR,cigarR,mutStrR,offsetR,aliR_it0,aliR_it0stop,aliR_it1,aliR_it1stop);
1787 				}
1788 				//now plug together the parts: cigar
1789 				if(mL.orientation=='F')
1790 				{
1791 					append(cigarStr,cigarL);
1792 					append(cigarStr,cigarMid);
1793 					append(cigarStr,cigarR);
1794 					append(mutationsStr,mutStrL);
1795 					append(mutationsStr,mutStrMid);
1796 					append(mutationsStr,mutStrR);
1797 				}
1798 				else
1799 				{
1800 					append(cigarStr,cigarR);
1801 					append(cigarStr,cigarMid);
1802 					append(cigarStr,cigarL);
1803 					append(mutationsStr,mutStrR);
1804 					append(mutationsStr,mutStrMid);
1805 					append(mutationsStr,mutStrL);
1806 				}
1807 				bool first = true;
1808 				file << ";cigar=";// << cigarStr.str();
1809 				for (unsigned i = 0; i < length(cigarStr); ++i)
1810 					file << cigarStr[i].i2 << cigarStr[i].i1;
1811 
1812 				if(length(mutationsStr)>0)file << ";mutations=";
1813 				for (unsigned i = 0; i < length(mutationsStr); ++i)
1814 				{
1815 					if(first)
1816 					{
1817 						first = false;
1818 						file << mutationsStr[i].i2 << mutationsStr[i].i1;
1819 					}
1820 					else
1821 						file << "," << mutationsStr[i].i2 << mutationsStr[i].i1;
1822 				}
1823 
1824 				if(options.fastaIdQual)
1825 				{
1826 					file << ";quality=";
1827 					for(unsigned j=0;j<length(reads[currReadNo]);++j)
1828 					{
1829 						file << (char)(getQualityValue(reads[currReadNo][j])+ 33);
1830 					}
1831 				}
1832 				file << ::std::endl;
1833 				++it;
1834 			}
1835 			break;
1836 
1837 
1838 	}
1839 
1840 	close(file);
1841 
1842 	// get empirical error distribution
1843 	if (!empty(errorPrbFileName) && maxReadLength > 0)
1844 	{
1845         std::ofstream file;
1846 		file.open(toCString(errorPrbFileName), ::std::ios_base::out | ::std::ios_base::trunc);
1847 		if (file.is_open())
1848 		{
1849 			String<long double> posError;
1850 			unsigned unique = 0;
1851 			unsigned insertions = 0;
1852 			unsigned deletions = 0;
1853 			resize(posError, maxReadLength, 0);
1854 
1855 			if (options.hammingOnly)
1856 				unique = getErrorDistribution(posError, matches, reads, genomes, options);
1857 			else
1858 			{
1859 				unique = getErrorDistribution(posError, insertions, deletions, matches, reads, genomes, options);
1860 				::std::cerr << "insertProb: " << (double)insertions / ((double)length(posError) * (double)unique) << ::std::endl;
1861 				::std::cerr << "deleteProb: " << (double)deletions / ((double)length(posError) * (double)unique) << ::std::endl;
1862 			}
1863 
1864 			file << (double)posError[0] / (double)unique;
1865 			for (unsigned i = 1; i < length(posError); ++i)
1866 				file << '\t' << (double)posError[i] / (double)unique;
1867 			file << ::std::endl;
1868 			file.close();
1869 		} else
1870 			::std::cerr << "Failed to open error distribution file" << ::std::endl;
1871 	}
1872 
1873 	options.timeDumpResults = SEQAN_PROTIMEDIFF(dump_time);
1874 
1875 	if (options._debugLevel >= 1)
1876 		::std::cerr << "Dumping results took             \t" << options.timeDumpResults << " seconds" << ::std::endl;
1877 }
1878 
1879 
1880 }
1881 
1882 #endif
1883 
1884