1  /*==========================================================================
2              RazerS - Fast Read Mapping with Controlled Loss Rate
3                    http://www.seqan.de/projects/razers.html
4 
5  ============================================================================
6   Copyright (C) 2008 by David Weese
7 
8   This program is free software; you can redistribute it and/or
9   modify it under the terms of the GNU Lesser General Public
10   License as published by the Free Software Foundation; either
11   version 3 of the License, or (at your options) any later version.
12 
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17 
18   You should have received a copy of the GNU General Public License
19   along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  ==========================================================================*/
21 
22 #ifndef SEQAN_HEADER_OUTPUT_FORMAT_H
23 #define SEQAN_HEADER_OUTPUT_FORMAT_H
24 
25 #include <iostream>
26 #include <fstream>
27 #include <sstream>
28 
29 #include "razers.h"
30 #include <seqan/align.h>
31 
32 namespace SEQAN_NAMESPACE_MAIN
33 {
34 
35 //////////////////////////////////////////////////////////////////////////////
36 // Quality-based score
37 
38 	template <typename TQualityString = CharString>
39 	struct Quality;
40 
41 	template <typename TValue, typename TQualityString>
42 	class Score<TValue, Quality<TQualityString> >
43 	{
44 	public:
45 		TValue data_match;
46 		TValue data_mismatch;
47 		TValue data_gap_extend;
48 		TValue data_gap_open;
49 
50 		TQualityString const *data_qual;
51 
52 	public:
Score()53 		Score():
54 			data_match(0),
55 			data_mismatch(-1),
56 			data_gap_extend(-1),
57 			data_gap_open(-1),
58 			data_qual(NULL)
59 		{
60 		}
Score(TValue _match,TValue _mismatch,TValue _gap)61 		Score(TValue _match, TValue _mismatch, TValue _gap):
62 			data_match(_match),
63 			data_mismatch(_mismatch),
64 			data_gap_extend(_gap),
65 			data_gap_open(_gap),
66 			data_qual(NULL)
67 		{
68 		}
Score(TValue _match,TValue _mismatch,TValue _gap_extend,TValue _gap_open,TQualityString const & _qual)69 		Score(TValue _match, TValue _mismatch, TValue _gap_extend, TValue _gap_open, TQualityString const &_qual):
70 			data_match(_match),
71 			data_mismatch(_mismatch),
72 			data_gap_extend(_gap_extend),
73 			data_gap_open(_gap_open),
74 			data_qual(&_qual)
75 		{
76 		}
77 
Score(Score const & other)78 		Score(Score const & other):
79 			data_match(other.data_match),
80 			data_mismatch(other.data_mismatch),
81 			data_gap_extend(other.data_gap_extend),
82 			data_gap_open(other.data_gap_open),
83 			data_qual(other.data_qual)
84 		{
85 		}
~Score()86 		~Score()
87 		{
88 		}
89 
90 		Score & operator = (Score const & other)
91 		{
92 			data_match = other.data_match;
93 			data_mismatch = other.data_mismatch;
94 			data_gap_extend = other.data_gap_extend;
95 			data_gap_open = other.data_gap_open;
96 			data_qual = other.data_qual;
97 			return *this;
98 		}
99 	};
100 
101 //////////////////////////////////////////////////////////////////////////////
102 
103 template <typename TValue, typename TQualityString, typename TPos1, typename TPos2, typename TSeq1, typename TSeq2>
104 inline TValue
score(Score<TValue,Quality<TQualityString>> const & me,TPos1 pos1,TPos2 pos2,TSeq1 const & seq1,TSeq2 const & seq2)105 score(Score<TValue, Quality<TQualityString> > const & me,
106 	  TPos1 pos1,
107 	  TPos2 pos2,
108 	  TSeq1 const &seq1,
109 	  TSeq2 const &seq2)
110 {
111 	if (seq1[pos1] != seq2[pos2])
112 		if (me.data_qual)
113 			return (*me.data_qual)[pos2];
114 		else
115 			return scoreMismatch(me);
116 	else
117 		return scoreMatch(me);
118 }
119 
120 
121 //////////////////////////////////////////////////////////////////////////////
122 // Less-operators ...
123 
124 	// ... to sort matches and remove duplicates with equal gBegin
125 	template <typename TReadMatch>
126 	struct LessGPosRNo : public ::std::binary_function < TReadMatch, TReadMatch, bool >
127 	{
operatorLessGPosRNo128 		inline bool operator() (TReadMatch const &a, TReadMatch const &b) const
129 		{
130 			// genome position and orientation
131 			if (a.gseqNo < b.gseqNo) return true;
132 			if (a.gseqNo > b.gseqNo) return false;
133 			if (a.gBegin < b.gBegin) return true;
134 			if (a.gBegin > b.gBegin) return false;
135 			if (a.orientation < b.orientation) return true;
136 			if (a.orientation > b.orientation) return false;
137 
138 			// read number
139 			if (a.rseqNo < b.rseqNo) return true;
140 			if (a.rseqNo > b.rseqNo) return false;
141 
142 			// quality
143 			return a.editDist < b.editDist;
144 		}
145 	};
146 
147 //////////////////////////////////////////////////////////////////////////////
148 // Determine error distribution
149 template <typename TErrDistr, typename TMatches, typename TReads, typename TGenomes, typename TOptions>
150 inline unsigned
getErrorDistribution(TErrDistr & posError,TMatches & matches,TReads & reads,TGenomes & genomes,TOptions & options)151 getErrorDistribution(
152 	TErrDistr &posError,
153 	TMatches &matches,
154 	TReads &reads,
155 	TGenomes &genomes,
156 	TOptions &options)
157 {
158 	typename Iterator<TMatches, Standard>::Type	it = begin(matches, Standard());
159 	typename Iterator<TMatches, Standard>::Type	itEnd = end(matches, Standard());
160 
161 	Dna5String genome;
162 	unsigned unique = 0;
163 	for (; it != itEnd; ++it)
164 	{
165 		if ((*it).orientation == '-') continue;
166 
167 		Dna5String const &read = reads[(*it).rseqNo];
168 		genome = infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd);
169 		if ((*it).orientation == 'R')
170 			reverseComplement(genome);
171 		for (unsigned i = 0; i < length(posError) && i < length(read); ++i)
172 			if ((options.compMask[ordValue(genome[i])] & options.compMask[ordValue(read[i])]) == 0)
173 				++posError[i];
174 		++unique;
175 	}
176 	return unique;
177 }
178 
179 template <typename TErrDistr, typename TCount1, typename TCount2, typename TMatches, typename TReads, typename TGenomes, typename TSpec>
180 inline unsigned
getErrorDistribution(TErrDistr & posError,TCount1 & insertions,TCount2 & deletions,TMatches & matches,TReads & reads,TGenomes & genomes,RazerSOptions<TSpec> & options)181 getErrorDistribution(
182 	TErrDistr &posError,
183 	TCount1 &insertions,
184 	TCount2 &deletions,
185 	TMatches &matches,
186 	TReads &reads,
187 	TGenomes &genomes,
188 	RazerSOptions<TSpec> &options)
189 {
190 	typedef Align<String<Dna5>, ArrayGaps> TAlign;
191 	typedef typename Row<TAlign>::Type TRow;
192 	typedef typename Iterator<TRow>::Type TIter;
193 
194 	typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
195 	typedef typename Position<TAlign>::Type TPosition;
196 
197 	typename Iterator<TMatches, Standard>::Type	it = begin(matches, Standard());
198 	typename Iterator<TMatches, Standard>::Type	itEnd = end(matches, Standard());
199 
200 	Align<Dna5String, ArrayGaps> align;
201 	Score<int> scoreType = Score<int>(0, -999, -1001, -1000);	// levenshtein-score (match, mismatch, gapOpen, gapExtend)
202 	if (options.hammingOnly)
203 		scoreType.data_mismatch = -1;
204 	resize(rows(align), 2);
205 
206 	unsigned unique = 0;
207 	for (; it != itEnd; ++it)
208 	{
209 		if ((*it).orientation == '-') continue;
210 
211 		assignSource(row(align, 0), reads[(*it).rseqNo]);
212 		assignSource(row(align, 1), infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd));
213 		if ((*it).orientation == 'R')
214 			reverseComplement(source(row(align, 1)));
215 		globalAlignment(align, scoreType);
216 
217 		TRow& row0 = row(align, 0);
218 		TRow& row1 = row(align, 1);
219 
220 		TPosition begin = beginPosition(cols(align));
221 		TPosition end = endPosition(cols(align));
222 
223 		TIter it0 = iter(row0, begin);
224 		TIter it1 = iter(row1, begin);
225 		TIter end0 = iter(row0, end);
226 
227 		unsigned pos = 0;
228 		for (; it0 != end0 && pos < length(posError); ++it0, ++it1)
229 		{
230 			if (isGap(it0))
231 				++insertions;
232 			else
233 			{
234 				if (isGap(it1))
235 					++deletions;
236 				else
237 					if ((options.compMask[ordValue(getValue(it0))] & options.compMask[ordValue(getValue(it1))]) == 0)
238 						++posError[pos];
239 				++pos;
240 			}
241 		}
242 		++unique;
243 	}
244 	return unique;
245 }
246 
247 
248 //////////////////////////////////////////////////////////////////////////////
249 template <typename TFile, typename TSource, typename TSpec, typename TPosition>
250 inline void
dumpAlignment(TFile & target,Align<TSource,TSpec> const & source,TPosition begin_,TPosition end_)251 dumpAlignment(TFile & target, Align<TSource, TSpec> const & source, TPosition begin_, TPosition end_)
252 {
253 	typedef Align<TSource, TSpec> const TAlign;
254 	typedef typename Row<TAlign>::Type TRow;
255 	typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
256 
257 	TRowsPosition row_count = length(rows(source));
258 
259 	// Print sequences
260 	for (TRowsPosition i = 0; i < row_count; ++i)
261 	{
262 		if (i == 0)
263 			_streamWrite(target, "#Read:   ");
264 		else
265 			_streamWrite(target, "#Genome: ");
266 		TRow& row_ = row(source, i);
267 		typedef typename Iterator<typename Row<TAlign>::Type const>::Type TIter;
268 		TIter begin1_ = iter(row_, begin_);
269 		TIter end1_ = iter(row_, end_);
270 		for (; begin1_ != end1_; ++begin1_) {
271 			if (isGap(begin1_)) _streamPut(target, gapValue<char>());
272 			else _streamPut(target, *begin1_);
273 		}
274 		_streamPut(target, '\n');
275 	}
276 }
277 
278 template<typename TMatches, typename TCounts, typename TOptions>
279 void
countCoocurrences(TMatches & matches,TCounts & cooc,TOptions & options)280 countCoocurrences(TMatches & matches, TCounts & cooc, TOptions & options)
281 {
282 	clear(cooc);
283 	int maxSeedErrors = (int)(options.errorRate * options.artSeedLength) + 1;
284 	resize(cooc,maxSeedErrors+1,0);
285 	for (int i = 0; i < maxSeedErrors+1; ++i)
286 		cooc[i] = 1;
287 
288 	int count = 0;
289 	unsigned readNo = -1;
290 	int preEditDist = -1;
291 	typename Iterator<TMatches>::Type it = begin(matches,Standard());
292 	typename Iterator<TMatches>::Type itEnd = end(matches,Standard());
293 
294 	for(; it != itEnd; ++it)
295 	{
296 		if ((*it).rseqNo == readNo)
297 		{
298 			if(preEditDist > 1) continue;// || dist > options.errorRate * maxReadLength + 1) continue;
299 			int dist = (*it).seedEditDist - preEditDist;
300 			if(dist > maxSeedErrors) continue;
301 			if(dist < 0) ++cooc[0];
302 			else ++cooc[dist];
303 		}
304 		else
305 		{
306 			readNo = (*it).rseqNo;
307 			preEditDist = (*it).seedEditDist;
308 			if(preEditDist <= 1) ++count;
309 		}
310 	}
311 	for (unsigned i = 0; i < length(cooc); ++i)
312 	{
313 		cooc[i] = (int)(-4.343 * log((double)cooc[i]/count) );
314 		if (cooc[i] < 0) cooc[i] = 0;
315 	}
316 	if(options._debugLevel > 1)
317 	{
318 		::std::cerr << "[mapping_count] ";
319 		for(unsigned j = 0; j < length(cooc); ++j)
320 			::std::cerr << cooc[j] << " ";
321 		::std::cerr << ::std::endl;
322 	}
323 
324 }
325 
326 
327 template<typename TAlign, typename TString, typename TIter>
328 void
getCigarLine(TAlign & align,TString & cigar,TString & mutations,TIter ali_it0,TIter ali_it0_stop,TIter ali_it1,TIter ali_it1_stop)329 getCigarLine(TAlign & align, TString & cigar, TString & mutations, TIter ali_it0, TIter ali_it0_stop, TIter ali_it1, TIter ali_it1_stop)
330 {
331 
332 	typedef typename Source<TAlign>::Type TSource;
333 	typedef typename Iterator<TSource, Rooted>::Type TStringIterator;
334 
335 // 	typedef typename Row<TAlign>::Type TRow;
336 // 	typedef typename Iterator<TRow, Rooted>::Type TAlignIterator;
337 
338 	TStringIterator readBase = begin(source(row(align,0)));
339 
340 	int readPos = 0;
341 	bool first = true;
342 	int inserted = 0;
343 	int deleted = 0;
344 	while(ali_it0 != ali_it0_stop && ali_it1 != ali_it1_stop)
345 	{
346 		inserted = 0;
347 		deleted = 0;
348 		int matched = 0;
349 		while(ali_it0!=ali_it0_stop && ali_it1!=ali_it1_stop && !isGap(ali_it0)&& !isGap(ali_it1))
350 		{
351 			++readPos;
352 			if(*ali_it1 != *ali_it0)
353 			{
354 				if(first) first = false;
355 				else mutations << ",";
356 				mutations << readPos <<*readBase;
357 			}
358 			++readBase;
359 			++ali_it0;
360 			++ali_it1;
361 			++matched;
362 		}
363 		if(matched>0) cigar << matched<< "M" ;
364 		while(ali_it0!=ali_it0_stop && isGap(ali_it0))
365 		{
366 			++ali_it0;
367 			++ali_it1;
368 			++deleted;
369 		}
370 		if(deleted>0) cigar << deleted << "D";
371 		while(isGap(ali_it1)&& ali_it1!=ali_it1_stop)
372 		{
373 			++ali_it0;
374 			++ali_it1;
375 			++readPos;
376 			if(first) first = false;
377 			else mutations << ",";
378 			mutations << readPos << *readBase;
379 			++readBase;
380 			++inserted;
381 		}
382 		if(inserted>0) cigar << inserted << "I";
383 	}
384 	// end gaps can happen in split mapping
385 	while(ali_it0!=ali_it0_stop)
386 	{
387 		++ali_it0;
388 		++deleted;
389 	}
390 	if(deleted>0) cigar << deleted << "D";
391 	while(ali_it1 != ali_it1_stop)
392 	{
393 		++ali_it1;
394 		++readPos;
395 		if(first) first = false;
396 		else mutations << ",";
397 		mutations << readPos << *readBase;
398 		++inserted;
399 	}
400 	if(inserted>0) cigar << inserted << "I";
401 
402 }
403 
404 template<typename TAlign, typename TIter>
405 void
getCigarLine(TAlign & align,String<Pair<char,int>> & cigar,String<Pair<Dna5,int>> & mutations,int offset,TIter ali_it0,TIter ali_it0_stop,TIter ali_it1,TIter ali_it1_stop)406 getCigarLine(TAlign & align,
407 		String<Pair<char,int> > & cigar,
408 		String<Pair<Dna5,int> > & mutations,
409 		int offset,
410 		TIter ali_it0, TIter ali_it0_stop, TIter ali_it1, TIter ali_it1_stop)
411 {
412 
413 	typedef typename Source<TAlign>::Type TSource;
414 	typedef typename Iterator<TSource, Rooted>::Type TStringIterator;
415 
416 // 	typedef typename Row<TAlign>::Type TRow;
417 // 	typedef typename Iterator<TRow, Rooted>::Type TAlignIterator;
418 
419 	TStringIterator readBase = begin(source(row(align,0)));
420 
421 	int readPos = 0;
422 	int inserted = 0;
423 	int deleted = 0;
424  	while(ali_it0 != ali_it0_stop && ali_it1 != ali_it1_stop)
425 	{
426 		inserted = 0;
427 		deleted = 0;
428 		int matched = 0;
429 		while(ali_it0!=ali_it0_stop && ali_it1!=ali_it1_stop && !isGap(ali_it0)&& !isGap(ali_it1))
430 		{
431 			++readPos;
432 			if(*ali_it1 != *ali_it0)
433 				appendValue(mutations,Pair<Dna5,int>((Dna5)(*readBase),readPos + offset));
434 			++readBase;
435 			++ali_it0;
436 			++ali_it1;
437 			++matched;
438 		}
439 		if(matched>0) appendValue(cigar,Pair<char,int>('M',matched));
440 		while(ali_it0!=ali_it0_stop && isGap(ali_it0))
441 		{
442 			++ali_it0;
443 			++ali_it1;
444 			++deleted;
445 		}
446 		if(deleted>0) appendValue(cigar,Pair<char,int>('D',deleted));
447 		while(isGap(ali_it1)&& ali_it1!=ali_it1_stop)
448 		{
449 			++ali_it0;
450 			++ali_it1;
451 			++readPos;
452 			appendValue(mutations,Pair<Dna5,int>((Dna5)(*readBase),readPos + offset));
453 			++readBase;
454 			++inserted;
455 		}
456 		if(inserted>0) appendValue(cigar,Pair<char,int>('I',inserted));
457 	}
458 	// end gaps can happen in split mapping
459 	while(ali_it0!=ali_it0_stop)
460 	{
461 		++ali_it0;
462 		++deleted;
463 	}
464 	if(deleted>0) appendValue(cigar,Pair<char,int>('D',deleted));
465 	while(ali_it1 != ali_it1_stop)
466 	{
467 		++ali_it1;
468 		++readPos;
469 		appendValue(mutations,Pair<Dna5,int>((Dna5)(*readBase),readPos + offset));
470 		++inserted;
471 	}
472 	if(inserted>0) appendValue(cigar,Pair<char,int>('I',inserted + offset));
473 
474 }
475 
476 
477 
478 #ifdef RAZERS_DIRECT_MAQ_MAPPING
479 //////////////////////////////////////////////////////////////////////////////
480 // Assign mapping quality and remove suboptimal matches
481 template < typename TMatches, typename TReads, typename TCooc, typename TCounts, typename TSpec >
assignMappingQuality(TMatches & matches,TReads & reads,TCooc & cooc,TCounts & cnts,RazerSOptions<TSpec> & options)482 void assignMappingQuality(TMatches &matches, TReads & reads, TCooc & cooc, TCounts &cnts, RazerSOptions<TSpec> & options)
483 {
484 	typedef typename Value<TMatches>::Type				TMatch;
485 	typedef typename Iterator<TMatches, Standard>::Type		TIterator;
486 
487 	//matches are already sorted
488 	//::std::sort(
489 	//	begin(matches, Standard()),
490 	//	end(matches, Standard()),
491 	//	LessRNoMQ<TMatch>());
492 
493 
494 	int maxSeedErrors = (int)(options.errorRate*options.artSeedLength)+1;
495 	unsigned readNo = -1;
496 
497 	TIterator it = begin(matches, Standard());
498 	TIterator itEnd = end(matches, Standard());
499 	TIterator dit = it;
500 
501 	int bestQualSum, secondBestQualSum;
502 	int secondBestDist = -1 ,secondBestMatches = -1;
503 	for (; it != itEnd; ++it)
504 	{
505 		if ((*it).orientation == '-') continue;
506 		bool mappingQualityFound = false;
507 		int mappingQuality = 0;
508 		int qualTerm1,qualTerm2;
509 
510 		readNo = (*it).rseqNo;
511 		bestQualSum = (*it).mScore;
512 
513 		if(++it!=itEnd && (*it).rseqNo==readNo)
514 		{
515 			secondBestQualSum = (*it).mScore;
516 			secondBestDist = (*it).editDist;
517 			secondBestDist = (*it).editDist;
518 			secondBestMatches = cnts[1][readNo] >> 5;
519 //CHECKcnts		secondBestMatches = cnts[secondBestDist][readNo];
520 //			secondBestMatches = cnts[secondBestDist][readNo];
521 			(*it).orientation = '-';
522 		//	if(secondBestDist<=bestDist) unique=0;
523 		}
524 		else secondBestQualSum = -1000;
525 		--it; //it is now at best match of current rseqNo
526 
527 		int bestDist = (*it).editDist;
528 		int kPrime = (*it).seedEditDist;
529 		if((bestQualSum==secondBestQualSum) || (kPrime>maxSeedErrors))
530 			mappingQualityFound = true;   //mq=0
531 		else{
532 			if(secondBestQualSum == -1000) qualTerm1 = 99;
533 			else
534 			{
535 				qualTerm1 = (int)(secondBestQualSum - bestQualSum - 4.343 * log((double)secondBestMatches));
536 				//if (secondBestKPrime - kPrime <= 1 && qualTerm1 > options.mutationRateQual) qualTerm1 = options.mutationRateQual; //TODO abchecken was mehr sinn macht
537 				if (secondBestDist - bestDist <= 1 && qualTerm1 > options.mutationRateQual) qualTerm1 = options.mutationRateQual;
538 			}
539 			float avgSeedQual = 0.0;
540 			if(!mappingQualityFound)
541 			{
542 				//TODO!!! generalize and adapt to razers lossrates
543 				// lossrate 0.42 => -10 log10(0.42) = 4
544 				int kPrimeLoss = 4; // options.kPrimeLoss; // bezieht sich auf 3 fehler in 28bp
545 				qualTerm2 = kPrimeLoss + cooc[maxSeedErrors-kPrime];
546 
547 				for(unsigned j = 0; j<options.artSeedLength; ++j)
548 				{
549 					int q = getQualityValue(reads[readNo][j]);//(int)((unsigned char)(reads[readNo][j])>>3);
550 					if(q>options.mutationRateQual) q = options.mutationRateQual;
551 					avgSeedQual+=q;
552 				}
553 				avgSeedQual/=options.artSeedLength;
554 				//-10 log10(28-2) = 14;
555 				//generalize to -10 log10(artSeedLength - maxSeedErrors +1 ) // 14 fits for seedlength 28 to 32 with 2 errors
556 				if(avgSeedQual>14) qualTerm2 += (int)((maxSeedErrors-kPrime)*(avgSeedQual-14));
557 			}
558 		}
559 		if (!mappingQualityFound) mappingQuality = (qualTerm1<qualTerm2) ? qualTerm1:qualTerm2;
560 		if (mappingQuality < 0) mappingQuality = 0;
561 		(*it).mScore = mappingQuality;
562 
563 		*dit = *it;
564 	//	if(secondBestQualSum != -1000) ++it;
565 		++dit;
566 	}
567 	resize(matches, dit - begin(matches, Standard()));
568 }
569 #endif
570 
571 
572 //////////////////////////////////////////////////////////////////////////////
573 // Dump an alignment
574 template <typename TFile, typename TSource, typename TSpec>
575 inline void
dumpAlignment(TFile & target,Align<TSource,TSpec> const & source)576 dumpAlignment(TFile & target, Align<TSource, TSpec> const & source)
577 {
578 	typedef Align<TSource, TSpec> const TAlign;
579 	typedef typename Row<TAlign>::Type TRow;
580 	typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
581 	typedef typename Position<TAlign>::Type TPosition;
582 
583 	TRowsPosition row_count = length(rows(source));
584 	TPosition begin_ = beginPosition(cols(source));
585 	TPosition end_ = endPosition(cols(source));
586 
587 	// Print sequences
588 	for(TRowsPosition i=0;i<row_count;++i) {
589 		if (i == 0)
590 			_streamWrite(target, "#Read:   ");
591 		else
592 			_streamWrite(target, "#Genome: ");
593 		TRow& row_ = row(source, i);
594 		typedef typename Iterator<typename Row<TAlign>::Type const>::Type TIter;
595 		TIter begin1_ = iter(row_, begin_);
596 		TIter end1_ = iter(row_, end_);
597 		for (; begin1_ != end1_; ++begin1_) {
598 			if (isGap(begin1_)) _streamPut(target, gapValue<char>());
599 			else _streamPut(target, *begin1_);
600 		}
601 		_streamPut(target, '\n');
602 	}
603 }
604 
605 
606 //////////////////////////////////////////////////////////////////////////////
607 // Output matches
608 template <
609 	typename TMatches,
610 	typename TGenomeNames,
611 	typename TReads,
612 	typename TReadNames,
613 	typename TCounts,
614 	typename TSpec
615 >
dumpMatches(TMatches & matches,TGenomeNames const & genomeIDs,StringSet<CharString> & genomeFileNameList,::std::map<unsigned,::std::pair<::std::string,unsigned>> & gnoToFileMap,TReads const & reads,TCounts & stats,TReadNames const & readIDs,CharString readFName,CharString errorPrbFileName,RazerSOptions<TSpec> & options)616 void dumpMatches(
617 	TMatches &matches,							// forward/reverse matches
618 	TGenomeNames const &genomeIDs,				// Genome names (read from Fasta file, currently unused)
619 	StringSet<CharString> &genomeFileNameList,	// list of genome names (e.g. {"hs_ref_chr1.fa","hs_ref_chr2.fa"})
620 	::std::map<unsigned,::std::pair< ::std::string,unsigned> > &gnoToFileMap, //map to retrieve genome filename and sequence number within that file
621 	TReads const &reads,
622 	TCounts & stats,							// Match statistics (possibly empty)
623 	TReadNames const &readIDs,					// Read names (read from Fasta file, currently unused)
624 	CharString readFName,					// read name (e.g. "reads.fa"), used for file/read naming
625 	CharString errorPrbFileName,
626 	RazerSOptions<TSpec> &options)
627 {
628 	typedef typename Value<TMatches>::Type		TMatch;
629 	typedef typename Value<TReads>::Type		TRead;
630 	typedef typename Value<TGenomeSet>::Type	TGenome;
631 	typedef typename TMatch::TGPos				TGPos;
632 
633 	if (options.minMatchLen > 0) options.outputFormat = 33;
634 
635 	if (options.outputFormat == 2)
636 	{
637 		options.sortOrder = 0;		// ... to count multiple matches
638 	}
639 
640 	if (options.outputFormat == 2)
641 	{
642 		options.maxHits = 1;		// Eland outputs at most one match
643 		options.sortOrder = 0;		// read numbers are increasing
644 		options.positionFormat = 1;	// bases in file are numbered starting at 1
645 		options.dumpAlignment = options.hammingOnly;
646 	}
647 #ifdef RAZERS_DIRECT_MAQ_MAPPING
648 	if (options.maqMapping) options.outputFormat = 3;
649 	int maxSeedErrors = (int)(options.errorRate * options.artSeedLength); //without + 1 here, used to check whether match should be supported if noBelowIdentity option is switched on
650 #endif
651 
652 	if (options.outputFormat == 3)
653 	{
654 		options.sortOrder = 1;		//  sort according to gPos
655 		options.positionFormat = 1;	// bases in file are numbered starting at 1
656 		//options.dumpAlignment = false;
657 	}
658 	if (options.outputFormat == 33)
659 	{
660 		options.positionFormat = 1;	// bases in file are numbered starting at 1
661 		options.dumpAlignment = false;
662 		options.sortOrder = 1;
663 	}
664 
665 
666 	// error profile
667 	unsigned maxReadLength = options.maxReadLength;
668 
669 	SEQAN_PROTIMESTART(dump_time);
670 
671 	// load Genome sequences for alignment dumps
672 	TGenomeSet genomes;
673 	if ((options.outputFormat == 0 && !options.hammingOnly) || options.dumpAlignment || !empty(errorPrbFileName) || options.outputFormat == 33)
674 		if (!loadGenomes(genomes, genomeFileNameList)) {
675 			::std::cerr << "Failed to load genomes" << ::std::endl;
676 			options.dumpAlignment = false;
677 		}
678 
679 	// how many 0's should be padded?
680 	int pzeros = 0;
681 	for (unsigned l = length(reads); l > 9; l = l / 10)
682 		++pzeros;
683 
684 	int gzeros = 0;
685 	for (unsigned l = length(genomes); l > 9; l = l / 10)
686 		++gzeros;
687 
688 	// remove the directory prefix of readFName
689 	::std::string _readName;
690 	assign(_readName, readFName);
691 	size_t lastPos = _readName.find_last_of('/') + 1;
692 	if (lastPos == _readName.npos) lastPos = _readName.find_last_of('\\') + 1;
693 	if (lastPos == _readName.npos) lastPos = 0;
694 	CharString readName = _readName.substr(lastPos);
695 
696 
697 	typedef Align<String<Dna5>, ArrayGaps> TAlign;
698 	TAlign align, alignL, alignR;
699 	Score<int> scoreType = Score<int>(0, -999, -1001, -1000);	// levenshtein-score (match, mismatch, gapOpen, gapExtend)
700 //	Score<int> scoreType(0,-1,-1,-1);
701 
702 	if (options.hammingOnly)
703 		scoreType.data_mismatch = -1;
704 	resize(rows(align), 2);
705 	resize(rows(alignL), 2);
706 	resize(rows(alignR), 2);
707 
708 	::std::ofstream file;
709 	CharString fileName = options.output;
710 	if (empty(fileName))
711 	{
712 		fileName = readFName;
713 		append(fileName, ".result");
714 	}
715 
716 	file.open(toCString(fileName), ::std::ios_base::out | ::std::ios_base::trunc);
717 	if (!file.is_open()) {
718 		::std::cerr << "Failed to open output file" << ::std::endl;
719 		return;
720 	}
721 
722 
723 	String<short> ambiStates;
724 	if(options.minMatchLen == 0)
725 		maskDuplicates(matches);
726 	if (options.outputFormat > 0
727 #ifdef RAZERS_DIRECT_MAQ_MAPPING
728 	 && !options.maqMapping
729 #endif
730 	)
731 	{
732 		// match statistics
733 		unsigned maxErrors = (int)(options.errorRate * maxReadLength);
734 		//if (maxErrors > 10) maxErrors = 10;
735 		resize(stats, maxErrors + 1);
736 		for (unsigned i = 0; i <= maxErrors; ++i)
737 			resize(stats[i], length(reads), 0);
738 		countMatches(matches, stats);
739 	}
740 
741 
742 
743 	Nothing nothing;
744 	unsigned currSeqNo = 0;
745 #ifdef RAZERS_DIRECT_MAQ_MAPPING
746 	if(options.maqMapping)
747 	{
748 		String<int> cooc;
749 		compactMatches(matches, stats, options, true, nothing, false); //only best two matches per read are kept
750 		countCoocurrences(matches,cooc,options);	//coocurrence statistics are filled
751 		assignMappingQuality(matches,reads,cooc,stats,options);//mapping qualities are assigned and only one match per read is kept
752 	}
753 	else
754 #endif
755 
756 #ifdef RAZERS_MICRO_RNA
757 	if(options.microRNA)purgeAmbiguousRnaMatches(matches,options);
758 	else
759 #endif
760 	{
761 		if(options.minMatchLen>0)
762 			compactAndCountSplicedMatches(matches, ambiStates, options, true);
763 		else
764 			compactMatches(matches, stats, options, true, nothing);
765 	}
766         int jj = 0;
767         if(!(options.minMatchLen > 0 ))
768 	  switch (options.sortOrder) {
769 		case 0:
770 			::std::sort(
771 				begin(matches, Standard()),
772 				end(matches, Standard()),
773 				LessRNoGPos<TMatch>());
774 			break;
775 
776 		case 1:
777 			::std::sort(
778 				begin(matches, Standard()),
779 				end(matches, Standard()),
780 				LessGPosRNo<TMatch>());
781 			break;
782 
783 	}
784 
785 	typename Iterator<TMatches, Standard>::Type	it = begin(matches, Standard());
786 	typename Iterator<TMatches, Standard>::Type	itEnd = end(matches, Standard());
787 
788 
789 	Dna5String gInf, gInfL, gInfR;
790 	char _sep_ = '\t';
791 	unsigned viewPosReadFirst = 0;
792 	unsigned viewPosReadLast  = 0;
793 
794 	switch (options.outputFormat)
795 	{
796 		case 0:	// Razer Format
797 //			_sep_ = ',';
798 			for(; it != itEnd; ++it)
799 			{
800 				unsigned	readLen = length(reads[(*it).rseqNo]);
801 				double		percId = 100.0 * (1.0 - (double)(*it).editDist / (double)readLen);
802 #ifdef RAZERS_MICRO_RNA
803 				percId = 100.0 * (1.0 - (double)(*it).editDist / (double) ((*it).mScore));
804 #endif
805 				switch (options.readNaming)
806 				{
807 					// 0..filename is the read's Fasta id
808 					case 0:
809 						file << readIDs[(*it).rseqNo];
810 						break;
811 
812 					// 1..filename is the read filename + seqNo
813 					case 1:
814 						file.fill('0');
815 						file << readName << '#' << ::std::setw(pzeros) << (*it).rseqNo + 1;
816 						break;
817 
818 					// 2..filename is the read sequence itself
819 					case 2:
820 						file << reads[(*it).rseqNo];
821 				}
822 
823 				file << _sep_ << options.positionFormat << _sep_ << readLen << _sep_ << (*it).orientation << _sep_;
824 
825 				switch (options.genomeNaming)
826 				{
827 					// 0..filename is the read's Fasta id
828 					case 0:
829 						file << genomeIDs[(*it).gseqNo];
830 						break;
831 
832 					// 1..filename is the read filename + seqNo
833 					case 1:
834 						file.fill('0');
835 						file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1;
836 				}
837 
838 				// get alignment to dump or to fix end coordinates
839 				if (options.dumpAlignment || !options.hammingOnly)
840 				{
841 #ifdef RAZERS_MICRO_RNA
842 					if(options.microRNA)
843 						assignSource(row(align, 0), prefix(reads[(*it).rseqNo],(*it).mScore));
844 					else
845 #endif
846 					assignSource(row(align, 0), reads[(*it).rseqNo]);
847 					assignSource(row(align, 1), infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd));
848 #ifdef RAZERS_MATEPAIRS
849 					if ((*it).pairId != 0 && ((*it).rseqNo & 1))
850 						reverseComplement(source(row(align, 0)));
851 #endif
852 					if ((*it).orientation == 'R')
853 						reverseComplement(source(row(align, 1)));
854 
855 					globalAlignment(align, scoreType, AlignConfig<false,true,true,false>(), Gotoh());
856 
857 					// transform first and last read character to genomic positions
858 					viewPosReadFirst = toViewPosition(row(align, 0), 0);
859 					viewPosReadLast  = toViewPosition(row(align, 0), readLen - 1);
860 					unsigned genomePosReadFirst = toSourcePosition(row(align, 1), viewPosReadFirst);
861 					unsigned genomePosReadLast  = toSourcePosition(row(align, 1), viewPosReadLast);
862 
863 					if ((*it).orientation == 'R')
864 					{
865 						(*it).gBegin = (*it).gEnd - (genomePosReadLast + 1);
866 						(*it).gEnd -= genomePosReadFirst;
867 					}
868 					else
869 					{
870 						(*it).gEnd = (*it).gBegin + (genomePosReadLast + 1);
871 						(*it).gBegin += genomePosReadFirst;
872 					}
873 				}
874 
875 				file << _sep_ << ((*it).gBegin + options.positionFormat) << _sep_ << (*it).gEnd << _sep_ << ::std::setprecision(5) << percId;
876 #ifdef RAZERS_MICRO_RNA
877 				if(options.microRNA) file << _sep_ << (*it).mScore;
878 #endif
879 
880 #ifdef RAZERS_MATEPAIRS
881 				if ((*it).pairId != 0)
882 					file << _sep_ << (*it).pairId << _sep_ << (*it).pairScore << _sep_ << (*it).mateDelta;
883 #endif
884 				file << ::std::endl;
885 
886 				if (options.dumpAlignment)
887 					dumpAlignment(file, align, viewPosReadFirst, viewPosReadLast + 1);
888 			}
889 			break;
890 
891 
892 		case 1:	// Enhanced Fasta Format
893 			_sep_ = ',';
894 			for(unsigned matchReadNo = -1, matchReadCount = 0; it != itEnd; ++it)
895 			{
896 				unsigned	readLen = length(reads[(*it).rseqNo]);
897 				double		percId = 100.0 * (1.0 - (double)(*it).editDist / (double)readLen);
898 
899 				if (matchReadNo != (*it).rseqNo)
900 				{
901 					matchReadNo = (*it).rseqNo;
902 					matchReadCount = 0;
903 				} else
904 					++matchReadCount;
905 
906 				::std::string fastaID;
907 				assign(fastaID, readIDs[(*it).rseqNo]);
908 
909 				::std::string id = fastaID;
910 				int fragId = (*it).rseqNo;
911 				bool appendMatchId = options.maxHits > 1;
912 
913 				size_t left = fastaID.find_first_of('[');
914 				size_t right = fastaID.find_last_of(']');
915 				if (left != fastaID.npos && right != fastaID.npos && left < right)
916 				{
917 					fastaID.erase(right);
918 					fastaID.erase(0, left + 1);
919 					replace(fastaID.begin(), fastaID.end(), ',', ' ');
920 					size_t pos = fastaID.find("id=");
921 					if (pos != fastaID.npos) {
922 						::std::istringstream iss(fastaID.substr(pos + 3));
923 						iss >> id;
924 //						appendMatchId = false;
925 					}
926 					pos = fastaID.find("fragId=");
927 					if (pos != fastaID.npos) {
928 						::std::istringstream iss(fastaID.substr(pos + 7));
929 						iss >> fragId;
930 					}
931 				}
932 
933 				if ((*it).orientation == 'F')
934 					// forward strand
935 					file << '>' << ((*it).gBegin + options.positionFormat) << _sep_ << (*it).gEnd;
936 				else
937 					// reverse strand (switch begin and end)
938 					file << '>' << (*it).gEnd << _sep_ << ((*it).gBegin + options.positionFormat);
939 
940 				unsigned ambig = 0;
941 				for (unsigned i = 0; i <= (*it).editDist && i < length(stats); ++i)
942 					ambig += stats[i][(*it).rseqNo];
943 
944 				file << "[id=" << id;
945 				if (appendMatchId) file << "_" << matchReadCount;
946 				file << ",fragId=" << fragId;
947 				file << ",contigId=" << genomeIDs[(*it).gseqNo];
948 				file << ",errors=" << (*it).editDist << ",percId=" << ::std::setprecision(5) << percId;
949 				file << ",ambiguity=" << ambig << ']' << ::std::endl;
950 
951 				file << reads[(*it).rseqNo] << ::std::endl;
952 			}
953 			break;
954 
955 
956 		case 2:	// Eland Format
957 			_sep_ = '\t';
958 			for(unsigned readNo = 0; readNo < length(reads); ++readNo)
959 			{
960 				switch (options.readNaming)
961 				{
962 					// 0..filename is the read's Fasta id
963 					case 0:
964 						file << '>' << readIDs[readNo] << _sep_;
965 						break;
966 
967 					// 1..filename is the read filename + seqNo
968 					case 1:
969 						file.fill('0');
970 						file << readName << '#' << ::std::setw(pzeros) << readNo + 1  << _sep_;
971 						break;
972 				}
973 
974 				if (it == itEnd || readNo < (*it).rseqNo)
975 				{
976 					if (!empty(reads[readNo]))
977 						file << reads[readNo] << _sep_ << "NM" << _sep_ << '0' << _sep_ << '0' << _sep_ << '0' << ::std::endl;
978 					else
979 					{
980 						for (unsigned i = 0; i < maxReadLength; ++i)
981 							file << '.';
982 						file << _sep_ << "QC" << _sep_ << '0' << _sep_ << '0' << _sep_ << '0' << ::std::endl;
983 					}
984 				} else
985 				{
986 					file << reads[readNo] << _sep_;
987 					unsigned bestMatches = 1;
988 					if ((unsigned)(*it).editDist < length(stats))
989 						bestMatches = stats[(*it).editDist][readNo];
990 
991 					if (bestMatches == 0) file << '?';	// impossible
992 					if (bestMatches == 1) file << 'U';	// unique best match
993 					if (bestMatches >  1) file << 'R';	// non-unique best matches
994 
995 					file << (*it).editDist << _sep_ << stats[0][readNo] << _sep_ << stats[1][readNo] << _sep_ << stats[2][readNo];
996 
997 					if (bestMatches == 1)
998 					{
999 						file << _sep_;
1000 						switch (options.genomeNaming)
1001 						{
1002 							// 0..filename is the read's Fasta id
1003 							case 0:
1004 								file << genomeIDs[(*it).gseqNo];
1005 								break;
1006 
1007 							// 1..filename is the read filename + seqNo
1008 							case 1:
1009 								file.fill('0');
1010 								file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1;
1011 						}
1012 
1013 						if ((*it).orientation == 'F')
1014 							file << _sep_ << ((*it).gBegin + options.positionFormat) << _sep_ << 'F' << _sep_ << "..";
1015 						else
1016 							file << _sep_ << (*it).gEnd << _sep_ << 'R' << _sep_ << "..";
1017 
1018 						if ((*it).editDist > 0 && options.dumpAlignment && options.hammingOnly)
1019 						{
1020 							gInf = infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd);
1021 							if ((*it).orientation == 'R')
1022 								reverseComplement(gInf);
1023 							for (unsigned i = 0; i < length(gInf); ++i)
1024 								if ((options.compMask[ordValue(reads[readNo][i])] &
1025 									options.compMask[ordValue(gInf[i])]) == 0)
1026 									file << _sep_ << i + 1 << gInf[i];
1027 						}
1028 					}
1029 					file << ::std::endl;
1030 					++it;
1031 				}
1032 			}
1033 			break;
1034 		case 3: // Gff:  printf "$chr $name_$format read $pos %ld . $dir . ID=$col[0]$unique$rest\n",$pos+$len-1;
1035 			for (unsigned filecount = 0; filecount < length(genomeFileNameList); ++filecount)
1036 			{
1037 				// open genome file
1038 				::std::ifstream gFile;
1039 				gFile.open(toCString(genomeFileNameList[filecount]), ::std::ios_base::in | ::std::ios_base::binary);
1040 				if (!gFile.is_open())
1041 				{
1042 					std::cerr << "Couldn't open genome file." << std::endl;
1043 					break;
1044 				}
1045 
1046 				Dna5String	currGenome;
1047 
1048 				// iterate over genome sequences
1049 				for(; !_streamEOF(gFile); ++currSeqNo)
1050 				{
1051 					read(gFile, currGenome, Fasta());			// read Fasta sequence
1052 					while(it != itEnd && (*it).gseqNo == currSeqNo)
1053 					{
1054 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1055 						if(options.maqMapping && options.noBelowIdentity && (*it).seedEditDist > maxSeedErrors)
1056 						{
1057 							++it;
1058 							continue;
1059 						}
1060 #endif
1061 
1062 						unsigned currReadNo = (*it).rseqNo;
1063 						int unique = 1;
1064 						unsigned bestMatches = 0;
1065 
1066 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1067 						if(options.maqMapping)
1068 							bestMatches = stats[0][currReadNo] >> 5;
1069 						else
1070 #endif
1071 						{
1072 							if (bestMatches == 0 && (unsigned)(*it).editDist < length(stats))
1073 							bestMatches = stats[(*it).editDist][currReadNo];
1074 
1075 						}
1076 
1077 						bool suboptimal = false;
1078 						if (
1079 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1080 							!options.maqMapping &&
1081 #endif
1082 							(unsigned)(*it).editDist > 0 )
1083 						{
1084 							for(unsigned d = 0; d < (unsigned)(*it).editDist; ++d)
1085 								if (stats[d][currReadNo]>0) suboptimal=true;
1086 						}
1087 
1088 						if (bestMatches !=  1)
1089 						{
1090 							unique = 0;
1091 							if(options.purgeAmbiguous)
1092 							{
1093 								++it;
1094 								continue;
1095 							}
1096 
1097 //							if((*it).mScore > 0) std::cout << (*it).mScore << "<-non uniq but score > 0\n";
1098 //							++it;
1099 //							continue; // TODO: output non-unique matches (concerns maq mapping only)
1100 						}
1101 						unsigned readLen = length(reads[currReadNo]);
1102 
1103 						String<Dna5Q> readInf = infix(reads[currReadNo],0,readLen);
1104 						switch (options.genomeNaming)
1105 						{
1106 							// 0..filename is the read's Fasta id
1107 							case 0:
1108 								file << genomeIDs[(*it).gseqNo] <<'\t';
1109 								break;
1110 							// 1..filename is the read filename + seqNo
1111 							case 1:
1112 								file.fill('0');
1113 								file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1 << '\t';
1114 								break;
1115 						}
1116 
1117 						// get alignment to dump or to fix end coordinates
1118 						if (!options.hammingOnly)
1119 						{
1120 							assignSource(row(align, 0), readInf);
1121 							//std::cout << "begin=" << (*it).gBegin <<" end="<< (*it).gEnd << std::endl;
1122 							assignSource(row(align, 1), infix(currGenome, (*it).gBegin, (*it).gEnd));
1123 							if ((*it).orientation == 'R')
1124 								reverseComplement(source(row(align, 1)));
1125 
1126 							globalAlignment(align, scoreType, AlignConfig<false,true,true,false>(), Gotoh());
1127 
1128 							//std::cout << align << std::endl;
1129 							//dumpAlignment(::std::cout, align);
1130 
1131 							// transform first and last read character to genomic positions
1132 							viewPosReadFirst  = toViewPosition(row(align, 0), 0);
1133 							viewPosReadLast   = toViewPosition(row(align, 0), readLen - 1);
1134 							TGPos genomePosReadFirst = toSourcePosition(row(align, 1), viewPosReadFirst);
1135 							TGPos genomePosReadLast  = toSourcePosition(row(align, 1), viewPosReadLast);
1136 							if ((*it).orientation == 'R')
1137 							{
1138 								// watch out at chromosome borders
1139 								(*it).gBegin = (*it).gEnd - _min((TGPos)(genomePosReadLast + 1),(*it).gEnd);
1140 								(*it).gEnd -= genomePosReadFirst;
1141 							}
1142 							else
1143 							{
1144 								(*it).gEnd = (*it).gBegin + _min(genomePosReadLast + 1, static_cast<TGPos>(length(currGenome) - (*it).gBegin));
1145 								(*it).gBegin += genomePosReadFirst;
1146 							}
1147 
1148 							clear(row(align,1));
1149 							clear(row(align,0));
1150 							assignSource(row(align, 0), readInf);
1151 							//std::cout << "begin=" << (*it).gBegin <<" end="<< (*it).gEnd << std::endl;
1152 							assignSource(row(align, 1), infix(currGenome, (*it).gBegin, (*it).gEnd));
1153 							if ((*it).orientation == 'R')
1154 								reverseComplement(source(row(align, 1)));
1155 
1156 							globalAlignment(align, scoreType, AlignConfig<false,true,true,false>(), Gotoh());
1157 
1158 							//std::cout << align << std::endl;
1159 							//dumpAlignment(::std::cout, align);
1160 
1161 							// transform first and last read character to genomic positions
1162 							viewPosReadFirst  = toViewPosition(row(align, 0), 0);
1163 							viewPosReadLast   = toViewPosition(row(align, 0), readLen - 1);
1164 
1165 
1166 						}
1167 
1168 						//file <<  options.runID << "_razers\tread";
1169 						file << "razers\tread";
1170 						file << '\t' << ((*it).gBegin + options.positionFormat) << '\t' << (*it).gEnd << '\t';
1171 						double percId = 100.0 * (1.0 - (double)(*it).editDist / (double)readLen);
1172 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1173 						if(options.maqMapping)
1174 							file << (*it).mScore << "\t";
1175 						else
1176 #endif
1177 							file << percId << "\t";
1178 
1179 						if ((*it).orientation == 'F')
1180 							file << '+' << '\t' << '.' <<'\t';
1181 						else
1182 							file << '-' << '\t' << '.' <<'\t';
1183 
1184 						switch (options.readNaming)
1185 						{
1186 							// 0..filename is the read's Fasta id
1187 							case 0:
1188 								file << "ID=" <<readIDs[currReadNo];
1189 								break;
1190 
1191 							// 1..filename is the read filename + seqNo
1192 							case 1:
1193 								file.fill('0');
1194 								file << "ID=" << readName << '#' << ::std::setw(pzeros) << currReadNo + 1;
1195 								break;
1196 						}
1197 						if(suboptimal) file << ";suboptimal";
1198 						else
1199 						{
1200 							if(unique==1) file << ";unique";
1201 							if(unique==0) file << ";multi";
1202 						}
1203 
1204 
1205 						if ((*it).editDist > 0)
1206 						{
1207 							if (options.hammingOnly)
1208 							{
1209 								gInf = infix(currGenome, (*it).gBegin, (*it).gEnd);
1210 								if ((*it).orientation == 'R')
1211 									reverseComplement(gInf);
1212 								bool first = true;
1213 								file << ";cigar=" << readLen << "M";
1214 								file << ";mutations=";
1215 								for (unsigned i = 0; i < length(gInf); ++i)
1216 									if ((options.compMask[ordValue(readInf[i])] &
1217 										options.compMask[ordValue(gInf[i])]) == 0)
1218 									{
1219 										if(first){ file << i + 1 << (Dna5)readInf[i]; first = false;}
1220 										else file <<','<< i + 1 << (Dna5)readInf[i];
1221 									}
1222 
1223 							}
1224 							else
1225 							{
1226 								std::stringstream cigar, mutations;
1227 								typedef typename Row<TAlign>::Type TRow;
1228 								typedef typename Iterator<TRow, Rooted>::Type TAlignIterator;
1229 								TAlignIterator ali_it0 = iter(row(align,0),viewPosReadFirst);
1230 								TAlignIterator ali_it1 = iter(row(align,1),viewPosReadFirst);
1231 								TAlignIterator ali_it0stop = iter(row(align,0),viewPosReadLast + 1);
1232 								TAlignIterator ali_it1stop = iter(row(align,1),viewPosReadLast + 1);
1233 
1234 								getCigarLine(align,cigar,mutations,ali_it0,ali_it0stop,ali_it1,ali_it1stop);
1235 								file << ";cigar="<<cigar.str();
1236 								if(length(mutations.str())>0)
1237 									file << ";mutations=" << mutations.str();
1238 							}
1239 						}
1240 
1241 
1242 						if(
1243 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1244 						options.maqMapping ||
1245 #endif
1246 						options.fastaIdQual)
1247 						{
1248 		//					file << ";read=";
1249 		//					for(unsigned j=0;j<length(reads[currReadNo]);++j)
1250 		//					{
1251 		//						file << (Dna5)reads[currReadNo][j];
1252 		//					}
1253 							file << ";quality=";
1254 							for(unsigned j=0;j<readLen;++j)
1255 							{
1256 								file << (char)(getQualityValue(readInf[j])+ 33);
1257 							}
1258 						}
1259 
1260 						file << ::std::endl;
1261 						if(options.dumpAlignment)
1262 						{
1263 							if((*it).editDist > 0 && !options.hammingOnly)
1264 								file << align;
1265 							else
1266 							{
1267 								clear(row(align,1));
1268 								clear(row(align,0));
1269 								assignSource(row(align, 0), readInf);
1270 								assignSource(row(align, 1), infix(currGenome, (*it).gBegin, (*it).gEnd));
1271 								if ((*it).orientation == 'R')
1272 									reverseComplement(source(row(align, 1)));
1273 								globalAlignment(align, scoreType, AlignConfig<false,true,true,false>(), Gotoh());
1274 								file << align;
1275 
1276 							}
1277 						}
1278 						++it;
1279 					}
1280 				}
1281 				gFile.close();
1282 				++filecount;
1283 			}
1284 			break;
1285 		case 33: // special Gff for split reads
1286 			while(it != itEnd)// && (*it).gseqNo == currSeqNo)
1287 			{
1288 				unsigned currReadNo = (*it).rseqNo;
1289 				Dna5String &currGenome = genomes[(*it).gseqNo];
1290 				unsigned readLen = length(reads[currReadNo]);
1291 				TMatch& mL = *it;
1292 				++it;
1293 				TMatch& mR = *it;
1294 				unsigned readLenL = mL.mScore;
1295 				unsigned readLenR = mR.mScore;
1296 				String<Dna5Q> readInfL = infix(reads[currReadNo],0,readLenL);
1297 				String<Dna5Q> readInfR = infix(reads[currReadNo],length(reads[currReadNo])-readLenR,length(reads[currReadNo]));
1298 				int offsetL = 0;
1299 				int offsetR = readLen - readLenR;
1300 				if(mL.orientation == 'R')
1301 				{
1302 					offsetR = 0;
1303 					offsetL = readLen - readLenL;
1304 					readInfL = infix(reads[currReadNo],length(reads[currReadNo])-readLenL,length(reads[currReadNo]));
1305 					readInfR = infix(reads[currReadNo],0,readLenR);
1306 				}
1307 
1308 				switch (options.genomeNaming)
1309 				{
1310 					// 0..filename is the read's Fasta id
1311 					case 0:
1312 						file << genomeIDs[(*it).gseqNo] <<'\t';
1313 						break;
1314 					// 1..filename is the read filename + seqNo
1315 					case 1:
1316 						file.fill('0');
1317 						file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1 << '\t';
1318 						break;
1319 				}
1320 
1321 				// get alignment to dump and get cigar string
1322 				if (!options.hammingOnly)
1323 				{
1324 					assignSource(row(alignL, 0), readInfL);
1325 					assignSource(row(alignL, 1), infix(currGenome, mL.gBegin, mL.gEnd));
1326 					if (mL.orientation == 'R')
1327 						reverseComplement(source(row(alignL, 1)));
1328 
1329 					globalAlignment(alignL, scoreType, AlignConfig<false,false,false,false>(), Gotoh());
1330 
1331 					assignSource(row(alignR, 0), readInfR);
1332 					assignSource(row(alignR, 1), infix(currGenome, mR.gBegin, mR.gEnd));
1333 					if (mR.orientation == 'R')
1334 						reverseComplement(source(row(alignR, 1)));
1335 
1336 					globalAlignment(alignR, scoreType, AlignConfig<false,false,false,false>(), Gotoh());
1337 				}
1338 
1339 				//file <<  options.runID << "_razers\tread";
1340 				file << "razers\tread";
1341 				file << '\t' << (mL.gBegin + options.positionFormat) << '\t' << mR.gEnd << '\t';
1342 				double percId = 100.0 * (1.0 + (double)(mL.pairScore-mL.mScore-mR.mScore) / (double)readLen);
1343 				file << percId << "\t";
1344 
1345 				if (mL.orientation == 'F')
1346 					file << '+' << '\t' << '.' <<'\t';
1347 				else
1348 					file << '-' << '\t' << '.' <<'\t';
1349 
1350 				switch (options.readNaming)
1351 				{
1352 					// 0..filename is the read's Fasta id
1353 					case 0:
1354 						file << "ID=" <<readIDs[currReadNo];
1355 						break;
1356 					// 1..filename is the read filename + seqNo
1357 					case 1:
1358 						file.fill('0');
1359 						file << "ID=" << readName << '#' << ::std::setw(pzeros) << currReadNo + 1;
1360 						break;
1361 				}
1362 				if(ambiStates[jj] == 0) file << ";unique";
1363 				if(ambiStates[jj] == 1) file << ";multi";
1364 				if(ambiStates[jj] == 2) file << ";suboptimal";
1365 				++jj;
1366 
1367 				SEQAN_ASSERT_TRUE(mL.pairId == mR.pairId);
1368 
1369 				int indelLen = mR.mScore + mL.mScore - readLen;
1370 				if(mL.gEnd != mR.gBegin) indelLen = mR.gBegin - mL.gEnd;
1371 
1372 				file << ";pairScore=" << (unsigned int) mR.pairScore;
1373 
1374 //				if(mR.traceExtension >= abs(indelLen))
1375 //					file << ";traceExt=" << (unsigned int) mR.traceExtension;
1376 
1377 				String<Pair<Dna5,int> > mutationsStr, mutStrMid, mutStrL, mutStrR;
1378 				String<Pair<char,int> > cigarStr, cigarMid, cigarL, cigarR;
1379 				if(indelLen < 0)
1380 				{
1381 					int offset = readLenL;
1382 					if(mL.orientation == 'R')
1383 						offset = readLenR;
1384 					String<Dna5Q> readInfMid = infix(reads[currReadNo],offset,offset - indelLen);
1385 					for (int i = 0; i < -indelLen; ++i)
1386 						appendValue(mutStrMid, Pair<Dna5,int>(readInfMid[i],i+offset+1));
1387 					appendValue(cigarMid, Pair<char,int>('I',-indelLen));
1388 
1389 				}
1390 				if(indelLen > 0) appendValue(cigarMid, Pair<char,int>('D',indelLen));
1391 				if(indelLen != 0)
1392 					file << ";split";
1393 
1394 				if (options.hammingOnly)
1395 				{
1396 					if(indelLen == 0)
1397 						appendValue(cigarL, Pair<char,int>('M',readLen));
1398 					else
1399 					{
1400 						appendValue(cigarL, Pair<char,int>('M',readLenL));
1401 						appendValue(cigarR, Pair<char,int>('M',readLenR));
1402 					}
1403 
1404 					//make left part of mutation string
1405 					if (mL.editDist > 0)
1406 					{
1407 						gInfL = infix(currGenome, mL.gBegin, mL.gEnd);
1408 						if (mL.orientation == 'R')
1409 							reverseComplement(gInfL);
1410 						for (unsigned i = 0; i < length(gInfL); ++i)
1411 							if ((options.compMask[ordValue(readInfL[i])] & options.compMask[ordValue(gInfL[i])]) == 0)
1412 								appendValue(mutStrL, Pair<Dna5,int>(readInfL[i],i+offsetL+1));
1413 					}
1414 
1415 					//make right part of mutation string
1416 					if (mR.editDist > 0)
1417 					{
1418 						gInfR = infix(currGenome, mR.gBegin, mR.gEnd);
1419 						if (mR.orientation == 'R')
1420 							reverseComplement(gInfR);
1421 						for (unsigned i = 0; i < length(gInfR); ++i)
1422 							if ((options.compMask[ordValue(readInfR[i])] & options.compMask[ordValue(gInfR[i])]) == 0)
1423 								appendValue(mutStrR, Pair<Dna5,int>(readInfR[i],i+offsetR+1));
1424 					}
1425 				}
1426 				else
1427 				{
1428 					typedef typename Row<TAlign>::Type TRow;
1429 					typedef typename Iterator<TRow, Rooted>::Type TAlignIterator;
1430 
1431 					//left business
1432 					TAlignIterator aliL_it0 = begin(row(alignL,0));
1433 					TAlignIterator aliL_it1 = begin(row(alignL,1));
1434 					TAlignIterator aliL_it0stop = end(row(alignL,0));
1435 					TAlignIterator aliL_it1stop = end(row(alignL,1));
1436 					getCigarLine(alignL,cigarL,mutStrL,offsetL,aliL_it0,aliL_it0stop,aliL_it1,aliL_it1stop);
1437 
1438 					//right business
1439 					TAlignIterator aliR_it0 = begin(row(alignR,0));
1440 					TAlignIterator aliR_it1 = begin(row(alignR,1));
1441 					TAlignIterator aliR_it0stop = end(row(alignR,0));
1442 					TAlignIterator aliR_it1stop = end(row(alignR,1));
1443 					getCigarLine(alignR,cigarR,mutStrR,offsetR,aliR_it0,aliR_it0stop,aliR_it1,aliR_it1stop);
1444 				}
1445 				//now plug together the parts: cigar
1446 				if(mL.orientation=='F')
1447 				{
1448 					append(cigarStr,cigarL);
1449 					append(cigarStr,cigarMid);
1450 					append(cigarStr,cigarR);
1451 					append(mutationsStr,mutStrL);
1452 					append(mutationsStr,mutStrMid);
1453 					append(mutationsStr,mutStrR);
1454 				}
1455 				else
1456 				{
1457 					append(cigarStr,cigarR);
1458 					append(cigarStr,cigarMid);
1459 					append(cigarStr,cigarL);
1460 					append(mutationsStr,mutStrR);
1461 					append(mutationsStr,mutStrMid);
1462 					append(mutationsStr,mutStrL);
1463 				}
1464 				bool first = true;
1465 				file << ";cigar=";// << cigarStr.str();
1466 				for (unsigned i = 0; i < length(cigarStr); ++i)
1467 					file << cigarStr[i].i2 << cigarStr[i].i1;
1468 
1469 				if(length(mutationsStr)>0)file << ";mutations=";
1470 				for (unsigned i = 0; i < length(mutationsStr); ++i)
1471 				{
1472 					if(first)
1473 					{
1474 						first = false;
1475 						file << mutationsStr[i].i2 << mutationsStr[i].i1;
1476 					}
1477 					else
1478 						file << "," << mutationsStr[i].i2 << mutationsStr[i].i1;
1479 				}
1480 
1481 				if(options.fastaIdQual)
1482 				{
1483 					file << ";quality=";
1484 					for(unsigned j=0;j<length(reads[currReadNo]);++j)
1485 					{
1486 						file << (char)(getQualityValue(reads[currReadNo][j])+ 33);
1487 					}
1488 				}
1489 				file << ::std::endl;
1490 				++it;
1491 			}
1492 			break;
1493 
1494 
1495 	}
1496 
1497 	file.close();
1498 
1499 	// get empirical error distribution
1500 	if (!empty(errorPrbFileName) && maxReadLength > 0)
1501 	{
1502 		file.open(toCString(errorPrbFileName), ::std::ios_base::out | ::std::ios_base::trunc);
1503 		if (file.is_open())
1504 		{
1505 			String<long double> posError;
1506 			unsigned unique = 0;
1507 			unsigned insertions = 0;
1508 			unsigned deletions = 0;
1509 			resize(posError, maxReadLength, 0);
1510 
1511 			if (options.hammingOnly)
1512 				unique = getErrorDistribution(posError, matches, reads, genomes, options);
1513 			else
1514 			{
1515 				unique = getErrorDistribution(posError, insertions, deletions, matches, reads, genomes, options);
1516 				::std::cerr << "insertProb: " << (double)insertions / ((double)length(posError) * (double)unique) << ::std::endl;
1517 				::std::cerr << "deleteProb: " << (double)deletions / ((double)length(posError) * (double)unique) << ::std::endl;
1518 			}
1519 
1520 			file << (double)posError[0] / (double)unique;
1521 			for (unsigned i = 1; i < length(posError); ++i)
1522 				file << '\t' << (double)posError[i] / (double)unique;
1523 			file << ::std::endl;
1524 			file.close();
1525 		} else
1526 			::std::cerr << "Failed to open error distribution file" << ::std::endl;
1527 	}
1528 
1529 	options.timeDumpResults = SEQAN_PROTIMEDIFF(dump_time);
1530 
1531 	if (options._debugLevel >= 1)
1532 		::std::cerr << "Dumping results took             \t" << options.timeDumpResults << " seconds" << ::std::endl;
1533 }
1534 
1535 
1536 }
1537 
1538 #endif
1539 
1540