1  /*==========================================================================
2              RazerS - Fast Read Mapping with Controlled Loss Rate
3                    http://www.seqan.de/projects/razers.html
4 
5  ============================================================================
6   Copyright (C) 2008 by David Weese
7 
8   This program is free software; you can redistribute it and/or
9   modify it under the terms of the GNU Lesser General Public
10   License as published by the Free Software Foundation; either
11   version 3 of the License, or (at your options) any later version.
12 
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17 
18   You should have received a copy of the GNU General Public License
19   along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  ==========================================================================*/
21 
22 #ifndef SEQAN_HEADER_OUTPUT_FORMAT_H
23 #define SEQAN_HEADER_OUTPUT_FORMAT_H
24 
25 #include <iostream>
26 #include <fstream>
27 #include <sstream>
28 
29 #include "razers.h"
30 #include <seqan/align.h>
31 
32 namespace seqan
33 {
34 
35 //////////////////////////////////////////////////////////////////////////////
36 // Quality-based score
37 
38 	template <typename TQualityString = CharString>
39 	struct Quality;
40 
41 	template <typename TValue, typename TQualityString>
42 	class Score<TValue, Quality<TQualityString> >
43 	{
44 	public:
45 		TValue data_match;
46 		TValue data_mismatch;
47 		TValue data_gap_extend;
48 		TValue data_gap_open;
49 
50 		TQualityString const *data_qual;
51 
52 	public:
Score()53 		Score():
54 			data_match(0),
55 			data_mismatch(-1),
56 			data_gap_extend(-1),
57 			data_gap_open(-1),
58 			data_qual(NULL)
59 		{
60 		}
Score(TValue _match,TValue _mismatch,TValue _gap)61 		Score(TValue _match, TValue _mismatch, TValue _gap):
62 			data_match(_match),
63 			data_mismatch(_mismatch),
64 			data_gap_extend(_gap),
65 			data_gap_open(_gap),
66 			data_qual(NULL)
67 		{
68 		}
Score(TValue _match,TValue _mismatch,TValue _gap_extend,TValue _gap_open,TQualityString const & _qual)69 		Score(TValue _match, TValue _mismatch, TValue _gap_extend, TValue _gap_open, TQualityString const &_qual):
70 			data_match(_match),
71 			data_mismatch(_mismatch),
72 			data_gap_extend(_gap_extend),
73 			data_gap_open(_gap_open),
74 			data_qual(&_qual)
75 		{
76 		}
77 
Score(Score const & other)78 		Score(Score const & other):
79 			data_match(other.data_match),
80 			data_mismatch(other.data_mismatch),
81 			data_gap_extend(other.data_gap_extend),
82 			data_gap_open(other.data_gap_open),
83 			data_qual(other.data_qual)
84 		{
85 		}
~Score()86 		~Score()
87 		{
88 		}
89 
90 		Score & operator = (Score const & other)
91 		{
92 			data_match = other.data_match;
93 			data_mismatch = other.data_mismatch;
94 			data_gap_extend = other.data_gap_extend;
95 			data_gap_open = other.data_gap_open;
96 			data_qual = other.data_qual;
97 			return *this;
98 		}
99 	};
100 
101 //////////////////////////////////////////////////////////////////////////////
102 
103 template <typename TValue, typename TQualityString, typename TPos1, typename TPos2, typename TSeq1, typename TSeq2>
104 inline TValue
score(Score<TValue,Quality<TQualityString>> const & me,TPos1 pos1,TPos2 pos2,TSeq1 const & seq1,TSeq2 const & seq2)105 score(Score<TValue, Quality<TQualityString> > const & me,
106 	  TPos1 pos1,
107 	  TPos2 pos2,
108 	  TSeq1 const &seq1,
109 	  TSeq2 const &seq2)
110 {
111 	if (seq1[pos1] != seq2[pos2])
112 		if (me.data_qual)
113 			return (*me.data_qual)[pos2];
114 		else
115 			return scoreMismatch(me);
116 	else
117 		return scoreMatch(me);
118 }
119 
120 
121 //////////////////////////////////////////////////////////////////////////////
122 // Less-operators ...
123 
124 	// ... to sort matches and remove duplicates with equal gBegin
125 	template <typename TReadMatch>
126 	struct LessGPosRNo : public ::std::binary_function < TReadMatch, TReadMatch, bool >
127 	{
operatorLessGPosRNo128 		inline bool operator() (TReadMatch const &a, TReadMatch const &b) const
129 		{
130 			// genome position and orientation
131 			if (a.gseqNo < b.gseqNo) return true;
132 			if (a.gseqNo > b.gseqNo) return false;
133 			if (a.gBegin < b.gBegin) return true;
134 			if (a.gBegin > b.gBegin) return false;
135 			if (a.orientation < b.orientation) return true;
136 			if (a.orientation > b.orientation) return false;
137 
138 			// read number
139 			if (a.rseqNo < b.rseqNo) return true;
140 			if (a.rseqNo > b.rseqNo) return false;
141 
142 			// pair match id
143 			if (a.pairId < b.pairId) return true;
144 			if (a.pairId > b.pairId) return false;
145 
146 			// quality
147 			return a.editDist < b.editDist;
148 		}
149 	};
150 
151 //////////////////////////////////////////////////////////////////////////////
152 // Determine error distribution
153 template <typename TErrDistr, typename TMatches, typename TReads, typename TGenomes, typename TOptions>
154 inline unsigned
getErrorDistribution(TErrDistr & posError,TMatches & matches,TReads & reads,TGenomes & genomes,TOptions & options)155 getErrorDistribution(
156 	TErrDistr &posError,
157 	TMatches &matches,
158 	TReads &reads,
159 	TGenomes &genomes,
160 	TOptions &options)
161 {
162 	typename Iterator<TMatches, Standard>::Type	it = begin(matches, Standard());
163 	typename Iterator<TMatches, Standard>::Type	itEnd = end(matches, Standard());
164 
165 	Dna5String genome;
166 	unsigned unique = 0;
167 	for (; it != itEnd; ++it)
168 	{
169 		if ((*it).orientation == '-') continue;
170 
171 		Dna5String const &read = reads[(*it).rseqNo];
172 		genome = infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd);
173 		if ((*it).orientation == 'R')
174 			reverseComplement(genome);
175 		for (unsigned i = 0; i < length(posError) && i < length(read); ++i)
176 			if ((options.compMask[ordValue(genome[i])] & options.compMask[ordValue(read[i])]) == 0)
177 				++posError[i];
178 		++unique;
179 	}
180 	return unique;
181 }
182 
183 template <typename TErrDistr, typename TCount1, typename TCount2, typename TMatches, typename TReads, typename TGenomes, typename TSpec>
184 inline unsigned
getErrorDistribution(TErrDistr & posError,TCount1 & insertions,TCount2 & deletions,TMatches & matches,TReads & reads,TGenomes & genomes,RazerSOptions<TSpec> & options)185 getErrorDistribution(
186 	TErrDistr &posError,
187 	TCount1 &insertions,
188 	TCount2 &deletions,
189 	TMatches &matches,
190 	TReads &reads,
191 	TGenomes &genomes,
192 	RazerSOptions<TSpec> &options)
193 {
194 	typedef Align<String<Dna5>, ArrayGaps> TAlign;
195 	typedef typename Row<TAlign>::Type TRow;
196 	typedef typename Iterator<TRow>::Type TIter;
197 
198 	//typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
199 	typedef typename Position<TAlign>::Type TPosition;
200 
201 	typename Iterator<TMatches, Standard>::Type	it = begin(matches, Standard());
202 	typename Iterator<TMatches, Standard>::Type	itEnd = end(matches, Standard());
203 
204 	Align<Dna5String, ArrayGaps> align;
205 	Score<int> scoreType = Score<int>(0, -999, -1001, -1000);	// levenshtein-score (match, mismatch, gapOpen, gapExtend)
206 	if (options.hammingOnly)
207 		scoreType.data_mismatch = -1;
208 	resize(rows(align), 2);
209 
210 	unsigned unique = 0;
211 	for (; it != itEnd; ++it)
212 	{
213 		if ((*it).orientation == '-') continue;
214 
215 		assignSource(row(align, 0), reads[(*it).rseqNo]);
216 		assignSource(row(align, 1), infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd));
217 		if ((*it).orientation == 'R')
218 			reverseComplement(source(row(align, 1)));
219 		globalAlignment(align, scoreType);
220 
221 		TRow& row0 = row(align, 0);
222 		TRow& row1 = row(align, 1);
223 
224 		TPosition begin = beginPosition(cols(align));
225 		TPosition end = endPosition(cols(align));
226 
227 		TIter it0 = iter(row0, begin);
228 		TIter it1 = iter(row1, begin);
229 		TIter end0 = iter(row0, end);
230 
231 		unsigned pos = 0;
232 		for (; it0 != end0 && pos < length(posError); ++it0, ++it1)
233 		{
234 			if (isGap(it0))
235 				++insertions;
236 			else
237 			{
238 				if (isGap(it1))
239 					++deletions;
240 				else
241 					if ((options.compMask[ordValue(getValue(it0))] & options.compMask[ordValue(getValue(it1))]) == 0)
242 						++posError[pos];
243 				++pos;
244 			}
245 		}
246 		++unique;
247 	}
248 	return unique;
249 }
250 
251 
252 //////////////////////////////////////////////////////////////////////////////
253 template <typename TFile, typename TSource, typename TSpec, typename TPosition>
254 inline void
dumpAlignment(TFile & target,Align<TSource,TSpec> const & source,TPosition begin_,TPosition end_)255 dumpAlignment(TFile & target, Align<TSource, TSpec> const & source, TPosition begin_, TPosition end_)
256 {
257 	typedef Align<TSource, TSpec> const TAlign;
258 	typedef typename Row<TAlign>::Type TRow;
259 	typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
260 
261 	TRowsPosition row_count = length(rows(source));
262 
263 	// Print sequences
264 	for (TRowsPosition i = 0; i < row_count; ++i)
265 	{
266 		if (i == 0)
267 			target << "#Read:   ";
268 		else
269 			target << "#Genome: ";
270 		TRow& row_ = row(source, i);
271 		typedef typename Iterator<typename Row<TAlign>::Type const>::Type TIter;
272 		TIter begin1_ = iter(row_, begin_);
273 		TIter end1_ = iter(row_, end_);
274 		for (; begin1_ != end1_; ++begin1_) {
275 			if (isGap(begin1_))
276 			    target << gapValue<char>();
277 			else
278 			    target << *begin1_;
279 		}
280 		target << '\n';
281 	}
282 }
283 
284 template<typename TMatches, typename TCounts, typename TOptions>
285 void
countCoocurrences(TMatches & matches,TCounts & cooc,TOptions & options)286 countCoocurrences(TMatches & matches, TCounts & cooc, TOptions & options)
287 {
288 	clear(cooc);
289 	int maxSeedErrors = (int)(options.errorRate * options.artSeedLength) + 1;
290 	resize(cooc,maxSeedErrors+1,0);
291 	for (int i = 0; i < maxSeedErrors+1; ++i)
292 		cooc[i] = 1;
293 
294 	int count = 0;
295 	unsigned readNo = -1;
296 	int preEditDist = -1;
297 	typename Iterator<TMatches>::Type it = begin(matches,Standard());
298 	typename Iterator<TMatches>::Type itEnd = end(matches,Standard());
299 
300 	for(; it != itEnd; ++it)
301 	{
302 		if ((*it).rseqNo == readNo)
303 		{
304 			if(preEditDist > 1) continue;// || dist > options.errorRate * maxReadLength + 1) continue;
305 			int dist = (*it).seedEditDist - preEditDist;
306 			if(dist > maxSeedErrors) continue;
307 			if(dist < 0) ++cooc[0];
308 			else ++cooc[dist];
309 		}
310 		else
311 		{
312 			readNo = (*it).rseqNo;
313 			preEditDist = (*it).seedEditDist;
314 			if(preEditDist <= 1) ++count;
315 		}
316 	}
317 	for (unsigned i = 0; i < length(cooc); ++i)
318 	{
319 		cooc[i] = (int)(-4.343 * log((double)cooc[i]/count) );
320 		if (cooc[i] < 0) cooc[i] = 0;
321 	}
322 	if(options._debugLevel > 1)
323 	{
324 		::std::cerr << "[mapping_count] ";
325 		for(unsigned j = 0; j < length(cooc); ++j)
326 			::std::cerr << cooc[j] << " ";
327 		::std::cerr << ::std::endl;
328 	}
329 
330 }
331 
332 
333 template<typename TAlign, typename TString, typename TIter>
334 void
getCigarLine(TAlign & align,TString & cigar,TString & mutations,TIter ali_it0,TIter ali_it0_stop,TIter ali_it1,TIter ali_it1_stop)335 getCigarLine(TAlign & align, TString & cigar, TString & mutations, TIter ali_it0, TIter ali_it0_stop, TIter ali_it1, TIter ali_it1_stop)
336 {
337 
338 	typedef typename Source<TAlign>::Type TSource;
339 	typedef typename Iterator<TSource, Rooted>::Type TStringIterator;
340 
341 // 	typedef typename Row<TAlign>::Type TRow;
342 // 	typedef typename Iterator<TRow, Rooted>::Type TAlignIterator;
343 
344 	TStringIterator readBase = begin(source(row(align,0)));
345 
346 	int readPos = 0;
347 	bool first = true;
348 	int inserted = 0;
349 	int deleted = 0;
350 	while(ali_it0 != ali_it0_stop && ali_it1 != ali_it1_stop)
351 	{
352 		inserted = 0;
353 		deleted = 0;
354 		int matched = 0;
355 		while(ali_it0!=ali_it0_stop && ali_it1!=ali_it1_stop && !isGap(ali_it0)&& !isGap(ali_it1))
356 		{
357 			++readPos;
358 			if(*ali_it1 != *ali_it0)
359 			{
360 				if(first) first = false;
361 				else mutations << ",";
362 				mutations << readPos <<*readBase;
363 			}
364 			++readBase;
365 			++ali_it0;
366 			++ali_it1;
367 			++matched;
368 		}
369 		if(matched>0) cigar << matched<< "M" ;
370 		while(ali_it0!=ali_it0_stop && isGap(ali_it0))
371 		{
372 			++ali_it0;
373 			++ali_it1;
374 			++deleted;
375 		}
376 		if(deleted>0) cigar << deleted << "D";
377 		while(isGap(ali_it1)&& ali_it1!=ali_it1_stop)
378 		{
379 			++ali_it0;
380 			++ali_it1;
381 			++readPos;
382 			if(first) first = false;
383 			else mutations << ",";
384 			mutations << readPos << *readBase;
385 			++readBase;
386 			++inserted;
387 		}
388 		if(inserted>0) cigar << inserted << "I";
389 	}
390 	// end gaps can happen in split mapping
391 	while(ali_it0!=ali_it0_stop)
392 	{
393 		++ali_it0;
394 		++deleted;
395 	}
396 	if(deleted>0) cigar << deleted << "D";
397 	while(ali_it1 != ali_it1_stop)
398 	{
399 		++ali_it1;
400 		++readPos;
401 		if(first) first = false;
402 		else mutations << ",";
403 		mutations << readPos << *readBase;
404 		++inserted;
405 	}
406 	if(inserted>0) cigar << inserted << "I";
407 
408 }
409 
410 
411 #ifdef RAZERS_DIRECT_MAQ_MAPPING
412 //////////////////////////////////////////////////////////////////////////////
413 // Assign mapping quality and remove suboptimal matches
414 template < typename TMatches, typename TReads, typename TCooc, typename TCounts, typename TSpec >
assignMappingQuality(TMatches & matches,TReads & reads,TCooc & cooc,TCounts & cnts,RazerSOptions<TSpec> & options)415 void assignMappingQuality(TMatches &matches, TReads & reads, TCooc & cooc, TCounts &cnts, RazerSOptions<TSpec> & options)
416 {
417 	typedef typename Value<TMatches>::Type				TMatch;
418 	typedef typename Iterator<TMatches, Standard>::Type		TIterator;
419 
420 	//matches are already sorted
421 	//::std::sort(
422 	//	begin(matches, Standard()),
423 	//	end(matches, Standard()),
424 	//	LessRNoMQ<TMatch>());
425 
426 
427 	int maxSeedErrors = (int)(options.errorRate*options.artSeedLength)+1;
428 	unsigned readNo = -1;
429 
430 	TIterator it = begin(matches, Standard());
431 	TIterator itEnd = end(matches, Standard());
432 	TIterator dit = it;
433 
434 	int bestQualSum, secondBestQualSum;
435 	int secondBestDist = -1 ,secondBestMatches = -1;
436 	for (; it != itEnd; ++it)
437 	{
438 		if ((*it).orientation == '-') continue;
439 		bool mappingQualityFound = false;
440 		int mappingQuality = 0;
441 		int qualTerm1,qualTerm2;
442 
443 		readNo = (*it).rseqNo;
444 		bestQualSum = (*it).mScore;
445 
446 		if(++it!=itEnd && (*it).rseqNo==readNo)
447 		{
448 			secondBestQualSum = (*it).mScore;
449 			secondBestDist = (*it).editDist;
450 			secondBestDist = (*it).editDist;
451 			secondBestMatches = cnts[1][readNo] >> 5;
452 //CHECKcnts		secondBestMatches = cnts[secondBestDist][readNo];
453 //			secondBestMatches = cnts[secondBestDist][readNo];
454 			(*it).orientation = '-';
455 		//	if(secondBestDist<=bestDist) unique=0;
456 		}
457 		else secondBestQualSum = -1000;
458 		--it; //it is now at best match of current rseqNo
459 
460 		int bestDist = (*it).editDist;
461 		int kPrime = (*it).seedEditDist;
462 		if((bestQualSum==secondBestQualSum) || (kPrime>maxSeedErrors))
463 			mappingQualityFound = true;   //mq=0
464 		else{
465 			if(secondBestQualSum == -1000) qualTerm1 = 99;
466 			else
467 			{
468 				qualTerm1 = (int)(secondBestQualSum - bestQualSum - 4.343 * log((double)secondBestMatches));
469 				//if (secondBestKPrime - kPrime <= 1 && qualTerm1 > options.mutationRateQual) qualTerm1 = options.mutationRateQual; //TODO abchecken was mehr sinn macht
470 				if (secondBestDist - bestDist <= 1 && qualTerm1 > options.mutationRateQual) qualTerm1 = options.mutationRateQual;
471 			}
472 			float avgSeedQual = 0.0;
473 			if(!mappingQualityFound)
474 			{
475 				//TODO!!! generalize and adapt to razers lossrates
476 				// lossrate 0.42 => -10 log10(0.42) = 4
477 				int kPrimeLoss = 4; // options.kPrimeLoss; // bezieht sich auf 3 fehler in 28bp
478 				qualTerm2 = kPrimeLoss + cooc[maxSeedErrors-kPrime];
479 
480 				for(unsigned j = 0; j<options.artSeedLength; ++j)
481 				{
482 					int q = getQualityValue(reads[readNo][j]);//(int)((unsigned char)(reads[readNo][j])>>3);
483 					if(q>options.mutationRateQual) q = options.mutationRateQual;
484 					avgSeedQual+=q;
485 				}
486 				avgSeedQual/=options.artSeedLength;
487 				//-10 log10(28-2) = 14;
488 				//generalize to -10 log10(artSeedLength - maxSeedErrors +1 ) // 14 fits for seedlength 28 to 32 with 2 errors
489 				if(avgSeedQual>14) qualTerm2 += (int)((maxSeedErrors-kPrime)*(avgSeedQual-14));
490 			}
491 		}
492 		if (!mappingQualityFound) mappingQuality = (qualTerm1<qualTerm2) ? qualTerm1:qualTerm2;
493 		if (mappingQuality < 0) mappingQuality = 0;
494 		(*it).mScore = mappingQuality;
495 
496 		*dit = *it;
497 	//	if(secondBestQualSum != -1000) ++it;
498 		++dit;
499 	}
500 	resize(matches, dit - begin(matches, Standard()));
501 }
502 #endif
503 
504 
505 //////////////////////////////////////////////////////////////////////////////
506 // Output matches
507 template <
508 	typename TMatches,
509 	typename TGenomeNames,
510 	typename TReads,
511 	typename TReadNames,
512 	typename TCounts,
513 	typename TSpec
514 >
dumpMatches(TMatches & matches,TGenomeNames const & genomeIDs,StringSet<CharString> & genomeFileNameList,::std::map<unsigned,::std::pair<::std::string,unsigned>> & gnoToFileMap,TReads const & reads,TCounts & stats,TReadNames const & readIDs,CharString readFName,CharString errorPrbFileName,RazerSOptions<TSpec> & options)515 void dumpMatches(
516 	TMatches &matches,							// forward/reverse matches
517 	TGenomeNames const &genomeIDs,				// Genome names (read from Fasta file, currently unused)
518 	StringSet<CharString> &genomeFileNameList,	// list of genome names (e.g. {"hs_ref_chr1.fa","hs_ref_chr2.fa"})
519 	::std::map<unsigned,::std::pair< ::std::string,unsigned> > &gnoToFileMap, //map to retrieve genome filename and sequence number within that file
520 	TReads const &reads,
521 	TCounts & stats,							// Match statistics (possibly empty)
522 	TReadNames const &readIDs,					// Read names (read from Fasta file, currently unused)
523 	CharString readFName,                       // read name (e.g. "reads.fa"), used for file/read naming
524 	CharString errorPrbFileName,
525 	RazerSOptions<TSpec> &options)
526 {
527 	typedef typename Value<TMatches>::Type		TMatch;
528 	//typedef typename Value<TReads>::Type		TRead;
529 	//typedef typename Value<TGenomeSet>::Type	TGenome;
530 	//typedef typename TMatch::TGPos				TGPos;
531 
532 	if (options.outputFormat == 2)
533 	{
534 		options.sortOrder = 0;		// ... to count multiple matches
535 	}
536 
537 	if (options.outputFormat == 2)
538 	{
539 		options.maxHits = 1;		// Eland outputs at most one match
540 		options.sortOrder = 0;		// read numbers are increasing
541 		options.positionFormat = 1;	// bases in file are numbered starting at 1
542 		options.dumpAlignment = options.hammingOnly;
543 	}
544 #ifdef RAZERS_DIRECT_MAQ_MAPPING
545 	if (options.maqMapping) options.outputFormat = 3;
546 	int maxSeedErrors = (int)(options.errorRate * options.artSeedLength); //without + 1 here, used to check whether match should be supported if noBelowIdentity option is switched on
547 #endif
548 	if (options.outputFormat == 3)
549 	{
550 		options.sortOrder = 1;		//  sort according to gPos
551 		options.positionFormat = 1;	// bases in file are numbered starting at 1
552 		options.dumpAlignment = false;
553 	}
554 
555 #ifdef RAZERS_SPLICED
556 	if(options.minMatchLen > 0)
557 		options.sortOrder = 1;
558 #endif
559 
560 	// error profile
561 	unsigned maxReadLength = 0;
562 	for (unsigned i = 0; i < length(reads); ++i)
563 		if (maxReadLength < length(reads[i]))
564 			maxReadLength = length(reads[i]);
565 
566 	SEQAN_PROTIMESTART(dump_time);
567 
568 	// load Genome sequences for alignment dumps
569 	TGenomeSet genomes;
570 	if ((options.outputFormat == 0 && !options.hammingOnly) || options.dumpAlignment || !empty(errorPrbFileName))
571 		if (!loadGenomes(genomes, genomeFileNameList)) {
572 			::std::cerr << "Failed to load genomes" << ::std::endl;
573 			options.dumpAlignment = false;
574 		}
575 
576 	// how many 0's should be padded?
577 	int pzeros = 0;
578 	for (unsigned l = length(reads); l > 9; l = l / 10)
579 		++pzeros;
580 
581 	int gzeros = 0;
582 	for (unsigned l = length(genomes); l > 9; l = l / 10)
583 		++gzeros;
584 
585 	// remove the directory prefix of readFName
586 	::std::string _readName;
587 	assign(_readName, readFName);
588 	size_t lastPos = _readName.find_last_of('/') + 1;
589 	if (lastPos == _readName.npos) lastPos = _readName.find_last_of('\\') + 1;
590 	if (lastPos == _readName.npos) lastPos = 0;
591 	CharString readName = _readName.substr(lastPos);
592 
593 
594 	typedef Align<String<Dna5>, ArrayGaps> TAlign;
595 	TAlign align;
596 	Score<int> scoreType = Score<int>(0, -999, -1001, -1000);	// levenshtein-score (match, mismatch, gapOpen, gapExtend)
597 //	Score<int> scoreType(0,-1,-1,-1);
598 
599 	if (options.hammingOnly)
600 		scoreType.data_mismatch = -1;
601 	resize(rows(align), 2);
602 
603     VirtualStream<char, Output> file;
604     bool success;
605     if (!isEqual(options.output, "-"))
606         success = open(file, toCString(options.output));
607     else
608         success = open(file, std::cout, Nothing());
609 
610     if (!success)
611     {
612         ::std::cerr << "Failed to open output file" << ::std::endl;
613         return;
614     }
615 
616 #ifdef RAZERS_SPLICED
617 	//maskPairDuplicates(matches);
618 #else
619 	maskDuplicates(matches, options);
620 #endif
621 	if (options.outputFormat > 0
622 #ifdef RAZERS_DIRECT_MAQ_MAPPING
623 	 && !options.maqMapping
624 #endif
625 	)
626 	{
627 		// match statistics
628 		unsigned maxErrors = (int)(options.errorRate * maxReadLength);
629 		//if (maxErrors > 10) maxErrors = 10;
630         if (maxErrors < 2 && options.outputFormat == 2)
631             maxErrors = 2;
632 		resize(stats, maxErrors + 1);
633 		for (unsigned i = 0; i <= maxErrors; ++i)
634 			resize(stats[i], length(reads), 0);
635 #ifdef RAZERS_SPLICED
636 		if(options.minMatchLen > 0) countSplitMatches(matches, stats);
637 		else
638 #endif
639 		countMatches(matches, stats);
640 	}
641 
642 
643 
644 	Nothing nothing;
645 	unsigned currSeqNo = 0;
646 #ifdef RAZERS_DIRECT_MAQ_MAPPING
647 	if(options.maqMapping)
648 	{
649 		String<int> cooc;
650 		compactMatches(matches, stats, options, true, nothing, false); //only best two matches per read are kept
651 		countCoocurrences(matches,cooc,options);	//coocurrence statistics are filled
652 		assignMappingQuality(matches,reads,cooc,stats,options);//mapping qualities are assigned and only one match per read is kept
653 	}
654 	else
655 #endif
656 
657 #ifdef RAZERS_MICRO_RNA
658 	if(options.microRNA)purgeAmbiguousRnaMatches(matches,options);
659 	else
660 #endif
661 	{
662 #ifdef RAZERS_SPLICED
663 		if(options.minMatchLen>0)
664 			compactSplicedMatches(matches, stats, options, true, nothing,nothing);
665 		else
666 #endif
667 			compactMatches(matches, stats, options, true, nothing);
668 	}
669 
670 	switch (options.sortOrder) {
671 		case 0:
672 			::std::sort(
673 				begin(matches, Standard()),
674 				end(matches, Standard()),
675 				LessRNoGPos<TMatch>());
676 			break;
677 
678 		case 1:
679 			::std::sort(
680 				begin(matches, Standard()),
681 				end(matches, Standard()),
682 				LessGPosRNo<TMatch>());
683 			break;
684 
685 	}
686 
687 	typename Iterator<TMatches, Standard>::Type	it = begin(matches, Standard());
688 	typename Iterator<TMatches, Standard>::Type	itEnd = end(matches, Standard());
689 
690 
691 	Dna5String gInf;
692 	char _sep_ = '\t';
693 	unsigned viewPosReadFirst = 0;
694 	unsigned viewPosReadLast  = 0;
695 
696 	switch (options.outputFormat)
697 	{
698 		case 0:	// Razer Format
699 //			_sep_ = ',';
700 			for(; it != itEnd; ++it)
701 			{
702 				unsigned	readLen = length(reads[(*it).rseqNo]);
703 				double		percId = 100.0 * (1.0 - (double)(*it).editDist / (double)readLen);
704 #ifdef RAZERS_MICRO_RNA
705 				percId = 100.0 * (1.0 - (double)(*it).editDist / (double) ((*it).mScore));
706 #endif
707 				switch (options.readNaming)
708 				{
709 					// 0..filename is the read's Fasta id
710 					case 0:
711 						file << readIDs[(*it).rseqNo];
712 						break;
713 
714 					// 1..filename is the read filename + seqNo
715 					case 1:
716 						file.fill('0');
717 						file << readName << '#' << ::std::setw(pzeros) << (*it).rseqNo + 1;
718 						break;
719 
720 					// 2..filename is the read sequence itself
721 					case 2:
722 						file << reads[(*it).rseqNo];
723 				}
724 
725 				file << _sep_ << options.positionFormat << _sep_ << readLen << _sep_ << (*it).orientation << _sep_;
726 
727 				switch (options.genomeNaming)
728 				{
729 					// 0..filename is the read's Fasta id
730 					case 0:
731 						file << genomeIDs[(*it).gseqNo];
732 						break;
733 
734 					// 1..filename is the read filename + seqNo
735 					case 1:
736 						file.fill('0');
737 						file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1;
738 				}
739 
740 				// get alignment to dump or to fix end coordinates
741 				if (options.dumpAlignment || !options.hammingOnly)
742 				{
743 #ifdef RAZERS_MICRO_RNA
744 					if(options.microRNA)
745 						assignSource(row(align, 0), prefix(reads[(*it).rseqNo],(*it).mScore));
746 					else
747 #endif
748 					assignSource(row(align, 0), reads[(*it).rseqNo]);
749 					assignSource(row(align, 1), infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd));
750 #ifdef RAZERS_MATEPAIRS
751 					if ((*it).pairId != 0 && ((*it).rseqNo & 1))
752 						reverseComplement(source(row(align, 0)));
753 #endif
754 					if ((*it).orientation == 'R')
755 						reverseComplement(source(row(align, 1)));
756 
757 					globalAlignment(align, scoreType, AlignConfig<false,true,true,false>(), Gotoh());
758 
759 					// transform first and last read character to genomic positions
760 					viewPosReadFirst = toViewPosition(row(align, 0), 0);
761 					viewPosReadLast  = toViewPosition(row(align, 0), readLen - 1);
762 					unsigned genomePosReadFirst = toSourcePosition(row(align, 1), viewPosReadFirst);
763 					unsigned genomePosReadLast  = toSourcePosition(row(align, 1), viewPosReadLast);
764 
765 					if ((*it).orientation == 'R')
766 					{
767 						(*it).gBegin = (*it).gEnd - (genomePosReadLast + 1);
768 						(*it).gEnd -= genomePosReadFirst;
769 					}
770 					else
771 					{
772 						(*it).gEnd = (*it).gBegin + (genomePosReadLast + 1);
773 						(*it).gBegin += genomePosReadFirst;
774 					}
775 				}
776 
777 				file << _sep_ << ((*it).gBegin + options.positionFormat) << _sep_ << (*it).gEnd << _sep_ << ::std::setprecision(5) << percId;
778 #ifdef RAZERS_MICRO_RNA
779 				if(options.microRNA) file << _sep_ << (*it).mScore;
780 #endif
781 
782 #ifdef RAZERS_MATEPAIRS
783 				if ((*it).pairId != 0)
784 					file << _sep_ << (*it).pairId << _sep_ << (*it).pairScore << _sep_ << (*it).mateDelta;
785 #endif
786 				file << ::std::endl;
787 
788 				if (options.dumpAlignment)
789 					dumpAlignment(file, align, viewPosReadFirst, viewPosReadLast + 1);
790 			}
791 			break;
792 
793 
794 		case 1:	// Enhanced Fasta Format
795 			_sep_ = ',';
796 			for(unsigned matchReadNo = -1, matchReadCount = 0; it != itEnd; ++it)
797 			{
798 				unsigned	readLen = length(reads[(*it).rseqNo]);
799 				double		percId = 100.0 * (1.0 - (double)(*it).editDist / (double)readLen);
800 
801 				if (matchReadNo != (*it).rseqNo)
802 				{
803 					matchReadNo = (*it).rseqNo;
804 					matchReadCount = 0;
805 				} else
806 					++matchReadCount;
807 
808 				::std::string fastaID;
809 				assign(fastaID, readIDs[(*it).rseqNo]);
810 
811 				::std::string id = fastaID;
812 				int fragId = (*it).rseqNo;
813 				bool appendMatchId = options.maxHits > 1;
814 
815 				size_t left = fastaID.find_first_of('[');
816 				size_t right = fastaID.find_last_of(']');
817 				if (left != fastaID.npos && right != fastaID.npos && left < right)
818 				{
819 					fastaID.erase(right);
820 					fastaID.erase(0, left + 1);
821 					replace(fastaID.begin(), fastaID.end(), ',', ' ');
822 					size_t pos = fastaID.find("id=");
823 					if (pos != fastaID.npos) {
824 						::std::istringstream iss(fastaID.substr(pos + 3));
825 						iss >> id;
826 //						appendMatchId = false;
827 					}
828 					pos = fastaID.find("fragId=");
829 					if (pos != fastaID.npos) {
830 						::std::istringstream iss(fastaID.substr(pos + 7));
831 						iss >> fragId;
832 					}
833 				}
834 
835 				if ((*it).orientation == 'F')
836 					// forward strand
837 					file << '>' << ((*it).gBegin + options.positionFormat) << _sep_ << (*it).gEnd;
838 				else
839 					// reverse strand (switch begin and end)
840 					file << '>' << (*it).gEnd << _sep_ << ((*it).gBegin + options.positionFormat);
841 
842 				unsigned ambig = 0;
843 				for (unsigned i = 0; i <= (*it).editDist && i < length(stats); ++i)
844 					ambig += stats[i][(*it).rseqNo];
845 
846 				file << "[id=" << id;
847 				if (appendMatchId) file << "_" << matchReadCount;
848 				file << ",fragId=" << fragId;
849 				file << ",contigId=" << genomeIDs[(*it).gseqNo];
850 				file << ",errors=" << (*it).editDist << ",percId=" << ::std::setprecision(5) << percId;
851 				file << ",ambiguity=" << ambig << ']' << ::std::endl;
852 
853 				file << reads[(*it).rseqNo] << ::std::endl;
854 			}
855 			break;
856 
857 
858 		case 2:	// Eland Format
859 			_sep_ = '\t';
860 			for(unsigned readNo = 0; readNo < length(reads); ++readNo)
861 			{
862 				switch (options.readNaming)
863 				{
864 					// 0..filename is the read's Fasta id
865 					case 0:
866 						file << '>' << readIDs[readNo] << _sep_;
867 						break;
868 
869 					// 1..filename is the read filename + seqNo
870 					case 1:
871 						file.fill('0');
872 						file << readName << '#' << ::std::setw(pzeros) << readNo + 1  << _sep_;
873 						break;
874 				}
875 
876 				if (it == itEnd || readNo < (*it).rseqNo)
877 				{
878 					if (!empty(reads[readNo]))
879 						file << reads[readNo] << _sep_ << "NM" << _sep_ << '0' << _sep_ << '0' << _sep_ << '0' << ::std::endl;
880 					else
881 					{
882 						for (unsigned i = 0; i < maxReadLength; ++i)
883 							file << '.';
884 						file << _sep_ << "QC" << _sep_ << '0' << _sep_ << '0' << _sep_ << '0' << ::std::endl;
885 					}
886 				} else
887 				{
888 					file << reads[readNo] << _sep_;
889 					unsigned bestMatches = 1;
890 					if ((unsigned)(*it).editDist < length(stats))
891 						bestMatches = stats[(*it).editDist][readNo];
892 
893 					if (bestMatches == 0) file << '?';	// impossible
894 					if (bestMatches == 1) file << 'U';	// unique best match
895 					if (bestMatches >  1) file << 'R';	// non-unique best matches
896 
897 					file << (*it).editDist << _sep_ << stats[0][readNo] << _sep_ << stats[1][readNo] << _sep_ << stats[2][readNo];
898 
899 					if (bestMatches == 1)
900 					{
901 						file << _sep_;
902 						switch (options.genomeNaming)
903 						{
904 							// 0..filename is the read's Fasta id
905 							case 0:
906 								file << genomeIDs[(*it).gseqNo];
907 								break;
908 
909 							// 1..filename is the read filename + seqNo
910 							case 1:
911 								file.fill('0');
912 								file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1;
913 						}
914 
915 						if ((*it).orientation == 'F')
916 							file << _sep_ << ((*it).gBegin + options.positionFormat) << _sep_ << 'F' << _sep_ << "..";
917 						else
918 							file << _sep_ << (*it).gEnd << _sep_ << 'R' << _sep_ << "..";
919 
920 						if ((*it).editDist > 0 && options.dumpAlignment && options.hammingOnly)
921 						{
922 							gInf = infix(genomes[(*it).gseqNo], (*it).gBegin, (*it).gEnd);
923 							if ((*it).orientation == 'R')
924 								reverseComplement(gInf);
925 							for (unsigned i = 0; i < length(gInf); ++i)
926 								if ((options.compMask[ordValue(reads[readNo][i])] &
927 									options.compMask[ordValue(gInf[i])]) == 0)
928 									file << _sep_ << i + 1 << gInf[i];
929 						}
930 					}
931 					file << ::std::endl;
932 					++it;
933 				}
934 			}
935 			break;
936 		case 3: // Gff:  printf "$chr $name_$format read $pos %ld . $dir . ID=$col[0]$unique$rest\n",$pos+$len-1;
937 			// NOTE(marehr): filecount+=2 might be a potential bug [https://github.com/seqan/seqan/issues/2165]
938 			// In revision 4dbf27b55 and before, filecount was incremented twice at the
939 			// end of the for loop, which caused a compiler warning (once in the body
940 			// and once in the iteration_expression of the for loop). We kept this
941 			// behaviour, because we have no active maintainer for this app.
942 			for (unsigned filecount = 0; filecount < length(genomeFileNameList); filecount+=2)
943 			{
944 				// open genome file
945 				SeqFileIn gFile;
946 				if (!open(gFile, toCString(genomeFileNameList[filecount])))
947 				{
948 					std::cerr << "Couldn't open genome file." << std::endl;
949 					break;
950 				}
951 
952                 CharString currId;
953 				Dna5String currGenome;
954 
955 				// iterate over genome sequences
956 				for(; !atEnd(gFile); ++currSeqNo)
957 				{
958 					readRecord(currId, currGenome, gFile);			// read Fasta sequence
959 					while(it != itEnd && (*it).gseqNo == currSeqNo)
960 					{
961 #ifdef RAZERS_DIRECT_MAQ_MAPPING
962 						if(options.maqMapping && options.noBelowIdentity && (*it).seedEditDist > maxSeedErrors)
963 						{
964 							++it;
965 							continue;
966 						}
967 #endif
968 
969 						unsigned currReadNo = (*it).rseqNo;
970 						int unique = 1;
971 						unsigned bestMatches = 0;
972 
973 #ifdef RAZERS_DIRECT_MAQ_MAPPING
974 						if(options.maqMapping)
975 							bestMatches = stats[0][currReadNo] >> 5;
976 						else
977 #endif
978 						{
979 #ifdef RAZERS_SPLICED
980 							if (options.minMatchLen > 0)
981 							{
982 								if( -(*it).pairScore < (int)length(stats))
983 									bestMatches = stats[-(*it).pairScore][currReadNo]/2;
984 							}
985 							else
986 #endif
987 								if (bestMatches == 0 && (unsigned)(*it).editDist < length(stats))
988 								bestMatches = stats[(*it).editDist][currReadNo];
989 
990 						}
991 
992 						bool suboptimal = false;
993 						if (
994 #ifdef RAZERS_DIRECT_MAQ_MAPPING
995 							!options.maqMapping &&
996 #endif
997 							(unsigned)(*it).editDist > 0
998 #ifdef RAZERS_SPLICED
999 							&& options.minMatchLen == 0
1000 #endif
1001 							)
1002 						{
1003 							for(unsigned d = 0; d < (unsigned)(*it).editDist; ++d)
1004 								if (stats[d][currReadNo]>0) suboptimal=true;
1005 						}
1006 #ifdef RAZERS_SPLICED
1007 						if(options.minMatchLen > 0 &&  -(*it).pairScore > 0)
1008 						{
1009 							for(unsigned d = 0; d < -(unsigned)(*it).pairScore; ++d)
1010 								if (stats[d][currReadNo]>0) suboptimal=true;
1011 						}
1012 #endif
1013 
1014 						if (bestMatches !=  1)
1015 						{
1016 							unique = 0;
1017 							if(options.purgeAmbiguous)
1018 							{
1019 								++it;
1020 								continue;
1021 							}
1022 
1023 //							if((*it).mScore > 0) std::cout << (*it).mScore << "<-non uniq but score > 0\n";
1024 //							++it;
1025 //							continue; // TODO: output non-unique matches (concerns maq mapping only)
1026 						}
1027 						unsigned readLen = length(reads[currReadNo]);
1028 #ifdef RAZERS_SPLICED
1029 						if(options.minMatchLen > 0)
1030 							readLen = (*it).mScore;
1031 #endif
1032 						String<Dna5Q> readInf = infix(reads[currReadNo],0,readLen);
1033 #ifdef RAZERS_SPLICED
1034 						if(options.minMatchLen > 0)
1035 						{
1036 							if(((*it).orientation == 'F' && (*it).mateDelta < 0)
1037 								|| ((*it).orientation == 'R' && (*it).mateDelta > 0))
1038 							{
1039 								readInf = infix(reads[currReadNo],
1040 									length(reads[currReadNo])-readLen,
1041 									length(reads[currReadNo]));
1042 							}
1043 						}
1044  #endif
1045 
1046 						switch (options.genomeNaming)
1047 						{
1048 							// 0..filename is the read's Fasta id
1049 							case 0:
1050 								file << genomeIDs[(*it).gseqNo] <<'\t';
1051 								break;
1052 							// 1..filename is the read filename + seqNo
1053 							case 1:
1054 								file.fill('0');
1055 								file << gnoToFileMap[(*it).gseqNo].first << '#' << ::std::setw(gzeros) << gnoToFileMap[(*it).gseqNo].second + 1 << '\t';
1056 								break;
1057 						}
1058 
1059 						// get alignment to dump or to fix end coordinates
1060 						if (!options.hammingOnly)
1061 						{
1062 							assignSource(row(align, 0), readInf);
1063 							assignSource(row(align, 1), infix(currGenome, (*it).gBegin, (*it).gEnd));
1064 							if ((*it).orientation == 'R')
1065 								reverseComplement(source(row(align, 1)));
1066 
1067 #ifdef RAZERS_SPLICED
1068 							if(options.minMatchLen > 0)
1069 								globalAlignment(align, scoreType, AlignConfig<false,false,false,false>(), Gotoh());
1070 							else
1071 #endif
1072 								globalAlignment(align, scoreType, AlignConfig<false,true,true,false>(), Gotoh());
1073 
1074 							// transform first and last read character to genomic positions
1075 							viewPosReadFirst  = toViewPosition(row(align, 0), 0);
1076 							viewPosReadLast   = toViewPosition(row(align, 0), readLen - 1);
1077 							unsigned genomePosReadFirst = toSourcePosition(row(align, 1), viewPosReadFirst);
1078 							unsigned genomePosReadLast  = toSourcePosition(row(align, 1), viewPosReadLast);
1079 #ifdef RAZERS_SPLICED
1080 							if(options.minMatchLen == 0)
1081 #endif
1082 							{
1083 								if ((*it).orientation == 'R')
1084 								{
1085 									(*it).gBegin = (*it).gEnd - (genomePosReadLast + 1);
1086 									(*it).gEnd -= genomePosReadFirst;
1087 								}
1088 								else
1089 								{
1090 									(*it).gEnd = (*it).gBegin + (genomePosReadLast + 1);
1091 									(*it).gBegin += genomePosReadFirst;
1092 								}
1093 							}
1094 						}
1095 
1096 						//file <<  options.runID << "_razers\tread";
1097 						file << "razers\tread";
1098 						file << '\t' << ((*it).gBegin + options.positionFormat) << '\t' << (*it).gEnd << '\t';
1099 						double percId = 100.0 * (1.0 - (double)(*it).editDist / (double)readLen);
1100 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1101 						if(options.maqMapping)
1102 							file << (*it).mScore << "\t";
1103 						else
1104 #endif
1105 							file << percId << "\t";
1106 
1107 						if ((*it).orientation == 'F')
1108 							file << '+' << '\t' << '.' <<'\t';
1109 						else
1110 							file << '-' << '\t' << '.' <<'\t';
1111 
1112 						switch (options.readNaming)
1113 						{
1114 							// 0..filename is the read's Fasta id
1115 							case 0:
1116 								file << "ID=" <<readIDs[currReadNo];
1117 								break;
1118 
1119 							// 1..filename is the read filename + seqNo
1120 							case 1:
1121 								file.fill('0');
1122 								file << "ID=" << readName << '#' << ::std::setw(pzeros) << currReadNo + 1;
1123 								break;
1124 						}
1125 						if(suboptimal) file << ";suboptimal";
1126 						else
1127 						{
1128 							if(unique==1) file << ";unique";
1129 							if(unique==0) file << ";multi";
1130 						}
1131 
1132 #ifdef RAZERS_SPLICED
1133 						if(options.minMatchLen > 0)
1134 						{
1135 							file << ";delta=" << (*it).mateDelta << ";pairId="<<(*it).pairId;
1136 							file << ";pairErr=" << -(*it).pairScore;
1137 						}
1138 #endif
1139 
1140 						if ((*it).editDist > 0)
1141 						{
1142 							if (options.hammingOnly)
1143 							{
1144 								gInf = infix(currGenome, (*it).gBegin, (*it).gEnd);
1145 								if ((*it).orientation == 'R')
1146 									reverseComplement(gInf);
1147 								bool first = true;
1148 								file << ";cigar=" << readLen << "M";
1149 								file << ";mutations=";
1150 								for (unsigned i = 0; i < length(gInf); ++i)
1151 									if ((options.compMask[ordValue(readInf[i])] &
1152 										options.compMask[ordValue(gInf[i])]) == 0)
1153 									{
1154 										if(first){ file << i + 1 << (Dna5)readInf[i]; first = false;}
1155 										else file <<','<< i + 1 << (Dna5)readInf[i];
1156 									}
1157 
1158 							}
1159 							else
1160 							{
1161 								std::stringstream cigar, mutations;
1162 								typedef typename Row<TAlign>::Type TRow;
1163 								typedef typename Iterator<TRow, Rooted>::Type TAlignIterator;
1164 								TAlignIterator ali_it0 = iter(row(align,0),viewPosReadFirst);
1165 								TAlignIterator ali_it1 = iter(row(align,1),viewPosReadFirst);
1166 								TAlignIterator ali_it0stop = iter(row(align,0),viewPosReadLast + 1);
1167 								TAlignIterator ali_it1stop = iter(row(align,1),viewPosReadLast + 1);
1168 
1169 #ifdef RAZERS_SPLICED
1170 								if(options.minMatchLen > 0)
1171 								{
1172 									ali_it0 = begin(row(align,0));
1173 									ali_it1 = begin(row(align,1));
1174 									ali_it0stop = end(row(align,0));
1175 									ali_it1stop = end(row(align,1));
1176 								}
1177 #endif
1178 								getCigarLine(align,cigar,mutations,ali_it0,ali_it0stop,ali_it1,ali_it1stop);
1179 								file << ";cigar="<<cigar.str();
1180 								if(length(mutations.str())>0)
1181 									file << ";mutations=" << mutations.str();
1182 							}
1183 						}
1184 
1185 
1186 						if(
1187 #ifdef RAZERS_DIRECT_MAQ_MAPPING
1188 						options.maqMapping ||
1189 #endif
1190 						options.fastaIdQual)
1191 						{
1192 		//					file << ";read=";
1193 		//					for(unsigned j=0;j<length(reads[currReadNo]);++j)
1194 		//					{
1195 		//						file << (Dna5)reads[currReadNo][j];
1196 		//					}
1197 							file << ";quality=";
1198 							for(unsigned j=0;j<readLen;++j)
1199 							{
1200 								file << (char)(getQualityValue(readInf[j])+ 33);
1201 							}
1202 						}
1203 
1204 						file << ::std::endl;
1205 						++it;
1206 					}
1207 				}
1208 				close(gFile);
1209 			}
1210 			break;
1211 	}
1212 
1213 	close(file);
1214 
1215 	// get empirical error distribution
1216 	if (!empty(errorPrbFileName) && maxReadLength > 0)
1217 	{
1218         std::ofstream file;
1219 		file.open(toCString(errorPrbFileName), ::std::ios_base::out | ::std::ios_base::trunc);
1220 		if (file.is_open())
1221 		{
1222 			String<long double> posError;
1223 			unsigned unique = 0;
1224 			unsigned insertions = 0;
1225 			unsigned deletions = 0;
1226 			resize(posError, maxReadLength, 0);
1227 
1228 			if (options.hammingOnly)
1229 				unique = getErrorDistribution(posError, matches, reads, genomes, options);
1230 			else
1231 			{
1232 				unique = getErrorDistribution(posError, insertions, deletions, matches, reads, genomes, options);
1233 				::std::cerr << "insertProb: " << (double)insertions / ((double)length(posError) * (double)unique) << ::std::endl;
1234 				::std::cerr << "deleteProb: " << (double)deletions / ((double)length(posError) * (double)unique) << ::std::endl;
1235 			}
1236 
1237 			file << (double)posError[0] / (double)unique;
1238 			for (unsigned i = 1; i < length(posError); ++i)
1239 				file << '\t' << (double)posError[i] / (double)unique;
1240 			file << ::std::endl;
1241 			file.close();
1242 		} else
1243 			::std::cerr << "Failed to open error distribution file" << ::std::endl;
1244 	}
1245 
1246 	options.timeDumpResults = SEQAN_PROTIMEDIFF(dump_time);
1247 
1248 	if (options._debugLevel >= 1)
1249 		::std::cerr << "Dumping results took             \t" << options.timeDumpResults << " seconds" << ::std::endl;
1250 }
1251 
1252 
1253 }
1254 
1255 #endif
1256 
1257