1 /*
2  * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3  *
4  * This file is part of Bowtie 2.
5  *
6  * Bowtie 2 is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Bowtie 2 is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include <iomanip>
21 #include <limits>
22 #include "aln_sink.h"
23 #include "aligner_seed.h"
24 #include "util.h"
25 
26 using namespace std;
27 
28 
29 /**
30  * Initialize state machine with a new read.  The state we start in depends
31  * on whether it's paired-end or unpaired.
32  */
nextRead(bool paired)33 void ReportingState::nextRead(bool paired) {
34 	paired_ = paired;
35 	if(paired) {
36 		state_ = CONCORDANT_PAIRS;
37 		doneConcord_ = false;
38 		doneDiscord_ = p_.discord ? false : true;
39 		doneUnpair1_ = p_.mixed   ? false : true;
40 		doneUnpair2_ = p_.mixed   ? false : true;
41 		exitConcord_ = ReportingState::EXIT_DID_NOT_EXIT;
42 		exitDiscord_ = p_.discord ?
43 			ReportingState::EXIT_DID_NOT_EXIT :
44 			ReportingState::EXIT_DID_NOT_ENTER;
45 		exitUnpair1_ = p_.mixed   ?
46 			ReportingState::EXIT_DID_NOT_EXIT :
47 			ReportingState::EXIT_DID_NOT_ENTER;
48 		exitUnpair2_ = p_.mixed   ?
49 			ReportingState::EXIT_DID_NOT_EXIT :
50 			ReportingState::EXIT_DID_NOT_ENTER;
51 	} else {
52 		// Unpaired
53 		state_ = UNPAIRED;
54 		doneConcord_ = true;
55 		doneDiscord_ = true;
56 		doneUnpair1_ = false;
57 		doneUnpair2_ = true;
58 		exitConcord_ = ReportingState::EXIT_DID_NOT_ENTER; // not relevant
59 		exitDiscord_ = ReportingState::EXIT_DID_NOT_ENTER; // not relevant
60 		exitUnpair1_ = ReportingState::EXIT_DID_NOT_EXIT;
61 		exitUnpair2_ = ReportingState::EXIT_DID_NOT_ENTER; // not relevant
62 	}
63 	doneUnpair_ = doneUnpair1_ && doneUnpair2_;
64 	done_ = false;
65 	nconcord_ = ndiscord_ = nunpair1_ = nunpair2_ = 0;
66 }
67 
68 /**
69  * Caller uses this member function to indicate that one additional
70  * concordant alignment has been found.
71  */
foundConcordant()72 bool ReportingState::foundConcordant() {
73 	assert(paired_);
74 	assert_geq(state_, ReportingState::CONCORDANT_PAIRS);
75 	assert(!doneConcord_);
76 	nconcord_++;
77 	areDone(nconcord_, doneConcord_, exitConcord_);
78 	// No need to search for discordant alignments if there are one or more
79 	// concordant alignments.
80 	doneDiscord_ = true;
81 	exitDiscord_ = ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED;
82 	if(doneConcord_) {
83 		// If we're finished looking for concordant alignments, do we have to
84 		// continue on to search for unpaired alignments?  Only if our exit
85 		// from the concordant stage is EXIT_SHORT_CIRCUIT_M.  If it's
86 		// EXIT_SHORT_CIRCUIT_k or EXIT_WITH_ALIGNMENTS, we can skip unpaired.
87 		assert_neq(ReportingState::EXIT_NO_ALIGNMENTS, exitConcord_);
88 		if(exitConcord_ != ReportingState::EXIT_SHORT_CIRCUIT_M) {
89 			if(!doneUnpair1_) {
90 				doneUnpair1_ = true;
91 				exitUnpair1_ = ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED;
92 			}
93 			if(!doneUnpair2_) {
94 				doneUnpair2_ = true;
95 				exitUnpair2_ = ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED;
96 			}
97 		}
98 	}
99 	updateDone();
100 	return done();
101 }
102 
103 /**
104  * Caller uses this member function to indicate that one additional unpaired
105  * mate alignment has been found for the specified mate.
106  */
foundUnpaired(bool mate1)107 bool ReportingState::foundUnpaired(bool mate1) {
108 	assert_gt(state_, ReportingState::NO_READ);
109 	// Note: it's not right to assert !doneUnpair1_/!doneUnpair2_ here.
110 	// Even if we're done with finding
111 	if(mate1) {
112 		nunpair1_++;
113 		// Did we just finish with this mate?
114 		if(!doneUnpair1_) {
115 			areDone(nunpair1_, doneUnpair1_, exitUnpair1_);
116 			if(doneUnpair1_) {
117 				doneUnpair_ = doneUnpair1_ && doneUnpair2_;
118 				updateDone();
119 			}
120 		}
121 		if(nunpair1_ > 1) {
122 			doneDiscord_ = true;
123 			exitDiscord_ = ReportingState::EXIT_NO_ALIGNMENTS;
124 		}
125 	} else {
126 		nunpair2_++;
127 		// Did we just finish with this mate?
128 		if(!doneUnpair2_) {
129 			areDone(nunpair2_, doneUnpair2_, exitUnpair2_);
130 			if(doneUnpair2_) {
131 				doneUnpair_ = doneUnpair1_ && doneUnpair2_;
132 				updateDone();
133 			}
134 		}
135 		if(nunpair2_ > 1) {
136 			doneDiscord_ = true;
137 			exitDiscord_ = ReportingState::EXIT_NO_ALIGNMENTS;
138 		}
139 	}
140 	return done();
141 }
142 
143 /**
144  * Called to indicate that the aligner has finished searching for
145  * alignments.  This gives us a chance to finalize our state.
146  *
147  * TODO: Keep track of short-circuiting information.
148  */
finish()149 void ReportingState::finish() {
150 	if(!doneConcord_) {
151 		doneConcord_ = true;
152 		exitConcord_ =
153 			((nconcord_ > 0) ?
154 				ReportingState::EXIT_WITH_ALIGNMENTS :
155 				ReportingState::EXIT_NO_ALIGNMENTS);
156 	}
157 	assert_gt(exitConcord_, EXIT_DID_NOT_EXIT);
158 	if(!doneUnpair1_) {
159 		doneUnpair1_ = true;
160 		exitUnpair1_ =
161 			((nunpair1_ > 0) ?
162 				ReportingState::EXIT_WITH_ALIGNMENTS :
163 				ReportingState::EXIT_NO_ALIGNMENTS);
164 	}
165 	assert_gt(exitUnpair1_, EXIT_DID_NOT_EXIT);
166 	if(!doneUnpair2_) {
167 		doneUnpair2_ = true;
168 		exitUnpair2_ =
169 			((nunpair2_ > 0) ?
170 				ReportingState::EXIT_WITH_ALIGNMENTS :
171 				ReportingState::EXIT_NO_ALIGNMENTS);
172 	}
173 	assert_gt(exitUnpair2_, EXIT_DID_NOT_EXIT);
174 	if(!doneDiscord_) {
175 		// Check if the unpaired alignments should be converted to a single
176 		// discordant paired-end alignment.
177 		assert_eq(0, ndiscord_);
178 		if(nconcord_ == 0 && nunpair1_ == 1 && nunpair2_ == 1) {
179 			convertUnpairedToDiscordant();
180 		}
181 		doneDiscord_ = true;
182 		exitDiscord_ =
183 			((ndiscord_ > 0) ?
184 				ReportingState::EXIT_WITH_ALIGNMENTS :
185 				ReportingState::EXIT_NO_ALIGNMENTS);
186 	}
187 	assert(!paired_ || exitDiscord_ > ReportingState::EXIT_DID_NOT_EXIT);
188 	doneUnpair_ = done_ = true;
189 	assert(done());
190 }
191 
192 /**
193  * Populate given counters with the number of various kinds of alignments
194  * to report for this read.  Concordant alignments are preferable to (and
195  * mutually exclusive with) discordant alignments, and paired-end
196  * alignments are preferable to unpaired alignments.
197  *
198  * The caller also needs some additional information for the case where a
199  * pair or unpaired read aligns repetitively.  If the read is paired-end
200  * and the paired-end has repetitive concordant alignments, that should be
201  * reported, and 'pairMax' is set to true to indicate this.  If the read is
202  * paired-end, does not have any conordant alignments, but does have
203  * repetitive alignments for one or both mates, then that should be
204  * reported, and 'unpair1Max' and 'unpair2Max' are set accordingly.
205  *
206  * Note that it's possible in the case of a paired-end read for the read to
207  * have repetitive concordant alignments, but for one mate to have a unique
208  * unpaired alignment.
209  */
getReport(uint64_t & nconcordAln,uint64_t & ndiscordAln,uint64_t & nunpair1Aln,uint64_t & nunpair2Aln,bool & pairMax,bool & unpair1Max,bool & unpair2Max) const210 void ReportingState::getReport(
211 	uint64_t& nconcordAln, // # concordant alignments to report
212 	uint64_t& ndiscordAln, // # discordant alignments to report
213 	uint64_t& nunpair1Aln, // # unpaired alignments for mate #1 to report
214 	uint64_t& nunpair2Aln, // # unpaired alignments for mate #2 to report
215 	bool& pairMax,         // repetitive concordant alignments
216 	bool& unpair1Max,      // repetitive alignments for mate #1
217 	bool& unpair2Max)      // repetitive alignments for mate #2
218 	const
219 {
220 	nconcordAln = ndiscordAln = nunpair1Aln = nunpair2Aln = 0;
221 	pairMax = unpair1Max = unpair2Max = false;
222 	assert_gt(p_.khits, 0);
223 	assert_gt(p_.mhits, 0);
224 	if(paired_) {
225 		// Do we have 1 or more concordant alignments to report?
226 		if(exitConcord_ == ReportingState::EXIT_SHORT_CIRCUIT_k) {
227 			// k at random
228 			assert_geq(nconcord_, (uint64_t)p_.khits);
229 			nconcordAln = p_.khits;
230 			return;
231 		} else if(exitConcord_ == ReportingState::EXIT_SHORT_CIRCUIT_M) {
232 			assert(p_.msample);
233 			assert_gt(nconcord_, 0);
234 			pairMax = true;  // repetitive concordant alignments
235 			if(p_.mixed) {
236 				unpair1Max = nunpair1_ > (uint64_t)p_.mhits;
237 				unpair2Max = nunpair2_ > (uint64_t)p_.mhits;
238 			}
239 			// Not sure if this is OK
240 			nconcordAln = 1; // 1 at random
241 			return;
242 		} else if(exitConcord_ == ReportingState::EXIT_WITH_ALIGNMENTS) {
243 			assert_gt(nconcord_, 0);
244 			// <= k at random
245 			nconcordAln = min<uint64_t>(nconcord_, p_.khits);
246 			return;
247 		}
248 		assert(!p_.mhitsSet() || nconcord_ <= (uint64_t)p_.mhits+1);
249 
250 		// Do we have a discordant alignment to report?
251 		if(exitDiscord_ == ReportingState::EXIT_WITH_ALIGNMENTS) {
252 			// Report discordant
253 			assert(p_.discord);
254 			ndiscordAln = 1;
255 			return;
256 		}
257 	}
258 
259 	assert_neq(ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED, exitUnpair1_);
260 	assert_neq(ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED, exitUnpair2_);
261 
262 	if((paired_ && !p_.mixed) || nunpair1_ + nunpair2_ == 0) {
263 		// Unpaired alignments either not reportable or non-existant
264 		return;
265 	}
266 
267 	// Do we have 1 or more alignments for mate #1 to report?
268 	if(exitUnpair1_ == ReportingState::EXIT_SHORT_CIRCUIT_k) {
269 		// k at random
270 		assert_geq(nunpair1_, (uint64_t)p_.khits);
271 		nunpair1Aln = p_.khits;
272 	} else if(exitUnpair1_ == ReportingState::EXIT_SHORT_CIRCUIT_M) {
273 		assert(p_.msample);
274 		assert_gt(nunpair1_, 0);
275 		unpair1Max = true;  // repetitive alignments for mate #1
276 		nunpair1Aln = 1; // 1 at random
277 	} else if(exitUnpair1_ == ReportingState::EXIT_WITH_ALIGNMENTS) {
278 		assert_gt(nunpair1_, 0);
279 		// <= k at random
280 		nunpair1Aln = min<uint64_t>(nunpair1_, (uint64_t)p_.khits);
281 	}
282 	assert(!p_.mhitsSet() || paired_ || nunpair1_ <= (uint64_t)p_.mhits+1);
283 
284 	// Do we have 2 or more alignments for mate #2 to report?
285 	if(exitUnpair2_ == ReportingState::EXIT_SHORT_CIRCUIT_k) {
286 		// k at random
287 		nunpair2Aln = p_.khits;
288 	} else if(exitUnpair2_ == ReportingState::EXIT_SHORT_CIRCUIT_M) {
289 		assert(p_.msample);
290 		assert_gt(nunpair2_, 0);
291 		unpair2Max = true;  // repetitive alignments for mate #1
292 		nunpair2Aln = 1; // 1 at random
293 	} else if(exitUnpair2_ == ReportingState::EXIT_WITH_ALIGNMENTS) {
294 		assert_gt(nunpair2_, 0);
295 		// <= k at random
296 		nunpair2Aln = min<uint64_t>(nunpair2_, (uint64_t)p_.khits);
297 	}
298 	assert(!p_.mhitsSet() || paired_ || nunpair2_ <= (uint64_t)p_.mhits+1);
299 }
300 
301 /**
302  * Given the number of alignments in a category, check whether we
303  * short-circuited out of the category.  Set the done and exit arguments to
304  * indicate whether and how we short-circuited.
305  */
areDone(uint64_t cnt,bool & done,int & exit) const306 inline void ReportingState::areDone(
307 	uint64_t cnt,    // # alignments in category
308 	bool& done,      // out: whether we short-circuited out of category
309 	int& exit) const // out: if done, how we short-circuited (-k? -m? etc)
310 {
311 	assert(!done);
312 	// Have we exceeded the -k limit?
313 	assert_gt(p_.khits, 0);
314 	assert_gt(p_.mhits, 0);
315 	if(cnt >= (uint64_t)p_.khits && !p_.mhitsSet()) {
316 		done = true;
317 		exit = ReportingState::EXIT_SHORT_CIRCUIT_k;
318 	}
319 	// Have we exceeded the -m or -M limit?
320 	else if(p_.mhitsSet() && cnt > (uint64_t)p_.mhits) {
321 		done = true;
322 		assert(p_.msample);
323 		exit = ReportingState::EXIT_SHORT_CIRCUIT_M;
324 	}
325 }
326 
printPct(std::ostream & os,uint64_t num,uint64_t denom)327 static std::ostream& printPct(
328 	std::ostream& os,
329 	uint64_t num,
330 	uint64_t denom)
331 {
332 	double pct = 0.0f;
333 	if(denom != 0) { pct = 100.0 * (double)num / (double)denom; }
334 	os << fixed << setprecision(2) << pct << '%';
335 	return os;
336 }
337 /**
338  * Print a friendly summary of:
339  *
340  *  1. How many reads were aligned and had one or more alignments
341  *     reported
342  *  2. How many reads exceeded the -m or -M ceiling and therefore had
343  *     their alignments suppressed or sampled
344  *  3. How many reads failed to align entirely
345  *
346  * Optionally print a series of Hadoop streaming-style counter updates
347  * with similar information.
348  */
printAlSumm(const ReportingMetrics & met,size_t repThresh,bool discord,bool mixed,bool hadoopOut)349 void AlnSink::printAlSumm(
350 	const ReportingMetrics& met,
351 	size_t repThresh,   // threshold for uniqueness, or max if no thresh
352 	bool discord,       // looked for discordant alignments
353 	bool mixed,         // looked for unpaired alignments where paired failed?
354 	bool hadoopOut)     // output Hadoop counters?
355 {
356 	// NOTE: there's a filtering step at the very beginning, so everything
357 	// being reported here is post filtering
358 
359 	bool canRep = repThresh != MAX_SIZE_T;
360 	if(hadoopOut) {
361 		cerr << "reporter:counter:Bowtie,Reads processed," << met.nread << endl;
362 	}
363 	uint64_t totread = met.nread;
364 	if(totread > 0) {
365 		cerr << "" << met.nread << " reads; of these:" << endl;
366 	} else {
367 		assert_eq(0, met.npaired);
368 		assert_eq(0, met.nunpaired);
369 		cerr << "" << totread << " reads" << endl;
370 	}
371 	uint64_t totpair = met.npaired;
372 	if(totpair > 0) {
373 		// Paired output
374 		cerr << "  " << totpair << " (";
375 		printPct(cerr, totpair, totread);
376 		cerr << ") were paired; of these:" << endl;
377 
378 		// Concordants
379 		cerr << "    " << met.nconcord_0 << " (";
380 		printPct(cerr, met.nconcord_0, met.npaired);
381 		cerr << ") aligned concordantly 0 times" << endl;
382 		if(canRep) {
383 			// Print the number that aligned concordantly exactly once
384 			assert_eq(met.nconcord_uni, met.nconcord_uni1+met.nconcord_uni2);
385 			cerr << "    " << met.nconcord_uni1 << " (";
386 			printPct(cerr, met.nconcord_uni1, met.npaired);
387 			cerr << ") aligned concordantly exactly 1 time" << endl;
388 
389 			// Print the number that aligned concordantly more than once but
390 			// fewer times than the limit
391 
392 			cerr << "    " << met.nconcord_uni2+met.nconcord_rep << " (";
393 			printPct(cerr, met.nconcord_uni2+met.nconcord_rep, met.npaired);
394 			cerr << ") aligned concordantly >1 times" << endl;
395 		} else {
396 			// Print the number that aligned concordantly exactly once
397 			assert_eq(met.nconcord_uni, met.nconcord_uni1+met.nconcord_uni2);
398 			cerr << "    " << met.nconcord_uni1 << " (";
399 			printPct(cerr, met.nconcord_uni1, met.npaired);
400 			cerr << ") aligned concordantly exactly 1 time" << endl;
401 
402 			// Print the number that aligned concordantly more than once
403 			cerr << "    " << met.nconcord_uni2 << " (";
404 			printPct(cerr, met.nconcord_uni2, met.npaired);
405 			cerr << ") aligned concordantly >1 times" << endl;
406 		}
407 		if(discord) {
408 			// TODO: what about discoardant and on separate chromosomes?
409 
410 			// Bring out the unaligned pair total so we can subtract discordants
411 			cerr << "    ----" << endl;
412 			cerr << "    " << met.nconcord_0
413 			     << " pairs aligned concordantly 0 times; of these:" << endl;
414 			// Discordants
415 			cerr << "      " << met.ndiscord << " (";
416 			printPct(cerr, met.ndiscord, met.nconcord_0);
417 			cerr << ") aligned discordantly 1 time" << endl;
418 		}
419 		uint64_t ncondiscord_0 = met.nconcord_0 - met.ndiscord;
420 		if(mixed) {
421 			// Bring out the unaligned pair total so we can subtract discordants
422 			cerr << "    ----" << endl;
423 			cerr << "    " << ncondiscord_0
424 			     << " pairs aligned 0 times concordantly or discordantly; of these:" << endl;
425 			cerr << "      " << (ncondiscord_0 * 2) << " mates make up the pairs; of these:" << endl;
426 			cerr << "        " << met.nunp_0_0 << " " << "(";
427 			printPct(cerr, met.nunp_0_0, ncondiscord_0 * 2);
428 			cerr << ") aligned 0 times" << endl;
429 			if(canRep) {
430 				// Print the number that aligned exactly once
431 				assert_eq(met.nunp_0_uni, met.nunp_0_uni1+met.nunp_0_uni2);
432 				cerr << "        " << met.nunp_0_uni1 << " (";
433 				printPct(cerr, met.nunp_0_uni1, ncondiscord_0 * 2);
434 				cerr << ") aligned exactly 1 time" << endl;
435 
436 				// Print the number that aligned more than once but fewer times
437 				// than the limit
438 				cerr << "        " << met.nunp_0_uni2+met.nunp_0_rep << " (";
439 				printPct(cerr, met.nunp_0_uni2+met.nunp_0_rep, ncondiscord_0 * 2);
440 				cerr << ") aligned >1 times" << endl;
441 			} else {
442 				// Print the number that aligned exactly once
443 				assert_eq(met.nunp_0_uni, met.nunp_0_uni1+met.nunp_0_uni2);
444 				cerr << "        " << met.nunp_0_uni1 << " (";
445 				printPct(cerr, met.nunp_0_uni1, ncondiscord_0 * 2);
446 				cerr << ") aligned exactly 1 time" << endl;
447 
448 				// Print the number that aligned more than once but fewer times
449 				// than the limit
450 				cerr << "        " << met.nunp_0_uni2 << " (";
451 				printPct(cerr, met.nunp_0_uni2, ncondiscord_0 * 2);
452 				cerr << ") aligned >1 times" << endl;
453 			}
454 
455 			//if(canRep) {
456 			//	// Bring out the repetitively aligned pair total so we can subtract discordants
457 			//	cerr << "    ----" << endl;
458 			//	cerr << "    " << met.nconcord_rep
459 			//		 << " pairs aligned concordantly >" << repThresh
460 			//		 << " times; of these:" << endl;
461 			//	cerr << "      " << (met.nconcord_rep * 2) << " mates make up the pairs; of these:" << endl;
462 			//
463 			//	cerr << "        " << met.nunp_rep_0 << " (";
464 			//	printPct(cerr, met.nunp_rep_0, met.nconcord_rep * 2);
465 			//	cerr << ") aligned 0 times" << endl;
466 			//
467 			//	cerr << "        " << met.nunp_rep_uni << " (";
468 			//	printPct(cerr, met.nunp_rep_uni, met.nconcord_rep * 2);
469 			//	cerr << ") aligned >0 and <=" << repThresh << " times" << endl;
470 			//
471 			//	cerr << "        " << met.nunp_rep_rep << " (";
472 			//	printPct(cerr, met.nunp_rep_rep, met.nconcord_rep * 2);
473 			//	cerr << ") aligned >" << repThresh << " times" << endl;
474 			//}
475 		}
476 	}
477 	uint64_t totunpair = met.nunpaired;
478 	if(totunpair > 0) {
479 		// Unpaired output
480 		cerr << "  " << totunpair << " (";
481 		printPct(cerr, totunpair, totread);
482 		cerr << ") were unpaired; of these:" << endl;
483 
484 		cerr << "    " << met.nunp_0 << " (";
485 		printPct(cerr, met.nunp_0, met.nunpaired);
486 		cerr << ") aligned 0 times" << endl;
487 		if(hadoopOut) {
488 			cerr << "reporter:counter:Bowtie 2,Unpaired reads with 0 alignments,"
489 			     << met.nunpaired << endl;
490 		}
491 
492 		if(canRep) {
493 			// Print the number that aligned exactly once
494 			assert_eq(met.nunp_uni, met.nunp_uni1+met.nunp_uni2);
495 			cerr << "    " << met.nunp_uni1 << " (";
496 			printPct(cerr, met.nunp_uni1, met.nunpaired);
497 			cerr << ") aligned exactly 1 time" << endl;
498 
499 			// Print the number that aligned more than once but fewer times
500 			// than the limit
501 			cerr << "    " << met.nunp_uni2+met.nunp_rep << " (";
502 			printPct(cerr, met.nunp_uni2+met.nunp_rep, met.nunpaired);
503 			cerr << ") aligned >1 times" << endl;
504 		} else {
505 			// Print the number that aligned exactly once
506 			assert_eq(met.nunp_uni, met.nunp_uni1+met.nunp_uni2);
507 			cerr << "    " << met.nunp_uni1 << " (";
508 			printPct(cerr, met.nunp_uni1, met.nunpaired);
509 			cerr << ") aligned exactly 1 time" << endl;
510 
511 			// Print the number that aligned more than once
512 			cerr << "    " << met.nunp_uni2 << " (";
513 			printPct(cerr, met.nunp_uni2, met.nunpaired);
514 			cerr << ") aligned >1 times" << endl;
515 		}
516 	}
517 	uint64_t tot_al_cand = totunpair + totpair*2;
518 	uint64_t tot_al =
519 		(met.nconcord_uni + met.nconcord_rep)*2 +
520 		(met.ndiscord)*2 +
521 		met.nunp_0_uni +
522 		met.nunp_0_rep +
523 		met.nunp_uni +
524 		met.nunp_rep;
525 	assert_leq(tot_al, tot_al_cand);
526 	printPct(cerr, tot_al, tot_al_cand);
527 	cerr << " overall alignment rate" << endl;
528 }
529 
530 /**
531  * Return true iff the read in rd1/rd2 matches the last read handled, which
532  * should still be in rd1_/rd2_.
533  */
sameRead(const Read * rd1,const Read * rd2,bool qualitiesMatter)534 bool AlnSinkWrap::sameRead(
535 	// One of the other of rd1, rd2 will = NULL if read is unpaired
536 	const Read* rd1,      // new mate #1
537 	const Read* rd2,      // new mate #2
538 	bool qualitiesMatter) // aln policy distinguishes b/t quals?
539 {
540 	bool same = false;
541 	if(rd1_ != NULL || rd2_ != NULL) {
542 		// This is not the first time the sink was initialized with
543 		// a read.  Check if new read/pair is identical to previous
544 		// read/pair
545 		if((rd1_ == NULL) == (rd1 == NULL) &&
546 		   (rd2_ == NULL) == (rd2 == NULL))
547 		{
548 			bool m1same = (rd1 == NULL && rd1_ == NULL);
549 			if(!m1same) {
550 				assert(rd1 != NULL);
551 				assert(rd1_ != NULL);
552 				m1same = Read::same(
553 					rd1->patFw,  // new seq
554 					rd1->qual,   // new quals
555 					rd1_->patFw, // old seq
556 					rd1_->qual,  // old quals
557 					qualitiesMatter);
558 			}
559 			if(m1same) {
560 				bool m2same = (rd2 == NULL && rd2_ == NULL);
561 				if(!m2same) {
562 					m2same = Read::same(
563 						rd2->patFw,  // new seq
564 						rd2->qual,   // new quals
565 						rd2_->patFw, // old seq
566 						rd2_->qual,  // old quals
567 						qualitiesMatter);
568 				}
569 				same = m2same;
570 			}
571 		}
572 	}
573 	return same;
574 }
575 
576 /**
577  * Initialize the wrapper with a new read pair and return an integer >= -1
578  * indicating which stage the aligner should start at.  If -1 is returned, the
579  * aligner can skip the read entirely.  Checks if the new read pair is
580  * identical to the previous pair.  If it is, then we return the id of the
581  * first stage to run.
582  */
nextRead(const Read * rd1,const Read * rd2,TReadId rdid,bool qualitiesMatter)583 int AlnSinkWrap::nextRead(
584 	// One of the other of rd1, rd2 will = NULL if read is unpaired
585 	const Read* rd1,      // new mate #1
586 	const Read* rd2,      // new mate #2
587 	TReadId rdid,         // read ID for new pair
588 	bool qualitiesMatter) // aln policy distinguishes b/t quals?
589 {
590 	assert(!init_);
591 	assert(rd1 != NULL || rd2 != NULL);
592 	init_ = true;
593 	// Keep copy of new read, so that we can compare it with the
594 	// next one
595 	if(rd1 != NULL) {
596 		rd1_ = rd1;
597 	} else rd1_ = NULL;
598 	if(rd2 != NULL) {
599 		rd2_ = rd2;
600 	} else rd2_ = NULL;
601 	rdid_ = rdid;
602 	// Caller must now align the read
603 	maxed1_ = false;
604 	maxed2_ = false;
605 	maxedOverall_ = false;
606 	bestPair_ = best2Pair_ =
607 	bestUnp1_ = best2Unp1_ =
608 	bestUnp2_ = best2Unp2_ = std::numeric_limits<THitInt>::min();
609 	rs1_.clear();     // clear out paired-end alignments
610 	rs2_.clear();     // clear out paired-end alignments
611 	rs1u_.clear();    // clear out unpaired alignments for mate #1
612 	rs2u_.clear();    // clear out unpaired alignments for mate #2
613 	st_.nextRead(readIsPair()); // reset state
614 	assert(empty());
615 	assert(!maxed());
616 	// Start from the first stage
617 	return 0;
618 }
619 
620 /**
621  * Inform global, shared AlnSink object that we're finished with this read.
622  * The global AlnSink is responsible for updating counters, creating the output
623  * record, and delivering the record to the appropriate output stream.
624  *
625  * What gets reported for a paired-end alignment?
626  *
627  * 1. If there are reportable concordant alignments, report those and stop
628  * 2. If there are reportable discordant alignments, report those and stop
629  * 3. If unpaired alignments can be reported:
630  *    3a. Report
631  #
632  * Update metrics.  Only ambiguity is: what if a pair aligns repetitively and
633  * one of its mates aligns uniquely?
634  *
635  * 	uint64_t al;   // # mates w/ >= 1 reported alignment
636  *  uint64_t unal; // # mates w/ 0 alignments
637  *  uint64_t max;  // # mates withheld for exceeding -M/-m ceiling
638  *  uint64_t al_concord;  // # pairs w/ >= 1 concordant alignment
639  *  uint64_t al_discord;  // # pairs w/ >= 1 discordant alignment
640  *  uint64_t max_concord; // # pairs maxed out
641  *  uint64_t unal_pair;   // # pairs where neither mate aligned
642  */
finishRead(const SeedResults * sr1,const SeedResults * sr2,bool exhaust1,bool exhaust2,bool nfilt1,bool nfilt2,bool scfilt1,bool scfilt2,bool lenfilt1,bool lenfilt2,bool qcfilt1,bool qcfilt2,RandomSource & rnd,ReportingMetrics & met,const PerReadMetrics & prm,const Scoring & sc,bool suppressSeedSummary,bool suppressAlignments,bool scUnMapped,bool xeq)643 void AlnSinkWrap::finishRead(
644 	const SeedResults *sr1,         // seed alignment results for mate 1
645 	const SeedResults *sr2,         // seed alignment results for mate 2
646 	bool               exhaust1,    // mate 1 exhausted?
647 	bool               exhaust2,    // mate 2 exhausted?
648 	bool               nfilt1,      // mate 1 N-filtered?
649 	bool               nfilt2,      // mate 2 N-filtered?
650 	bool               scfilt1,     // mate 1 score-filtered?
651 	bool               scfilt2,     // mate 2 score-filtered?
652 	bool               lenfilt1,    // mate 1 length-filtered?
653 	bool               lenfilt2,    // mate 2 length-filtered?
654 	bool               qcfilt1,     // mate 1 qc-filtered?
655 	bool               qcfilt2,     // mate 2 qc-filtered?
656 	RandomSource&      rnd,         // pseudo-random generator
657 	ReportingMetrics&  met,         // reporting metrics
658 	const PerReadMetrics& prm,      // per-read metrics
659 	const Scoring& sc,              // scoring scheme
660 	bool suppressSeedSummary,       // = true
661 	bool suppressAlignments,        // = false
662 	bool scUnMapped,                // = false
663 	bool xeq)                       // = false
664 {
665 	obuf_.clear();
666 	OutputQueueMark qqm(g_.outq(), obuf_, rdid_, threadid_);
667 	assert(init_);
668 	if(!suppressSeedSummary) {
669 		if(sr1 != NULL) {
670 			assert(rd1_ != NULL);
671 			// Mate exists and has non-empty SeedResults
672 			g_.reportSeedSummary(obuf_, *rd1_, rdid_, threadid_, *sr1, true);
673 		} else if(rd1_ != NULL) {
674 			// Mate exists but has NULL SeedResults
675 			g_.reportEmptySeedSummary(obuf_, *rd1_, rdid_, true);
676 		}
677 		if(sr2 != NULL) {
678 			assert(rd2_ != NULL);
679 			// Mate exists and has non-empty SeedResults
680 			g_.reportSeedSummary(obuf_, *rd2_, rdid_, threadid_, *sr2, true);
681 		} else if(rd2_ != NULL) {
682 			// Mate exists but has NULL SeedResults
683 			g_.reportEmptySeedSummary(obuf_, *rd2_, rdid_, true);
684 		}
685 	}
686 	if(!suppressAlignments) {
687 		// Ask the ReportingState what to report
688 		st_.finish();
689 		uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
690 		bool pairMax = false, unpair1Max = false, unpair2Max = false;
691 		st_.getReport(
692 			nconcord,
693 			ndiscord,
694 			nunpair1,
695 			nunpair2,
696 			pairMax,
697 			unpair1Max,
698 			unpair2Max);
699 		assert_leq(nconcord, rs1_.size());
700 		assert_leq(nunpair1, rs1u_.size());
701 		assert_leq(nunpair2, rs2u_.size());
702 		assert_leq(ndiscord, 1);
703 		assert_gt(rp_.khits, 0);
704 		assert_gt(rp_.mhits, 0);
705 		assert(!pairMax    || rs1_.size()  >= (uint64_t)rp_.mhits);
706 		assert(!unpair1Max || rs1u_.size() >= (uint64_t)rp_.mhits);
707 		assert(!unpair2Max || rs2u_.size() >= (uint64_t)rp_.mhits);
708 		met.nread++;
709 		if(readIsPair()) {
710 			met.npaired++;
711 		} else {
712 			met.nunpaired++;
713 		}
714 		// Report concordant paired-end alignments if possible
715 		if(nconcord > 0) {
716 			AlnSetSumm concordSumm(
717 				rd1_, rd2_, &rs1_, &rs2_, &rs1u_, &rs2u_,
718 				exhaust1, exhaust2, -1, -1);
719 			// Sort by score then pick from low to high
720 			AlnScore bestUScore, bestP1Score, bestP2Score, bestCScore;
721 			AlnScore bestUDist, bestP1Dist, bestP2Dist, bestCDist;
722 			AlnScore bestUnchosenUScore, bestUnchosenP1Score, bestUnchosenP2Score, bestUnchosenCScore;
723 			AlnScore bestUnchosenUDist, bestUnchosenP1Dist, bestUnchosenP2Dist, bestUnchosenCDist;
724 			// TODO: should probably package these variables up so it's not
725 			// such a pain to pass them around
726 			size_t off = selectByScore(
727 				&rs1_, &rs2_,
728 				nconcord, select1_,
729 				&rs1u_, &rs2u_,
730 				bestUScore,
731 				bestUDist,
732 				bestP1Score,
733 				bestP1Dist,
734 				bestP2Score,
735 				bestP2Dist,
736 				bestCScore,
737 				bestCDist,
738 				bestUnchosenUScore,
739 				bestUnchosenUDist,
740 				bestUnchosenP1Score,
741 				bestUnchosenP1Dist,
742 				bestUnchosenP2Score,
743 				bestUnchosenP2Dist,
744 				bestUnchosenCScore,
745 				bestUnchosenCDist,
746 				rnd);
747 			concordSumm.setBest(
748 				bestUScore,
749 				bestUDist,
750 				bestP1Score,
751 				bestP1Dist,
752 				bestP2Score,
753 				bestP2Dist,
754 				bestCScore,
755 				bestCDist,
756 				bestUnchosenUScore,
757 				bestUnchosenUDist,
758 				bestUnchosenP1Score,
759 				bestUnchosenP1Dist,
760 				bestUnchosenP2Score,
761 				bestUnchosenP2Dist,
762 				bestUnchosenCScore,
763 				bestUnchosenCDist);
764 			assert(concordSumm.bestScore(true).valid());
765 			assert(concordSumm.bestScore(false).valid());
766 			assert_lt(off, rs1_.size());
767 			const AlnRes *rs1 = &rs1_[off];
768 			const AlnRes *rs2 = &rs2_[off];
769 			AlnFlags flags1(
770 				ALN_FLAG_PAIR_CONCORD_MATE1,
771 				st_.params().mhitsSet(),
772 				unpair1Max,
773 				pairMax,
774 				nfilt1,
775 				scfilt1,
776 				lenfilt1,
777 				qcfilt1,
778 				st_.params().mixed,
779 				true,       // primary
780 				true,       // opp aligned
781 				rs2->fw(),  // opp fw
782 				scUnMapped,
783 				xeq);
784 			AlnFlags flags2(
785 				ALN_FLAG_PAIR_CONCORD_MATE2,
786 				st_.params().mhitsSet(),
787 				unpair2Max,
788 				pairMax,
789 				nfilt2,
790 				scfilt2,
791 				lenfilt2,
792 				qcfilt2,
793 				st_.params().mixed,
794 				false,      // primary
795 				true,       // opp aligned
796 				rs1->fw(),  // opp fw
797 				scUnMapped,
798 				xeq);
799 			// Issue: we only set the flags once, but some of the flags might
800 			// vary from pair to pair among the pairs we're reporting.  For
801 			// instance, whether the a given mate aligns to the forward strand.
802 			SeedAlSumm ssm1, ssm2;
803 			sr1->toSeedAlSumm(ssm1);
804 			sr2->toSeedAlSumm(ssm2);
805 			for(size_t i = 0; i < rs1_.size(); i++) {
806 				rs1_[i].setMateParams(ALN_RES_TYPE_MATE1, &rs2_[i], flags1);
807 				rs2_[i].setMateParams(ALN_RES_TYPE_MATE2, &rs1_[i], flags2);
808 				assert_eq(abs(rs1_[i].fragmentLength()), abs(rs2_[i].fragmentLength()));
809 			}
810 			assert(!select1_.empty());
811 			g_.reportHits(
812 				obuf_,
813 				staln_,
814 				threadid_,
815 				rd1_,
816 				rd2_,
817 				rdid_,
818 				select1_,
819 				NULL,
820 				&rs1_,
821 				&rs2_,
822 				pairMax,
823 				concordSumm,
824 				ssm1,
825 				ssm2,
826 				&flags1,
827 				&flags2,
828 				prm,
829 				mapq_,
830 				sc,
831 				false);
832 			if(pairMax) {
833 				met.nconcord_rep++;
834 			} else {
835 				met.nconcord_uni++;
836 				assert(!rs1_.empty());
837 				if(rs1_.size() == 1) {
838 					met.nconcord_uni1++;
839 				} else {
840 					met.nconcord_uni2++;
841 				}
842 			}
843 			init_ = false;
844 			//g_.outq().finishRead(obuf_, rdid_, threadid_);
845 			return;
846 		}
847 		// Report disconcordant paired-end alignments if possible
848 		else if(ndiscord > 0) {
849 			ASSERT_ONLY(bool ret =) prepareDiscordants();
850 			assert(ret);
851 			assert_eq(1, rs1_.size());
852 			assert_eq(1, rs2_.size());
853 			AlnSetSumm discordSumm(
854 				rd1_, rd2_, &rs1_, &rs2_, &rs1u_, &rs2u_,
855 				exhaust1, exhaust2, -1, -1);
856 			const AlnRes *rs1 = &rs1_[0];
857 			const AlnRes *rs2 = &rs2_[0];
858 			AlnFlags flags1(
859 				ALN_FLAG_PAIR_DISCORD_MATE1,
860 				st_.params().mhitsSet(),
861 				false,
862 				pairMax,
863 				nfilt1,
864 				scfilt1,
865 				lenfilt1,
866 				qcfilt1,
867 				st_.params().mixed,
868 				true,       // primary
869 				true,       // opp aligned
870 				rs2->fw(),  // opp fw
871 				scUnMapped,
872 				xeq);
873 			AlnFlags flags2(
874 				ALN_FLAG_PAIR_DISCORD_MATE2,
875 				st_.params().mhitsSet(),
876 				false,
877 				pairMax,
878 				nfilt2,
879 				scfilt2,
880 				lenfilt2,
881 				qcfilt2,
882 				st_.params().mixed,
883 				false,      // primary
884 				true,       // opp aligned
885 				rs1->fw(),  // opp fw
886 				scUnMapped,
887 				xeq);
888 			SeedAlSumm ssm1, ssm2;
889 			sr1->toSeedAlSumm(ssm1);
890 			sr2->toSeedAlSumm(ssm2);
891 			for(size_t i = 0; i < rs1_.size(); i++) {
892 				rs1_[i].setMateParams(ALN_RES_TYPE_MATE1, &rs2_[i], flags1);
893 				rs2_[i].setMateParams(ALN_RES_TYPE_MATE2, &rs1_[i], flags2);
894 				assert(rs1_[i].isFraglenSet() == rs2_[i].isFraglenSet());
895 				assert(!rs1_[i].isFraglenSet() || abs(rs1_[i].fragmentLength()) == abs(rs2_[i].fragmentLength()));
896 			}
897 			AlnScore bestUScore, bestP1Score, bestP2Score, bestCScore;
898 			AlnScore bestUDist, bestP1Dist, bestP2Dist, bestCDist;
899 			AlnScore bestUnchosenUScore, bestUnchosenP1Score, bestUnchosenP2Score, bestUnchosenCScore;
900 			AlnScore bestUnchosenUDist, bestUnchosenP1Dist, bestUnchosenP2Dist, bestUnchosenCDist;
901 			ASSERT_ONLY(size_t off =) selectByScore(
902 				&rs1_, &rs2_,
903 				ndiscord, select1_,
904 				&rs1u_, &rs2u_,
905 				bestUScore,
906 				bestUDist,
907 				bestP1Score,
908 				bestP1Dist,
909 				bestP2Score,
910 				bestP2Dist,
911 				bestCScore,
912 				bestCDist,
913 				bestUnchosenUScore,
914 				bestUnchosenUDist,
915 				bestUnchosenP1Score,
916 				bestUnchosenP1Dist,
917 				bestUnchosenP2Score,
918 				bestUnchosenP2Dist,
919 				bestUnchosenCScore,
920 				bestUnchosenCDist,
921 				rnd);
922 			discordSumm.setBest(
923 				bestUScore,
924 				bestUDist,
925 				bestP1Score,
926 				bestP1Dist,
927 				bestP2Score,
928 				bestP2Dist,
929 				bestCScore,
930 				bestCDist,
931 				bestUnchosenUScore,
932 				bestUnchosenUDist,
933 				bestUnchosenP1Score,
934 				bestUnchosenP1Dist,
935 				bestUnchosenP2Score,
936 				bestUnchosenP2Dist,
937 				bestUnchosenCScore,
938 				bestUnchosenCDist);
939 			assert_eq(0, off);
940 			assert(!select1_.empty());
941 			g_.reportHits(
942 				obuf_,
943 				staln_,
944 				threadid_,
945 				rd1_,
946 				rd2_,
947 				rdid_,
948 				select1_,
949 				NULL,
950 				&rs1_,
951 				&rs2_,
952 				pairMax,
953 				discordSumm,
954 				ssm1,
955 				ssm2,
956 				&flags1,
957 				&flags2,
958 				prm,
959 				mapq_,
960 				sc,
961 				false);
962 			met.nconcord_0++;
963 			met.ndiscord++;
964 			init_ = false;
965 			//g_.outq().finishRead(obuf_, rdid_, threadid_);
966 			return;
967 		}
968 		// If we're at this point, at least one mate failed to align.
969 		// BTL: That's not true.  It could be that there are no concordant
970 		// alignments but both mates have unpaired alignments, with one of
971 		// the mates having more than one.
972 		//assert(nunpair1 == 0 || nunpair2 == 0);
973 		assert(!pairMax);
974 
975 		// Update counters given that one mate didn't align
976 		if(readIsPair()) {
977 			met.nconcord_0++;
978 		}
979 		if(rd1_ != NULL) {
980 			if(nunpair1 > 0) {
981 				// Update counters
982 				if(readIsPair()) {
983 					if(unpair1Max) met.nunp_0_rep++;
984 					else {
985 						met.nunp_0_uni++;
986 						assert(!rs1u_.empty());
987 						if(rs1u_.size() == 1) {
988 							met.nunp_0_uni1++;
989 						} else {
990 							met.nunp_0_uni2++;
991 						}
992 					}
993 				} else {
994 					if(unpair1Max) met.nunp_rep++;
995 					else {
996 						met.nunp_uni++;
997 						assert(!rs1u_.empty());
998 						if(rs1u_.size() == 1) {
999 							met.nunp_uni1++;
1000 						} else {
1001 							met.nunp_uni2++;
1002 						}
1003 					}
1004 				}
1005 			} else if(unpair1Max) {
1006 				// Update counters
1007 				if(readIsPair())   met.nunp_0_rep++;
1008 				else               met.nunp_rep++;
1009 			} else {
1010 				// Update counters
1011 				if(readIsPair())   met.nunp_0_0++;
1012 				else               met.nunp_0++;
1013 			}
1014 		}
1015 		if(rd2_ != NULL) {
1016 			if(nunpair2 > 0) {
1017 				// Update counters
1018 				if(readIsPair()) {
1019 					if(unpair2Max) met.nunp_0_rep++;
1020 					else {
1021 						assert(!rs2u_.empty());
1022 						met.nunp_0_uni++;
1023 						if(rs2u_.size() == 1) {
1024 							met.nunp_0_uni1++;
1025 						} else {
1026 							met.nunp_0_uni2++;
1027 						}
1028 					}
1029 				} else {
1030 					if(unpair2Max) met.nunp_rep++;
1031 					else {
1032 						assert(!rs2u_.empty());
1033 						met.nunp_uni++;
1034 						if(rs2u_.size() == 1) {
1035 							met.nunp_uni1++;
1036 						} else {
1037 							met.nunp_uni2++;
1038 						}
1039 					}
1040 				}
1041 			} else if(unpair2Max) {
1042 				// Update counters
1043 				if(readIsPair())   met.nunp_0_rep++;
1044 				else               met.nunp_rep++;
1045 			} else {
1046 				// Update counters
1047 				if(readIsPair())   met.nunp_0_0++;
1048 				else               met.nunp_0++;
1049 			}
1050 		}
1051 
1052 		const AlnRes *repRs1 = NULL, *repRs2 = NULL;
1053 		AlnSetSumm summ1, summ2;
1054 		AlnFlags flags1, flags2;
1055 		TRefId refid = -1; TRefOff refoff = -1;
1056 		bool rep1 = rd1_ != NULL && nunpair1 > 0;
1057 		bool rep2 = rd2_ != NULL && nunpair2 > 0;
1058 
1059 		// This is the preliminary if statement for mate 1 - here we're
1060 		// gathering some preliminary information, making it possible to call
1061 		// g_.reportHits(...) with information about both mates potentially
1062 		if(rep1) {
1063 			// Mate 1 aligned at least once
1064 			summ1.init(
1065 				rd1_, NULL, NULL, NULL, &rs1u_, NULL,
1066 				exhaust1, exhaust2, -1, -1);
1067 			// Sort by score then pick from low to high
1068 			AlnScore bestUScore, bestP1Score, bestP2Score, bestCScore;
1069 			AlnScore bestUDist, bestP1Dist, bestP2Dist, bestCDist;
1070 			AlnScore bestUnchosenUScore, bestUnchosenP1Score, bestUnchosenP2Score, bestUnchosenCScore;
1071 			AlnScore bestUnchosenUDist, bestUnchosenP1Dist, bestUnchosenP2Dist, bestUnchosenCDist;
1072 			size_t off = selectByScore(
1073 				&rs1u_, NULL, nunpair1, select1_, NULL, NULL,
1074 				bestUScore,
1075 				bestUDist,
1076 				bestP1Score,
1077 				bestP1Dist,
1078 				bestP2Score,
1079 				bestP2Dist,
1080 				bestCScore,
1081 				bestCDist,
1082 				bestUnchosenUScore,
1083 				bestUnchosenUDist,
1084 				bestUnchosenP1Score,
1085 				bestUnchosenP1Dist,
1086 				bestUnchosenP2Score,
1087 				bestUnchosenP2Dist,
1088 				bestUnchosenCScore,
1089 				bestUnchosenCDist,
1090 				rnd);
1091 			summ1.setBest(
1092 				bestUScore,
1093 				bestUDist,
1094 				bestP1Score,
1095 				bestP1Dist,
1096 				bestP2Score,
1097 				bestP2Dist,
1098 				bestCScore,
1099 				bestCDist,
1100 				bestUnchosenUScore,
1101 				bestUnchosenUDist,
1102 				bestUnchosenP1Score,
1103 				bestUnchosenP1Dist,
1104 				bestUnchosenP2Score,
1105 				bestUnchosenP2Dist,
1106 				bestUnchosenCScore,
1107 				bestUnchosenCDist);
1108 			repRs1 = &rs1u_[off];
1109 		} else if(rd1_ != NULL) {
1110 			// Mate 1 failed to align - don't do anything yet.  First we want
1111 			// to collect information on mate 2 in case that factors into the
1112 			// summary
1113 			assert(!unpair1Max);
1114 		}
1115 
1116 		if(rep2) {
1117 			summ2.init(
1118 				NULL, rd2_, NULL, NULL, NULL, &rs2u_,
1119 				exhaust1, exhaust2, -1, -1);
1120 			// Sort by score then pick from low to high
1121 			AlnScore bestUScore, bestP1Score, bestP2Score, bestCScore;
1122 			AlnScore bestUDist, bestP1Dist, bestP2Dist, bestCDist;
1123 			AlnScore bestUnchosenUScore, bestUnchosenP1Score, bestUnchosenP2Score, bestUnchosenCScore;
1124 			AlnScore bestUnchosenUDist, bestUnchosenP1Dist, bestUnchosenP2Dist, bestUnchosenCDist;
1125 			size_t off = selectByScore(
1126 				&rs2u_, NULL, nunpair2, select2_, NULL, NULL,
1127 				bestUScore,
1128 				bestUDist,
1129 				bestP1Score,
1130 				bestP1Dist,
1131 				bestP2Score,
1132 				bestP2Dist,
1133 				bestCScore,
1134 				bestCDist,
1135 				bestUnchosenUScore,
1136 				bestUnchosenUDist,
1137 				bestUnchosenP1Score,
1138 				bestUnchosenP1Dist,
1139 				bestUnchosenP2Score,
1140 				bestUnchosenP2Dist,
1141 				bestUnchosenCScore,
1142 				bestUnchosenCDist,
1143 				rnd);
1144 			summ2.setBest(
1145 				bestUScore,
1146 				bestUDist,
1147 				bestP1Score,
1148 				bestP1Dist,
1149 				bestP2Score,
1150 				bestP2Dist,
1151 				bestCScore,
1152 				bestCDist,
1153 				bestUnchosenUScore,
1154 				bestUnchosenUDist,
1155 				bestUnchosenP1Score,
1156 				bestUnchosenP1Dist,
1157 				bestUnchosenP2Score,
1158 				bestUnchosenP2Dist,
1159 				bestUnchosenCScore,
1160 				bestUnchosenCDist);
1161 			repRs2 = &rs2u_[off];
1162 		} else if(rd2_ != NULL) {
1163 			// Mate 2 failed to align - don't do anything yet.  First we want
1164 			// to collect information on mate 1 in case that factors into the
1165 			// summary
1166 			assert(!unpair2Max);
1167 		}
1168 
1169 		// Now set up flags
1170 		if(rep1) {
1171 			// Initialize flags.  Note: We want to have information about how
1172 			// the other mate aligned (if it did) at this point
1173 			flags1.init(
1174 				readIsPair() ?
1175 					ALN_FLAG_PAIR_UNPAIRED_MATE1 :
1176 					ALN_FLAG_PAIR_UNPAIRED,
1177 				st_.params().mhitsSet(),
1178 				unpair1Max,
1179 				pairMax,
1180 				nfilt1,
1181 				scfilt1,
1182 				lenfilt1,
1183 				qcfilt1,
1184 				st_.params().mixed,
1185 				true,   // primary
1186 				repRs2 != NULL,                    // opp aligned
1187 				repRs2 == NULL || repRs2->fw(),    // opp fw
1188 				scUnMapped,
1189 				xeq);
1190 			for(size_t i = 0; i < rs1u_.size(); i++) {
1191 				rs1u_[i].setMateParams(ALN_RES_TYPE_UNPAIRED_MATE1, NULL, flags1);
1192 			}
1193 		}
1194 		if(rep2) {
1195 			// Initialize flags.  Note: We want to have information about how
1196 			// the other mate aligned (if it did) at this point
1197 			flags2.init(
1198 				readIsPair() ?
1199 					ALN_FLAG_PAIR_UNPAIRED_MATE2 :
1200 					ALN_FLAG_PAIR_UNPAIRED,
1201 				st_.params().mhitsSet(),
1202 				unpair2Max,
1203 				pairMax,
1204 				nfilt2,
1205 				scfilt2,
1206 				lenfilt2,
1207 				qcfilt2,
1208 				st_.params().mixed,
1209 				true,   // primary
1210 				repRs1 != NULL,                  // opp aligned
1211 				repRs1 == NULL || repRs1->fw(),  // opp fw
1212 				scUnMapped,
1213 				xeq);
1214 			for(size_t i = 0; i < rs2u_.size(); i++) {
1215 				rs2u_[i].setMateParams(ALN_RES_TYPE_UNPAIRED_MATE2, NULL, flags2);
1216 			}
1217 		}
1218 
1219 		// Now report mate 1
1220 		if(rep1) {
1221 			SeedAlSumm ssm1, ssm2;
1222 			if(sr1 != NULL) sr1->toSeedAlSumm(ssm1);
1223 			if(sr2 != NULL) sr2->toSeedAlSumm(ssm2);
1224 			assert(!select1_.empty());
1225 			g_.reportHits(
1226 				obuf_,
1227 				staln_,
1228 				threadid_,
1229 				rd1_,
1230 				repRs2 != NULL ? rd2_ : NULL,
1231 				rdid_,
1232 				select1_,
1233 				repRs2 != NULL ? &select2_ : NULL,
1234 				&rs1u_,
1235 				repRs2 != NULL ? &rs2u_ : NULL,
1236 				unpair1Max,
1237 				summ1,
1238 				ssm1,
1239 				ssm2,
1240 				&flags1,
1241 				repRs2 != NULL ? &flags2 : NULL,
1242 				prm,
1243 				mapq_,
1244 				sc,
1245 				false);
1246 			assert_lt(select1_[0], rs1u_.size());
1247 			refid = rs1u_[select1_[0]].refid();
1248 			refoff = rs1u_[select1_[0]].refoff();
1249 		}
1250 
1251 		// Now report mate 2
1252 		//if(rep2 && !rep1) {
1253 		if(rep2) {
1254 			SeedAlSumm ssm1, ssm2;
1255 			if(sr1 != NULL) sr1->toSeedAlSumm(ssm1);
1256 			if(sr2 != NULL) sr2->toSeedAlSumm(ssm2);
1257 			assert(!select2_.empty());
1258 			g_.reportHits(
1259 				obuf_,
1260 				staln_,
1261 				threadid_,
1262 				rd2_,
1263 				repRs1 != NULL ? rd1_ : NULL,
1264 				rdid_,
1265 				select2_,
1266 				repRs1 != NULL ? &select1_ : NULL,
1267 				&rs2u_,
1268 				repRs1 != NULL ? &rs1u_ : NULL,
1269 				unpair2Max,
1270 				summ2,
1271 				ssm1,
1272 				ssm2,
1273 				&flags2,
1274 				repRs1 != NULL ? &flags1 : NULL,
1275 				prm,
1276 				mapq_,
1277 				sc,
1278 				false);
1279 			assert_lt(select2_[0], rs2u_.size());
1280 			refid = rs2u_[select2_[0]].refid();
1281 			refoff = rs2u_[select2_[0]].refoff();
1282 		}
1283 
1284 		if(rd1_ != NULL && nunpair1 == 0) {
1285 			if(nunpair2 > 0) {
1286 				assert_neq(-1, refid);
1287 				summ1.init(
1288 					rd1_, NULL, NULL, NULL, NULL, NULL,
1289 					exhaust1, exhaust2, refid, refoff);
1290 			} else {
1291 				summ1.init(
1292 					rd1_, NULL, NULL, NULL, NULL, NULL,
1293 					exhaust1, exhaust2, -1, -1);
1294 			}
1295 			SeedAlSumm ssm1, ssm2;
1296 			if(sr1 != NULL) sr1->toSeedAlSumm(ssm1);
1297 			if(sr2 != NULL) sr2->toSeedAlSumm(ssm2);
1298 			flags1.init(
1299 				readIsPair() ?
1300 					ALN_FLAG_PAIR_UNPAIRED_MATE1 :
1301 					ALN_FLAG_PAIR_UNPAIRED,
1302 				st_.params().mhitsSet(),
1303 				false,
1304 				false,
1305 				nfilt1,
1306 				scfilt1,
1307 				lenfilt1,
1308 				qcfilt1,
1309 				st_.params().mixed,
1310 				true,           // primary
1311 				repRs2 != NULL, // opp aligned
1312 				(repRs2 != NULL) ? repRs2->fw() : false, // opp fw
1313 				scUnMapped,
1314 				xeq);
1315 			g_.reportUnaligned(
1316 				obuf_,      // string to write output to
1317 				staln_,
1318 				threadid_,
1319 				rd1_,    // read 1
1320 				NULL,    // read 2
1321 				rdid_,   // read id
1322 				summ1,   // summ
1323 				ssm1,    //
1324 				ssm2,
1325 				&flags1, // flags 1
1326 				NULL,    // flags 2
1327 				prm,     // per-read metrics
1328 				mapq_,   // MAPQ calculator
1329 				sc,      // scoring scheme
1330 				true);   // get lock?
1331 		}
1332 		if(rd2_ != NULL && nunpair2 == 0) {
1333 			if(nunpair1 > 0) {
1334 				assert_neq(-1, refid);
1335 				summ2.init(
1336 					NULL, rd2_, NULL, NULL, NULL, NULL,
1337 					exhaust1, exhaust2, refid, refoff);
1338 			} else {
1339 				summ2.init(
1340 					NULL, rd2_, NULL, NULL, NULL, NULL,
1341 					exhaust1, exhaust2, -1, -1);
1342 			}
1343 			SeedAlSumm ssm1, ssm2;
1344 			if(sr1 != NULL) sr1->toSeedAlSumm(ssm1);
1345 			if(sr2 != NULL) sr2->toSeedAlSumm(ssm2);
1346 			flags2.init(
1347 				readIsPair() ?
1348 					ALN_FLAG_PAIR_UNPAIRED_MATE2 :
1349 					ALN_FLAG_PAIR_UNPAIRED,
1350 				st_.params().mhitsSet(),
1351 				false,
1352 				false,
1353 				nfilt2,
1354 				scfilt2,
1355 				lenfilt2,
1356 				qcfilt2,
1357 				st_.params().mixed,
1358 				true,           // primary
1359 				repRs1 != NULL, // opp aligned
1360 				(repRs1 != NULL) ? repRs1->fw() : false, // opp fw
1361 				scUnMapped,
1362 				xeq);
1363 			g_.reportUnaligned(
1364 				obuf_,      // string to write output to
1365 				staln_,
1366 				threadid_,
1367 				rd2_,    // read 1
1368 				NULL,    // read 2
1369 				rdid_,   // read id
1370 				summ2,   // summ
1371 				ssm1,
1372 				ssm2,
1373 				&flags2, // flags 1
1374 				NULL,    // flags 2
1375 				prm,     // per-read metrics
1376 				mapq_,   // MAPQ calculator
1377 				sc,      // scoring scheme
1378 				true);   // get lock?
1379 		}
1380 	} // if(suppress alignments)
1381 	init_ = false;
1382 	return;
1383 }
1384 
1385 /**
1386  * Called by the aligner when a new unpaired or paired alignment is
1387  * discovered in the given stage.  This function checks whether the
1388  * addition of this alignment causes the reporting policy to be
1389  * violated (by meeting or exceeding the limits set by -k, -m, -M),
1390  * in which case true is returned immediately and the aligner is
1391  * short circuited.  Otherwise, the alignment is tallied and false
1392  * is returned.
1393  */
report(int stage,const AlnRes * rs1,const AlnRes * rs2)1394 bool AlnSinkWrap::report(
1395 	int stage,
1396 	const AlnRes* rs1,
1397 	const AlnRes* rs2)
1398 {
1399 	assert(init_);
1400 	assert(rs1 != NULL || rs2 != NULL);
1401 	assert(rs1 == NULL || !rs1->empty());
1402 	assert(rs2 == NULL || !rs2->empty());
1403 	assert(rs1 == NULL || rs1->repOk());
1404 	assert(rs2 == NULL || rs2->repOk());
1405 	bool paired = (rs1 != NULL && rs2 != NULL);
1406 	bool one = (rs1 != NULL);
1407 	const AlnRes* rsa = one ? rs1 : rs2;
1408 	const AlnRes* rsb = one ? rs2 : rs1;
1409 	if(paired) {
1410 		assert(readIsPair());
1411 		st_.foundConcordant();
1412 		rs1_.push_back(*rs1);
1413 		rs2_.push_back(*rs2);
1414 	} else {
1415 		st_.foundUnpaired(one);
1416 		if(one) {
1417 			rs1u_.push_back(*rs1);
1418 		} else {
1419 			rs2u_.push_back(*rs2);
1420 		}
1421 	}
1422 	// Tally overall alignment score
1423 	TAlScore score = rsa->score().score();
1424 	if(rsb != NULL) score += rsb->score().score();
1425 	// Update best score so far
1426 	if(paired) {
1427 		if(score > bestPair_) {
1428 			best2Pair_ = bestPair_;
1429 			bestPair_ = score;
1430 		} else if(score > best2Pair_) {
1431 			best2Pair_ = score;
1432 		}
1433 	} else {
1434 		if(one) {
1435 			if(score > bestUnp1_) {
1436 				best2Unp1_ = bestUnp1_;
1437 				bestUnp1_ = score;
1438 			} else if(score > best2Unp1_) {
1439 				best2Unp1_ = score;
1440 			}
1441 		} else {
1442 			if(score > bestUnp2_) {
1443 				best2Unp2_ = bestUnp2_;
1444 				bestUnp2_ = score;
1445 			} else if(score > best2Unp2_) {
1446 				best2Unp2_ = score;
1447 			}
1448 		}
1449 	}
1450 	return st_.done();
1451 }
1452 
1453 /**
1454  * If there is a configuration of unpaired alignments that fits our
1455  * criteria for there being one or more discordant alignments, then
1456  * shift the discordant alignments over to the rs1_/rs2_ lists, clear the
1457  * rs1u_/rs2u_ lists and return true.  Otherwise, return false.
1458  */
prepareDiscordants()1459 bool AlnSinkWrap::prepareDiscordants() {
1460 	if(rs1u_.size() == 1 && rs2u_.size() == 1) {
1461 		assert(rs1_.empty());
1462 		assert(rs2_.empty());
1463 		rs1_.push_back(rs1u_[0]);
1464 		rs2_.push_back(rs2u_[0]);
1465 		return true;
1466 	}
1467 	return false;
1468 }
1469 
1470 /**
1471  * rs1 (possibly together with rs2 if reads are paired) are populated with
1472  * alignments.  Here we prioritize them according to alignment score, and
1473  * some randomness to break ties.  Priorities are returned in the 'select'
1474  * list.
1475  */
selectByScore(const EList<AlnRes> * rs1,const EList<AlnRes> * rs2,uint64_t num,EList<size_t> & select,const EList<AlnRes> * rs1u,const EList<AlnRes> * rs2u,AlnScore & bestUScore,AlnScore & bestUDist,AlnScore & bestP1Score,AlnScore & bestP1Dist,AlnScore & bestP2Score,AlnScore & bestP2Dist,AlnScore & bestCScore,AlnScore & bestCDist,AlnScore & bestUnchosenUScore,AlnScore & bestUnchosenUDist,AlnScore & bestUnchosenP1Score,AlnScore & bestUnchosenP1Dist,AlnScore & bestUnchosenP2Score,AlnScore & bestUnchosenP2Dist,AlnScore & bestUnchosenCScore,AlnScore & bestUnchosenCDist,RandomSource & rnd) const1476 size_t AlnSinkWrap::selectByScore(
1477 	const EList<AlnRes>* rs1,    // alignments to select from (mate 1)
1478 	const EList<AlnRes>* rs2,    // alignments to select from (mate 2, or NULL)
1479 	uint64_t             num,    // number of alignments to select
1480 	EList<size_t>&       select, // prioritized list to put results in
1481 	const EList<AlnRes>* rs1u,   // alignments to select from (mate 1)
1482 	const EList<AlnRes>* rs2u,   // alignments to select from (mate 2, or NULL)
1483 	AlnScore&            bestUScore,
1484 	AlnScore&            bestUDist,
1485 	AlnScore&            bestP1Score,
1486 	AlnScore&            bestP1Dist,
1487 	AlnScore&            bestP2Score,
1488 	AlnScore&            bestP2Dist,
1489 	AlnScore&            bestCScore,
1490 	AlnScore&            bestCDist,
1491 	AlnScore&            bestUnchosenUScore,
1492 	AlnScore&            bestUnchosenUDist,
1493 	AlnScore&            bestUnchosenP1Score,
1494 	AlnScore&            bestUnchosenP1Dist,
1495 	AlnScore&            bestUnchosenP2Score,
1496 	AlnScore&            bestUnchosenP2Dist,
1497 	AlnScore&            bestUnchosenCScore,
1498 	AlnScore&            bestUnchosenCDist,
1499 	RandomSource&        rnd)
1500 	const
1501 {
1502 	assert(init_);
1503 	assert(repOk());
1504 	assert_gt(num, 0);
1505 	assert(rs1 != NULL);
1506 	assert(rs2 == NULL || rs1u != NULL);
1507 	assert(rs2 == NULL || rs2u != NULL);
1508 
1509 	bestUScore.invalidate();
1510 	bestUDist.invalidate();
1511 	bestUnchosenUScore.invalidate();
1512 	bestUnchosenUDist.invalidate();
1513 
1514 	bestCScore.invalidate();
1515 	bestP1Score.invalidate();
1516 	bestP2Score.invalidate();
1517 	bestCDist.invalidate();
1518 	bestP1Dist.invalidate();
1519 	bestP2Dist.invalidate();
1520 	bestUnchosenCScore.invalidate();
1521 	bestUnchosenP1Score.invalidate();
1522 	bestUnchosenP2Score.invalidate();
1523 	bestUnchosenCDist.invalidate();
1524 	bestUnchosenP1Dist.invalidate();
1525 	bestUnchosenP2Dist.invalidate();
1526 
1527 	size_t sz = rs1->size(); // sz = # alignments found
1528 	assert_leq(num, sz);
1529 	if(sz < num) {
1530 		num = sz;
1531 	}
1532 	// num = # to select
1533 	if(sz == 0) {
1534 		return 0;
1535 	}
1536 	select.resize((size_t)num);
1537 	// Use 'selectBuf_' as a temporary list for sorting purposes
1538 	EList<std::pair<AlnScore, size_t> >& buf =
1539 		const_cast<EList<std::pair<AlnScore, size_t> >& >(selectBuf_);
1540 	buf.resize(sz);
1541 	// Sort by score.  If reads are pairs, sort by sum of mate scores.
1542 	for(size_t i = 0; i < sz; i++) {
1543 		buf[i].first = (*rs1)[i].score();
1544 		if(rs2 != NULL) {
1545 			buf[i].first += (*rs2)[i].score();
1546 		}
1547 		buf[i].second = i; // original offset
1548 	}
1549 	buf.sort(); buf.reverse(); // sort in descending order by score
1550 
1551 	// Randomize streaks of alignments that are equal by score
1552 	size_t streak = 0;
1553 	for(size_t i = 1; i < buf.size(); i++) {
1554 		if(buf[i].first == buf[i-1].first) {
1555 			if(streak == 0) { streak = 1; }
1556 			streak++;
1557 		} else {
1558 			if(streak > 1) {
1559 				assert_geq(i, streak);
1560 				buf.shufflePortion(i-streak, streak, rnd);
1561 			}
1562 			streak = 0;
1563 		}
1564 	}
1565 	if(streak > 1) {
1566 		buf.shufflePortion(buf.size() - streak, streak, rnd);
1567 	}
1568 
1569 	// Copy the permutation into the 'select' list
1570 	for(size_t i = 0; i < num; i++) { select[i] = buf[i].second; }
1571 
1572 	if(rs2 == NULL) {
1573 		bestUScore = bestUDist = (*rs1)[select[0]].score();
1574 	}
1575 
1576 	// For paired-end read, find best alignment score among end
1577 	// alignments not chosen, for both ends
1578 	if(rs2 != NULL) {
1579 		bestCScore = bestCDist = (*rs1)[select[0]].score() + (*rs2)[select[0]].score();
1580 		bestP1Score = bestP1Dist = (*rs1)[select[0]].score();
1581 		bestP2Score = bestP2Dist = (*rs2)[select[0]].score();
1582 		for(size_t i = 0; i < rs1u->size(); i++) {
1583 			if((*rs1u)[i].refcoord() == (*rs1)[select[0]].refcoord()) {
1584 				continue;
1585 			}
1586 			if((*rs1u)[i].score() > bestUnchosenP1Score) {
1587 				bestUnchosenP1Score = (*rs1u)[i].score();
1588 			}
1589 			if((*rs1u)[i].score().basesAligned() > bestUnchosenP1Dist.basesAligned()) {
1590 				bestUnchosenP1Dist = (*rs1u)[i].score();
1591 			}
1592 		}
1593 		for(size_t i = 0; i < rs2u->size(); i++) {
1594 			if((*rs2u)[i].refcoord() == (*rs2)[select[0]].refcoord()) {
1595 				continue;
1596 			}
1597 			if((*rs2u)[i].score() > bestUnchosenP2Score) {
1598 				bestUnchosenP2Score = (*rs2u)[i].score();
1599 			}
1600 			if((*rs2u)[i].score().basesAligned() > bestUnchosenP2Dist.basesAligned()) {
1601 				bestUnchosenP2Dist = (*rs2u)[i].score();
1602 			}
1603 		}
1604 		if(buf.size() > 1) {
1605 			bestUnchosenCScore = buf[1].first;
1606 			for(size_t i = 1; i < buf.size(); i++) {
1607 				AlnScore dist = (*rs1)[buf[i].second].score() +
1608 				                (*rs2)[buf[i].second].score();
1609 				if(dist.basesAligned() > bestUnchosenCDist.basesAligned()) {
1610 					bestUnchosenCDist = dist;
1611 				}
1612 			}
1613 		}
1614 	} else if(buf.size() > 1) {
1615 		bestUnchosenUScore = (*rs1)[buf[1].second].score();
1616 		for(size_t i = 1; i < buf.size(); i++) {
1617 			if((*rs1)[buf[1].second].score().basesAligned() > bestUnchosenUDist.basesAligned()) {
1618 				bestUnchosenUDist = (*rs1)[buf[1].second].score();
1619 			}
1620 		}
1621 	}
1622 
1623 	// Returns index of the representative alignment, but in 'select' also
1624 	// returns the indexes of the next best selected alignments in order by
1625 	// score.
1626 	return selectBuf_[0].second;
1627 }
1628 
1629 /**
1630  * Given that rs is already populated with alignments, consider the
1631  * alignment policy and make random selections where necessary.  E.g. if we
1632  * found 10 alignments and the policy is -k 2 -m 20, select 2 alignments at
1633  * random.  We "select" an alignment by setting the parallel entry in the
1634  * 'select' list to true.
1635  *
1636  * Return the "representative" alignment.  This is simply the first one
1637  * selected.  That will also be what SAM calls the "primary" alignment.
1638  */
selectAlnsToReport(const EList<AlnRes> & rs,uint64_t num,EList<size_t> & select,RandomSource & rnd) const1639 size_t AlnSinkWrap::selectAlnsToReport(
1640 	const EList<AlnRes>& rs,     // alignments to select from
1641 	uint64_t             num,    // number of alignments to select
1642 	EList<size_t>&       select, // list to put results in
1643 	RandomSource&        rnd)
1644 	const
1645 {
1646 	assert(init_);
1647 	assert(repOk());
1648 	assert_gt(num, 0);
1649 	size_t sz = rs.size();
1650 	if(sz < num) {
1651 		num = sz;
1652 	}
1653 	if(sz < 1) {
1654 		return 0;
1655 	}
1656 	select.resize((size_t)num);
1657 	if(sz == 1) {
1658 		assert_eq(1, num);
1659 		select[0] = 0;
1660 		return 0;
1661 	}
1662 	// Select a random offset into the list of alignments
1663 	uint32_t off = rnd.nextU32() % (uint32_t)sz;
1664 	uint32_t offOrig = off;
1665 	// Now take elements starting at that offset, wrapping around to 0 if
1666 	// necessary.  Leave the rest.
1667 	for(size_t i = 0; i < num; i++) {
1668 		select[i] = off;
1669 		off++;
1670 		if(off == sz) {
1671 			off = 0;
1672 		}
1673 	}
1674 	return offOrig;
1675 }
1676 
1677 #define NOT_SUPPRESSED !suppress_[field++]
1678 #define BEGIN_FIELD { \
1679 	if(firstfield) firstfield = false; \
1680 	else o.append('\t'); \
1681 }
1682 #define WRITE_TAB { \
1683 	if(firstfield) firstfield = false; \
1684 	else o.append('\t'); \
1685 }
1686 #define WRITE_NUM(o, x) { \
1687 	itoa10(x, buf); \
1688 	o.append(buf); \
1689 }
1690 
1691 /**
1692  * Print a seed summary to the first output stream in the outs_ list.
1693  */
reportSeedSummary(BTString & o,const Read & rd,TReadId rdid,size_t threadId,const SeedResults & rs,bool getLock)1694 void AlnSink::reportSeedSummary(
1695 	BTString&          o,
1696 	const Read&        rd,
1697 	TReadId            rdid,
1698 	size_t             threadId,
1699 	const SeedResults& rs,
1700 	bool               getLock)
1701 {
1702 	appendSeedSummary(
1703 		o,                     // string to write to
1704 		rd,                    // read
1705 		rdid,                  // read id
1706 		rs.numOffs()*2,        // # seeds tried
1707 		rs.nonzeroOffsets(),   // # seeds with non-empty results
1708 		rs.numRanges(),        // # ranges for all seed hits
1709 		rs.numElts(),          // # elements for all seed hits
1710 		rs.numOffs(),          // # seeds tried from fw read
1711 		rs.nonzeroOffsetsFw(), // # seeds with non-empty results from fw read
1712 		rs.numRangesFw(),      // # ranges for seed hits from fw read
1713 		rs.numEltsFw(),        // # elements for seed hits from fw read
1714 		rs.numOffs(),          // # seeds tried from rc read
1715 		rs.nonzeroOffsetsRc(), // # seeds with non-empty results from fw read
1716 		rs.numRangesRc(),      // # ranges for seed hits from fw read
1717 		rs.numEltsRc());       // # elements for seed hits from fw read
1718 }
1719 
1720 /**
1721  * Print an empty seed summary to the first output stream in the outs_ list.
1722  */
reportEmptySeedSummary(BTString & o,const Read & rd,TReadId rdid,size_t threadId,bool getLock)1723 void AlnSink::reportEmptySeedSummary(
1724 	BTString&          o,
1725 	const Read&        rd,
1726 	TReadId            rdid,
1727 	size_t             threadId,
1728 	bool               getLock)
1729 {
1730 	appendSeedSummary(
1731 		o,                     // string to append to
1732 		rd,                    // read
1733 		rdid,                  // read id
1734 		0,                     // # seeds tried
1735 		0,                     // # seeds with non-empty results
1736 		0,                     // # ranges for all seed hits
1737 		0,                     // # elements for all seed hits
1738 		0,                     // # seeds tried from fw read
1739 		0,                     // # seeds with non-empty results from fw read
1740 		0,                     // # ranges for seed hits from fw read
1741 		0,                     // # elements for seed hits from fw read
1742 		0,                     // # seeds tried from rc read
1743 		0,                     // # seeds with non-empty results from fw read
1744 		0,                     // # ranges for seed hits from fw read
1745 		0);                    // # elements for seed hits from fw read
1746 }
1747 
1748 /**
1749  * Print the given string.  If ws = true, print only up to and not
1750  * including the first space or tab.  Useful for printing reference
1751  * names.
1752  */
1753 template<typename T>
printUptoWs(BTString & s,const T & str,bool chopws)1754 static inline void printUptoWs(
1755 	BTString& s,
1756 	const T& str,
1757 	bool chopws)
1758 {
1759 	size_t len = str.length();
1760 	for(size_t i = 0; i < len; i++) {
1761 		if(!chopws || (str[i] != ' ' && str[i] != '\t')) {
1762 			s.append(str[i]);
1763 		} else {
1764 			break;
1765 		}
1766 	}
1767 }
1768 
1769 /**
1770  * Append a batch of unresolved seed alignment summary results (i.e.
1771  * seed alignments where all we know is the reference sequence aligned
1772  * to and its SA range, not where it falls in the reference
1773  * sequence) to the given output stream in Bowtie's seed-sumamry
1774  * verbose-mode format.
1775  *
1776  * The seed summary format is:
1777  *
1778  *  - One line per read
1779  *  - A typical line consists of a set of tab-delimited fields:
1780  *
1781  *    1. Read name
1782  *    2. Total number of seeds extracted from the read
1783  *    3. Total number of seeds that aligned to the reference at
1784  *       least once (always <= field 2)
1785  *    4. Total number of distinct BW ranges found in all seed hits
1786  *       (always >= field 3)
1787  *    5. Total number of distinct BW elements found in all seed
1788  *       hits (always >= field 4)
1789  *    6-9.:   Like 2-5. but just for seeds extracted from the
1790  *            forward representation of the read
1791  *    10-13.: Like 2-5. but just for seeds extracted from the
1792  *            reverse-complement representation of the read
1793  *
1794  *    Note that fields 6 and 10 should add to field 2, 7 and 11
1795  *    should add to 3, etc.
1796  *
1797  *  - Lines for reads that are filtered out for any reason (e.g. too
1798  *    many Ns) have columns 2 through 13 set to 0.
1799  */
appendSeedSummary(BTString & o,const Read & rd,const TReadId rdid,size_t seedsTried,size_t nonzero,size_t ranges,size_t elts,size_t seedsTriedFw,size_t nonzeroFw,size_t rangesFw,size_t eltsFw,size_t seedsTriedRc,size_t nonzeroRc,size_t rangesRc,size_t eltsRc)1800 void AlnSink::appendSeedSummary(
1801 	BTString&     o,
1802 	const Read&   rd,
1803 	const TReadId rdid,
1804 	size_t        seedsTried,
1805 	size_t        nonzero,
1806 	size_t        ranges,
1807 	size_t        elts,
1808 	size_t        seedsTriedFw,
1809 	size_t        nonzeroFw,
1810 	size_t        rangesFw,
1811 	size_t        eltsFw,
1812 	size_t        seedsTriedRc,
1813 	size_t        nonzeroRc,
1814 	size_t        rangesRc,
1815 	size_t        eltsRc)
1816 {
1817 	char buf[1024];
1818 	bool firstfield = true;
1819 	//
1820 	// Read name
1821 	//
1822 	BEGIN_FIELD;
1823 	printUptoWs(o, rd.name, true);
1824 
1825 	//
1826 	// Total number of seeds tried
1827 	//
1828 	BEGIN_FIELD;
1829 	WRITE_NUM(o, seedsTried);
1830 
1831 	//
1832 	// Total number of seeds tried where at least one range was found.
1833 	//
1834 	BEGIN_FIELD;
1835 	WRITE_NUM(o, nonzero);
1836 
1837 	//
1838 	// Total number of ranges found
1839 	//
1840 	BEGIN_FIELD;
1841 	WRITE_NUM(o, ranges);
1842 
1843 	//
1844 	// Total number of elements found
1845 	//
1846 	BEGIN_FIELD;
1847 	WRITE_NUM(o, elts);
1848 
1849 	//
1850 	// The same four numbers, but only for seeds extracted from the
1851 	// forward read representation.
1852 	//
1853 	BEGIN_FIELD;
1854 	WRITE_NUM(o, seedsTriedFw);
1855 
1856 	BEGIN_FIELD;
1857 	WRITE_NUM(o, nonzeroFw);
1858 
1859 	BEGIN_FIELD;
1860 	WRITE_NUM(o, rangesFw);
1861 
1862 	BEGIN_FIELD;
1863 	WRITE_NUM(o, eltsFw);
1864 
1865 	//
1866 	// The same four numbers, but only for seeds extracted from the
1867 	// reverse complement read representation.
1868 	//
1869 	BEGIN_FIELD;
1870 	WRITE_NUM(o, seedsTriedRc);
1871 
1872 	BEGIN_FIELD;
1873 	WRITE_NUM(o, nonzeroRc);
1874 
1875 	BEGIN_FIELD;
1876 	WRITE_NUM(o, rangesRc);
1877 
1878 	BEGIN_FIELD;
1879 	WRITE_NUM(o, eltsRc);
1880 
1881 	o.append('\n');
1882 }
1883 
1884 /**
1885  * Append a single hit to the given output stream in Bowtie's
1886  * verbose-mode format.
1887  */
appendMate(BTString & o,StackedAln & staln,const Read & rd,const Read * rdo,const TReadId rdid,AlnRes * rs,AlnRes * rso,const AlnSetSumm & summ,const SeedAlSumm & ssm,const SeedAlSumm & ssmo,const AlnFlags & flags,const PerReadMetrics & prm,const Mapq & mapqCalc,const Scoring & sc)1888 void AlnSinkSam::appendMate(
1889 	BTString&     o,           // append to this string
1890 	StackedAln&   staln,       // store stacked alignment struct here
1891 	const Read&   rd,
1892 	const Read*   rdo,
1893 	const TReadId rdid,
1894 	AlnRes* rs,
1895 	AlnRes* rso,
1896 	const AlnSetSumm& summ,
1897 	const SeedAlSumm& ssm,
1898 	const SeedAlSumm& ssmo,
1899 	const AlnFlags& flags,
1900 	const PerReadMetrics& prm,
1901 	const Mapq& mapqCalc,
1902 	const Scoring& sc)
1903 {
1904 	if(rs == NULL && samc_.omitUnalignedReads()) {
1905 		return;
1906 	}
1907 	char buf[1024];
1908 	char mapqInps[1024];
1909 	if(rs != NULL) {
1910 		staln.reset();
1911 		rs->initStacked(rd, staln);
1912 		staln.leftAlign(false /* not past MMs */);
1913 	}
1914 	int offAdj = 0;
1915 	// QNAME
1916 	samc_.printReadName(o, rd.name, flags.partOfPair());
1917 	o.append('\t');
1918 	// FLAG
1919 	int fl = 0;
1920 	if(flags.partOfPair()) {
1921 		fl |= SAM_FLAG_PAIRED;
1922 		if(flags.alignedConcordant()) {
1923 			fl |= SAM_FLAG_MAPPED_PAIRED;
1924  		}
1925 		if(!flags.mateAligned()) {
1926 			// Other fragment is unmapped
1927 			fl |= SAM_FLAG_MATE_UNMAPPED;
1928 		}
1929 		fl |= (flags.readMate1() ?
1930 			SAM_FLAG_FIRST_IN_PAIR : SAM_FLAG_SECOND_IN_PAIR);
1931 		if(flags.mateAligned()) {
1932 			bool oppFw = (rso != NULL) ? rso->fw() : flags.isOppFw();
1933 			if (!oppFw) {
1934 				fl |= SAM_FLAG_MATE_STRAND;
1935 			}
1936 		}
1937 	}
1938 	if(!flags.isPrimary()) {
1939 		fl |= SAM_FLAG_NOT_PRIMARY;
1940 	}
1941 	if(rs != NULL && !rs->fw()) {
1942 		fl |= SAM_FLAG_QUERY_STRAND;
1943 	}
1944 	if(rs == NULL) {
1945 		// Failed to align
1946 		fl |= SAM_FLAG_UNMAPPED;
1947 	}
1948 	itoa10<int>(fl, buf);
1949 	o.append(buf);
1950 	o.append('\t');
1951 	// RNAME
1952 	if(rs != NULL) {
1953 		samc_.printRefNameFromIndex(o, (size_t)rs->refid());
1954 		o.append('\t');
1955 	} else {
1956 		if(summ.orefid() != -1) {
1957 			// Opposite mate aligned but this one didn't - print the opposite
1958 			// mate's RNAME and POS as is customary
1959 			assert(flags.partOfPair());
1960 			samc_.printRefNameFromIndex(o, (size_t)summ.orefid());
1961 		} else {
1962 			// No alignment
1963 			o.append('*');
1964 		}
1965 		o.append('\t');
1966 	}
1967 	// POS
1968 	// Note: POS is *after* soft clipping.  I.e. POS points to the
1969 	// upstream-most character *involved in the clipped alignment*.
1970 	if(rs != NULL) {
1971 		itoa10<int64_t>(rs->refoff()+1+offAdj, buf);
1972 		o.append(buf);
1973 		o.append('\t');
1974 	} else {
1975 		if(summ.orefid() != -1) {
1976 			// Opposite mate aligned but this one didn't - print the opposite
1977 			// mate's RNAME and POS as is customary
1978 			assert(flags.partOfPair());
1979 			itoa10<int64_t>(summ.orefoff()+1+offAdj, buf);
1980 			o.append(buf);
1981 		} else {
1982 			// No alignment
1983 			o.append('0');
1984 		}
1985 		o.append('\t');
1986 	}
1987 	// MAPQ
1988 	mapqInps[0] = '\0';
1989 	if(rs != NULL) {
1990 		itoa10<TMapq>(mapqCalc.mapq(
1991 			summ, flags, rd.mate < 2, rd.length(),
1992 			rdo == NULL ? 0 : rdo->length(), mapqInps), buf);
1993 		o.append(buf);
1994 		o.append('\t');
1995 	} else {
1996 		// No alignment
1997 		o.append("0\t");
1998 	}
1999 	// CIGAR
2000 	if(rs != NULL) {
2001 		staln.buildCigar(flags.xeq());
2002 		staln.writeCigar(&o, NULL);
2003 		o.append('\t');
2004 	} else {
2005 		// No alignment
2006 		o.append("*\t");
2007 	}
2008 	// RNEXT
2009 	if(rs != NULL && flags.partOfPair()) {
2010 		if(rso != NULL && rs->refid() != rso->refid()) {
2011 			samc_.printRefNameFromIndex(o, (size_t)rso->refid());
2012 			o.append('\t');
2013 		} else {
2014 			o.append("=\t");
2015 		}
2016 	} else if(summ.orefid() != -1) {
2017 		// The convention if this mate fails to align but the other doesn't is
2018 		// to copy the mate's details into here
2019 		o.append("=\t");
2020 	} else {
2021 		o.append("*\t");
2022 	}
2023 	// PNEXT
2024 	if(rs != NULL && flags.partOfPair()) {
2025 		if(rso != NULL) {
2026 			itoa10<int64_t>(rso->refoff()+1, buf);
2027 			o.append(buf);
2028 			o.append('\t');
2029 		} else {
2030 			// The convenstion is that if this mate aligns but the opposite
2031 			// doesn't, we print this mate's offset here
2032 			itoa10<int64_t>(rs->refoff()+1, buf);
2033 			o.append(buf);
2034 			o.append('\t');
2035 		}
2036 	} else if(summ.orefid() != -1) {
2037 		// The convention if this mate fails to align but the other doesn't is
2038 		// to copy the mate's details into here
2039 		itoa10<int64_t>(summ.orefoff()+1, buf);
2040 		o.append(buf);
2041 		o.append('\t');
2042 	} else {
2043 		o.append("0\t");
2044 	}
2045 	// ISIZE
2046 	if(rs != NULL && rs->isFraglenSet()) {
2047 		itoa10<int64_t>(rs->fragmentLength(), buf);
2048 		o.append(buf);
2049 		o.append('\t');
2050 	} else {
2051 		// No fragment
2052 		o.append("0\t");
2053 	}
2054 	// SEQ
2055 	if(!flags.isPrimary() && samc_.omitSecondarySeqQual()) {
2056 		o.append('*');
2057 	} else {
2058 		// Print the read
2059 		if(rd.patFw.length() == 0) {
2060 			o.append('*');
2061 		} else {
2062 			if(rs == NULL || rs->fw()) {
2063 				o.append(rd.patFw.toZBuf());
2064 			} else {
2065 				o.append(rd.patRc.toZBuf());
2066 			}
2067 		}
2068 	}
2069 	o.append('\t');
2070 	// QUAL
2071 	if(!flags.isPrimary() && samc_.omitSecondarySeqQual()) {
2072 		o.append('*');
2073 	} else {
2074 		// Print the quals
2075 		if(rd.qual.length() == 0) {
2076 			o.append('*');
2077 		} else {
2078 			if(rs == NULL || rs->fw()) {
2079 				o.append(rd.qual.toZBuf());
2080 			} else {
2081 				o.append(rd.qualRev.toZBuf());
2082 			}
2083 		}
2084 	}
2085 	o.append('\t');
2086 	//
2087 	// Optional fields
2088 	//
2089 	if(rs != NULL) {
2090 		samc_.printAlignedOptFlags(
2091 			o,           // output buffer
2092 			true,        // first opt flag printed is first overall?
2093 			rd,          // read
2094 			rdo,         // opposite read
2095 			*rs,         // individual alignment result
2096 			staln,       // stacked alignment
2097 			flags,       // alignment flags
2098 			summ,        // summary of alignments for this read
2099 			ssm,         // seed alignment summary
2100 			prm,         // per-read metrics
2101 			sc,          // scoring scheme
2102 			mapqInps);   // inputs to MAPQ calculation
2103 	} else {
2104 		samc_.printEmptyOptFlags(
2105 			o,           // output buffer
2106 			true,        // first opt flag printed is first overall?
2107 			rd,          // read
2108 			flags,       // alignment flags
2109 			summ,        // summary of alignments for this read
2110 			ssm,         // seed alignment summary
2111 			prm,         // per-read metrics
2112 			sc);         // scoring scheme
2113 	}
2114 	samc_.printPreservedOptFlags(o, rd);
2115 	samc_.printComment(o, rd.name);
2116 	o.append('\n');
2117 }
2118 
2119 #ifdef ALN_SINK_MAIN
2120 
2121 #include <iostream>
2122 
testDones(const ReportingState & st,bool done1,bool done2,bool done3,bool done4,bool done5,bool done6)2123 bool testDones(
2124 	const ReportingState& st,
2125 	bool done1,
2126 	bool done2,
2127 	bool done3,
2128 	bool done4,
2129 	bool done5,
2130 	bool done6)
2131 {
2132 	assert(st.doneConcordant()    == done1);
2133 	assert(st.doneDiscordant()    == done2);
2134 	assert(st.doneUnpaired(true)  == done3);
2135 	assert(st.doneUnpaired(false) == done4);
2136 	assert(st.doneUnpaired()      == done5);
2137 	assert(st.done()              == done6);
2138 	assert(st.repOk());
2139 	return true;
2140 }
2141 
main(void)2142 int main(void) {
2143 	cerr << "Case 1 (simple unpaired 1) ... ";
2144 	{
2145 		uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2146 		bool pairMax = false, unpair1Max = false, unpair2Max = false;
2147 		ReportingParams rp(
2148 			2,      // khits
2149 			0,      // mhits
2150 			0,      // pengap
2151 			false,  // msample
2152 			false,  // discord
2153 			false); // mixed
2154 		ReportingState st(rp);
2155 		st.nextRead(false); // unpaired read
2156 		assert(testDones(st, true, true, false, true, false, false));
2157 		st.foundUnpaired(true);
2158 		assert(testDones(st, true, true, false, true, false, false));
2159 		st.foundUnpaired(true);
2160 		assert(testDones(st, true, true, true, true, true, true));
2161 		st.finish();
2162 		assert(testDones(st, true, true, true, true, true, true));
2163 		assert_eq(0, st.numConcordant());
2164 		assert_eq(0, st.numDiscordant());
2165 		assert_eq(2, st.numUnpaired1());
2166 		assert_eq(0, st.numUnpaired2());
2167 		assert(st.repOk());
2168 		st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2169 		             pairMax, unpair1Max, unpair2Max);
2170 		assert_eq(0, nconcord);
2171 		assert_eq(0, ndiscord);
2172 		assert_eq(2, nunpair1);
2173 		assert_eq(0, nunpair2);
2174 		assert(!pairMax);
2175 		assert(!unpair1Max);
2176 		assert(!unpair2Max);
2177 	}
2178 	cerr << "PASSED" << endl;
2179 
2180 	cerr << "Case 2 (simple unpaired 1) ... ";
2181 	{
2182 		uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2183 		bool pairMax = false, unpair1Max = false, unpair2Max = false;
2184 		ReportingParams rp(
2185 			2,      // khits
2186 			3,      // mhits
2187 			0,      // pengap
2188 			false,  // msample
2189 			false,  // discord
2190 			false); // mixed
2191 		ReportingState st(rp);
2192 		st.nextRead(false); // unpaired read
2193 		assert(testDones(st, true, true, false, true, false, false));
2194 		st.foundUnpaired(true);
2195 		assert(testDones(st, true, true, false, true, false, false));
2196 		st.foundUnpaired(true);
2197 		assert(testDones(st, true, true, false, true, false, false));
2198 		st.foundUnpaired(true);
2199 		assert(testDones(st, true, true, false, true, false, false));
2200 		st.foundUnpaired(true);
2201 		assert(testDones(st, true, true, true, true, true, true));
2202 		assert_eq(0, st.numConcordant());
2203 		assert_eq(0, st.numDiscordant());
2204 		assert_eq(4, st.numUnpaired1());
2205 		assert_eq(0, st.numUnpaired2());
2206 		st.finish();
2207 		assert(testDones(st, true, true, true, true, true, true));
2208 		assert_eq(0, st.numConcordant());
2209 		assert_eq(0, st.numDiscordant());
2210 		assert_eq(4, st.numUnpaired1());
2211 		assert_eq(0, st.numUnpaired2());
2212 		assert(st.repOk());
2213 		st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2214 		             pairMax, unpair1Max, unpair2Max);
2215 		assert_eq(0, nconcord);
2216 		assert_eq(0, ndiscord);
2217 		assert_eq(0, nunpair1);
2218 		assert_eq(0, nunpair2);
2219 		assert(!pairMax);
2220 		assert(unpair1Max);
2221 		assert(!unpair2Max);
2222 	}
2223 	cerr << "PASSED" << endl;
2224 
2225 	cerr << "Case 3 (simple paired 1) ... ";
2226 	{
2227 		uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2228 		bool pairMax = false, unpair1Max = false, unpair2Max = false;
2229 		ReportingParams rp(
2230 			2,      // khits
2231 			3,      // mhits
2232 			0,      // pengap
2233 			false,  // msample
2234 			false,  // discord
2235 			false); // mixed
2236 		ReportingState st(rp);
2237 		st.nextRead(true); // unpaired read
2238 		assert(testDones(st, false, true, true, true, true, false));
2239 		st.foundUnpaired(true);
2240 		assert(testDones(st, false, true, true, true, true, false));
2241 		st.foundUnpaired(true);
2242 		assert(testDones(st, false, true, true, true, true, false));
2243 		st.foundUnpaired(true);
2244 		assert(testDones(st, false, true, true, true, true, false));
2245 		st.foundUnpaired(true);
2246 		assert(testDones(st, false, true, true, true, true, false));
2247 		st.foundUnpaired(false);
2248 		assert(testDones(st, false, true, true, true, true, false));
2249 		st.foundUnpaired(false);
2250 		assert(testDones(st, false, true, true, true, true, false));
2251 		st.foundUnpaired(false);
2252 		assert(testDones(st, false, true, true, true, true, false));
2253 		st.foundUnpaired(false);
2254 		assert(testDones(st, false, true, true, true, true, false));
2255 		st.foundConcordant();
2256 		assert(testDones(st, false, true, true, true, true, false));
2257 		st.foundConcordant();
2258 		assert(testDones(st, false, true, true, true, true, false));
2259 		st.foundConcordant();
2260 		assert(testDones(st, false, true, true, true, true, false));
2261 		st.foundConcordant();
2262 		assert(testDones(st, true, true, true, true, true, true));
2263 		assert_eq(4, st.numConcordant());
2264 		assert_eq(0, st.numDiscordant());
2265 		assert_eq(4, st.numUnpaired1());
2266 		assert_eq(4, st.numUnpaired2());
2267 		st.finish();
2268 		assert(testDones(st, true, true, true, true, true, true));
2269 		assert_eq(4, st.numConcordant());
2270 		assert_eq(0, st.numDiscordant());
2271 		assert_eq(4, st.numUnpaired1());
2272 		assert_eq(4, st.numUnpaired2());
2273 		assert(st.repOk());
2274 		st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2275 		             pairMax, unpair1Max, unpair2Max);
2276 		assert_eq(0, nconcord);
2277 		assert_eq(0, ndiscord);
2278 		assert_eq(0, nunpair1);
2279 		assert_eq(0, nunpair2);
2280 		assert(pairMax);
2281 		assert(!unpair1Max); // because !mixed
2282 		assert(!unpair2Max); // because !mixed
2283 	}
2284 	cerr << "PASSED" << endl;
2285 
2286 	cerr << "Case 4 (simple paired 2) ... ";
2287 	{
2288 		uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2289 		bool pairMax = false, unpair1Max = false, unpair2Max = false;
2290 		ReportingParams rp(
2291 			2,      // khits
2292 			3,      // mhits
2293 			0,      // pengap
2294 			false,  // msample
2295 			true,   // discord
2296 			true);  // mixed
2297 		ReportingState st(rp);
2298 		st.nextRead(true); // unpaired read
2299 		assert(testDones(st, false, false, false, false, false, false));
2300 		st.foundUnpaired(true);
2301 		assert(testDones(st, false, false, false, false, false, false));
2302 		st.foundUnpaired(true);
2303 		assert(testDones(st, false, true, false, false, false, false));
2304 		st.foundUnpaired(true);
2305 		assert(testDones(st, false, true, false, false, false, false));
2306 		st.foundUnpaired(true);
2307 		assert(testDones(st, false, true, true, false, false, false));
2308 		st.foundUnpaired(false);
2309 		assert(testDones(st, false, true, true, false, false, false));
2310 		st.foundUnpaired(false);
2311 		assert(testDones(st, false, true, true, false, false, false));
2312 		st.foundUnpaired(false);
2313 		assert(testDones(st, false, true, true, false, false, false));
2314 		st.foundUnpaired(false);
2315 		assert(testDones(st, false, true, true, true, true, false));
2316 		st.foundConcordant();
2317 		assert(testDones(st, false, true, true, true, true, false));
2318 		st.foundConcordant();
2319 		assert(testDones(st, false, true, true, true, true, false));
2320 		st.foundConcordant();
2321 		assert(testDones(st, false, true, true, true, true, false));
2322 		st.foundConcordant();
2323 		assert(testDones(st, true, true, true, true, true, true));
2324 		assert_eq(4, st.numConcordant());
2325 		assert_eq(0, st.numDiscordant());
2326 		assert_eq(4, st.numUnpaired1());
2327 		assert_eq(4, st.numUnpaired2());
2328 		st.finish();
2329 		assert(testDones(st, true, true, true, true, true, true));
2330 		assert_eq(4, st.numConcordant());
2331 		assert_eq(0, st.numDiscordant());
2332 		assert_eq(4, st.numUnpaired1());
2333 		assert_eq(4, st.numUnpaired2());
2334 		assert(st.repOk());
2335 		st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2336 		             pairMax, unpair1Max, unpair2Max);
2337 		assert_eq(0, nconcord);
2338 		assert_eq(0, ndiscord);
2339 		assert_eq(0, nunpair1);
2340 		assert_eq(0, nunpair2);
2341 		assert(pairMax);
2342 		assert(unpair1Max);
2343 		assert(unpair2Max);
2344 	}
2345 	cerr << "PASSED" << endl;
2346 
2347 	cerr << "Case 5 (potential discordant after concordant) ... ";
2348 	{
2349 		uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2350 		bool pairMax = false, unpair1Max = false, unpair2Max = false;
2351 		ReportingParams rp(
2352 			2,      // khits
2353 			3,      // mhits
2354 			0,      // pengap
2355 			false,  // msample
2356 			true,   // discord
2357 			true);  // mixed
2358 		ReportingState st(rp);
2359 		st.nextRead(true);
2360 		assert(testDones(st, false, false, false, false, false, false));
2361 		st.foundUnpaired(true);
2362 		st.foundUnpaired(false);
2363 		st.foundConcordant();
2364 		assert(testDones(st, false, true, false, false, false, false));
2365 		st.finish();
2366 		assert(testDones(st, true, true, true, true, true, true));
2367 		assert_eq(1, st.numConcordant());
2368 		assert_eq(0, st.numDiscordant());
2369 		assert_eq(1, st.numUnpaired1());
2370 		assert_eq(1, st.numUnpaired2());
2371 		assert(st.repOk());
2372 		st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2373 		             pairMax, unpair1Max, unpair2Max);
2374 		assert_eq(1, nconcord);
2375 		assert_eq(0, ndiscord);
2376 		assert_eq(0, nunpair1);
2377 		assert_eq(0, nunpair2);
2378 		assert(!pairMax);
2379 		assert(!unpair1Max);
2380 		assert(!unpair2Max);
2381 	}
2382 	cerr << "PASSED" << endl;
2383 
2384 	cerr << "Case 6 (true discordant) ... ";
2385 	{
2386 		uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2387 		bool pairMax = false, unpair1Max = false, unpair2Max = false;
2388 		ReportingParams rp(
2389 			2,      // khits
2390 			3,      // mhits
2391 			0,      // pengap
2392 			false,  // msample
2393 			true,   // discord
2394 			true);  // mixed
2395 		ReportingState st(rp);
2396 		st.nextRead(true);
2397 		assert(testDones(st, false, false, false, false, false, false));
2398 		st.foundUnpaired(true);
2399 		st.foundUnpaired(false);
2400 		assert(testDones(st, false, false, false, false, false, false));
2401 		st.finish();
2402 		assert(testDones(st, true, true, true, true, true, true));
2403 		assert_eq(0, st.numConcordant());
2404 		assert_eq(1, st.numDiscordant());
2405 		assert_eq(0, st.numUnpaired1());
2406 		assert_eq(0, st.numUnpaired2());
2407 		assert(st.repOk());
2408 		st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2409 		             pairMax, unpair1Max, unpair2Max);
2410 		assert_eq(0, nconcord);
2411 		assert_eq(1, ndiscord);
2412 		assert_eq(0, nunpair1);
2413 		assert_eq(0, nunpair2);
2414 		assert(!pairMax);
2415 		assert(!unpair1Max);
2416 		assert(!unpair2Max);
2417 	}
2418 	cerr << "PASSED" << endl;
2419 
2420 	cerr << "Case 7 (unaligned pair & uniquely aligned mate, mixed-mode) ... ";
2421 	{
2422 		uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2423 		bool pairMax = false, unpair1Max = false, unpair2Max = false;
2424 		ReportingParams rp(
2425 			1,      // khits
2426 			1,      // mhits
2427 			0,      // pengap
2428 			false,  // msample
2429 			true,   // discord
2430 			true);  // mixed
2431 		ReportingState st(rp);
2432 		st.nextRead(true); // unpaired read
2433 		// assert(st.doneConcordant()    == done1);
2434 		// assert(st.doneDiscordant()    == done2);
2435 		// assert(st.doneUnpaired(true)  == done3);
2436 		// assert(st.doneUnpaired(false) == done4);
2437 		// assert(st.doneUnpaired()      == done5);
2438 		// assert(st.done()              == done6);
2439 		st.foundUnpaired(true);
2440 		assert(testDones(st, false, false, false, false, false, false));
2441 		st.foundUnpaired(true);
2442 		assert(testDones(st, false, true, true, false, false, false));
2443 		assert_eq(0, st.numConcordant());
2444 		assert_eq(0, st.numDiscordant());
2445 		assert_eq(2, st.numUnpaired1());
2446 		assert_eq(0, st.numUnpaired2());
2447 		st.finish();
2448 		st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2449 		             pairMax, unpair1Max, unpair2Max);
2450 		assert_eq(0, nconcord);
2451 		assert_eq(0, ndiscord);
2452 		assert_eq(0, nunpair1);
2453 		assert_eq(0, nunpair2);
2454 		assert(!pairMax);
2455 		assert(unpair1Max);
2456 		assert(!unpair2Max);
2457 	}
2458 	cerr << "PASSED" << endl;
2459 
2460 	cerr << "Case 8 (unaligned pair & uniquely aligned mate, NOT mixed-mode) ... ";
2461 	{
2462 		uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2463 		bool pairMax = false, unpair1Max = false, unpair2Max = false;
2464 		ReportingParams rp(
2465 			1,      // khits
2466 			1,      // mhits
2467 			0,      // pengap
2468 			false,  // msample
2469 			true,   // discord
2470 			false); // mixed
2471 		ReportingState st(rp);
2472 		st.nextRead(true); // unpaired read
2473 		// assert(st.doneConcordant()    == done1);
2474 		// assert(st.doneDiscordant()    == done2);
2475 		// assert(st.doneUnpaired(true)  == done3);
2476 		// assert(st.doneUnpaired(false) == done4);
2477 		// assert(st.doneUnpaired()      == done5);
2478 		// assert(st.done()              == done6);
2479 		st.foundUnpaired(true);
2480 		assert(testDones(st, false, false, true, true, true, false));
2481 		st.foundUnpaired(true);
2482 		assert(testDones(st, false, true, true, true, true, false));
2483 		assert_eq(0, st.numConcordant());
2484 		assert_eq(0, st.numDiscordant());
2485 		assert_eq(2, st.numUnpaired1());
2486 		assert_eq(0, st.numUnpaired2());
2487 		st.finish();
2488 		st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2489 		             pairMax, unpair1Max, unpair2Max);
2490 		assert_eq(0, nconcord);
2491 		assert_eq(0, ndiscord);
2492 		assert_eq(0, nunpair1);
2493 		assert_eq(0, nunpair2);
2494 		assert(!pairMax);
2495 		assert(!unpair1Max); // not really relevant
2496 		assert(!unpair2Max); // not really relevant
2497 	}
2498 	cerr << "PASSED" << endl;
2499 
2500 	cerr << "Case 9 (repetitive pair, only one mate repetitive) ... ";
2501 	{
2502 		uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2503 		bool pairMax = false, unpair1Max = false, unpair2Max = false;
2504 		ReportingParams rp(
2505 			1,      // khits
2506 			1,      // mhits
2507 			0,      // pengap
2508 			true,   // msample
2509 			true,   // discord
2510 			true);  // mixed
2511 		ReportingState st(rp);
2512 		st.nextRead(true); // unpaired read
2513 		// assert(st.doneConcordant()    == done1);
2514 		// assert(st.doneDiscordant()    == done2);
2515 		// assert(st.doneUnpaired(true)  == done3);
2516 		// assert(st.doneUnpaired(false) == done4);
2517 		// assert(st.doneUnpaired()      == done5);
2518 		// assert(st.done()              == done6);
2519 		st.foundConcordant();
2520 		assert(st.repOk());
2521 		st.foundUnpaired(true);
2522 		assert(st.repOk());
2523 		st.foundUnpaired(false);
2524 		assert(st.repOk());
2525 		assert(testDones(st, false, true, false, false, false, false));
2526 		assert(st.repOk());
2527 		st.foundConcordant();
2528 		assert(st.repOk());
2529 		st.foundUnpaired(true);
2530 		assert(st.repOk());
2531 		assert(testDones(st, true, true, true, false, false, false));
2532 		assert_eq(2, st.numConcordant());
2533 		assert_eq(0, st.numDiscordant());
2534 		assert_eq(2, st.numUnpaired1());
2535 		assert_eq(1, st.numUnpaired2());
2536 		st.foundUnpaired(false);
2537 		assert(st.repOk());
2538 		assert(testDones(st, true, true, true, true, true, true));
2539 		assert_eq(2, st.numConcordant());
2540 		assert_eq(0, st.numDiscordant());
2541 		assert_eq(2, st.numUnpaired1());
2542 		assert_eq(2, st.numUnpaired2());
2543 		st.finish();
2544 		st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2545 		             pairMax, unpair1Max, unpair2Max);
2546 		assert_eq(1, nconcord);
2547 		assert_eq(0, ndiscord);
2548 		assert_eq(0, nunpair1);
2549 		assert_eq(0, nunpair2);
2550 		assert(pairMax);
2551 		assert(unpair1Max); // not really relevant
2552 		assert(unpair2Max); // not really relevant
2553 	}
2554 	cerr << "PASSED" << endl;
2555 }
2556 
2557 #endif /*def ALN_SINK_MAIN*/
2558