1 /*
2 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3 *
4 * This file is part of Bowtie 2.
5 *
6 * Bowtie 2 is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Bowtie 2 is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <iomanip>
21 #include <limits>
22 #include "aln_sink.h"
23 #include "aligner_seed.h"
24 #include "util.h"
25
26 using namespace std;
27
28
29 /**
30 * Initialize state machine with a new read. The state we start in depends
31 * on whether it's paired-end or unpaired.
32 */
nextRead(bool paired)33 void ReportingState::nextRead(bool paired) {
34 paired_ = paired;
35 if(paired) {
36 state_ = CONCORDANT_PAIRS;
37 doneConcord_ = false;
38 doneDiscord_ = p_.discord ? false : true;
39 doneUnpair1_ = p_.mixed ? false : true;
40 doneUnpair2_ = p_.mixed ? false : true;
41 exitConcord_ = ReportingState::EXIT_DID_NOT_EXIT;
42 exitDiscord_ = p_.discord ?
43 ReportingState::EXIT_DID_NOT_EXIT :
44 ReportingState::EXIT_DID_NOT_ENTER;
45 exitUnpair1_ = p_.mixed ?
46 ReportingState::EXIT_DID_NOT_EXIT :
47 ReportingState::EXIT_DID_NOT_ENTER;
48 exitUnpair2_ = p_.mixed ?
49 ReportingState::EXIT_DID_NOT_EXIT :
50 ReportingState::EXIT_DID_NOT_ENTER;
51 } else {
52 // Unpaired
53 state_ = UNPAIRED;
54 doneConcord_ = true;
55 doneDiscord_ = true;
56 doneUnpair1_ = false;
57 doneUnpair2_ = true;
58 exitConcord_ = ReportingState::EXIT_DID_NOT_ENTER; // not relevant
59 exitDiscord_ = ReportingState::EXIT_DID_NOT_ENTER; // not relevant
60 exitUnpair1_ = ReportingState::EXIT_DID_NOT_EXIT;
61 exitUnpair2_ = ReportingState::EXIT_DID_NOT_ENTER; // not relevant
62 }
63 doneUnpair_ = doneUnpair1_ && doneUnpair2_;
64 done_ = false;
65 nconcord_ = ndiscord_ = nunpair1_ = nunpair2_ = 0;
66 }
67
68 /**
69 * Caller uses this member function to indicate that one additional
70 * concordant alignment has been found.
71 */
foundConcordant()72 bool ReportingState::foundConcordant() {
73 assert(paired_);
74 assert_geq(state_, ReportingState::CONCORDANT_PAIRS);
75 assert(!doneConcord_);
76 nconcord_++;
77 areDone(nconcord_, doneConcord_, exitConcord_);
78 // No need to search for discordant alignments if there are one or more
79 // concordant alignments.
80 doneDiscord_ = true;
81 exitDiscord_ = ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED;
82 if(doneConcord_) {
83 // If we're finished looking for concordant alignments, do we have to
84 // continue on to search for unpaired alignments? Only if our exit
85 // from the concordant stage is EXIT_SHORT_CIRCUIT_M. If it's
86 // EXIT_SHORT_CIRCUIT_k or EXIT_WITH_ALIGNMENTS, we can skip unpaired.
87 assert_neq(ReportingState::EXIT_NO_ALIGNMENTS, exitConcord_);
88 if(exitConcord_ != ReportingState::EXIT_SHORT_CIRCUIT_M) {
89 if(!doneUnpair1_) {
90 doneUnpair1_ = true;
91 exitUnpair1_ = ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED;
92 }
93 if(!doneUnpair2_) {
94 doneUnpair2_ = true;
95 exitUnpair2_ = ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED;
96 }
97 }
98 }
99 updateDone();
100 return done();
101 }
102
103 /**
104 * Caller uses this member function to indicate that one additional unpaired
105 * mate alignment has been found for the specified mate.
106 */
foundUnpaired(bool mate1)107 bool ReportingState::foundUnpaired(bool mate1) {
108 assert_gt(state_, ReportingState::NO_READ);
109 // Note: it's not right to assert !doneUnpair1_/!doneUnpair2_ here.
110 // Even if we're done with finding
111 if(mate1) {
112 nunpair1_++;
113 // Did we just finish with this mate?
114 if(!doneUnpair1_) {
115 areDone(nunpair1_, doneUnpair1_, exitUnpair1_);
116 if(doneUnpair1_) {
117 doneUnpair_ = doneUnpair1_ && doneUnpair2_;
118 updateDone();
119 }
120 }
121 if(nunpair1_ > 1) {
122 doneDiscord_ = true;
123 exitDiscord_ = ReportingState::EXIT_NO_ALIGNMENTS;
124 }
125 } else {
126 nunpair2_++;
127 // Did we just finish with this mate?
128 if(!doneUnpair2_) {
129 areDone(nunpair2_, doneUnpair2_, exitUnpair2_);
130 if(doneUnpair2_) {
131 doneUnpair_ = doneUnpair1_ && doneUnpair2_;
132 updateDone();
133 }
134 }
135 if(nunpair2_ > 1) {
136 doneDiscord_ = true;
137 exitDiscord_ = ReportingState::EXIT_NO_ALIGNMENTS;
138 }
139 }
140 return done();
141 }
142
143 /**
144 * Called to indicate that the aligner has finished searching for
145 * alignments. This gives us a chance to finalize our state.
146 *
147 * TODO: Keep track of short-circuiting information.
148 */
finish()149 void ReportingState::finish() {
150 if(!doneConcord_) {
151 doneConcord_ = true;
152 exitConcord_ =
153 ((nconcord_ > 0) ?
154 ReportingState::EXIT_WITH_ALIGNMENTS :
155 ReportingState::EXIT_NO_ALIGNMENTS);
156 }
157 assert_gt(exitConcord_, EXIT_DID_NOT_EXIT);
158 if(!doneUnpair1_) {
159 doneUnpair1_ = true;
160 exitUnpair1_ =
161 ((nunpair1_ > 0) ?
162 ReportingState::EXIT_WITH_ALIGNMENTS :
163 ReportingState::EXIT_NO_ALIGNMENTS);
164 }
165 assert_gt(exitUnpair1_, EXIT_DID_NOT_EXIT);
166 if(!doneUnpair2_) {
167 doneUnpair2_ = true;
168 exitUnpair2_ =
169 ((nunpair2_ > 0) ?
170 ReportingState::EXIT_WITH_ALIGNMENTS :
171 ReportingState::EXIT_NO_ALIGNMENTS);
172 }
173 assert_gt(exitUnpair2_, EXIT_DID_NOT_EXIT);
174 if(!doneDiscord_) {
175 // Check if the unpaired alignments should be converted to a single
176 // discordant paired-end alignment.
177 assert_eq(0, ndiscord_);
178 if(nconcord_ == 0 && nunpair1_ == 1 && nunpair2_ == 1) {
179 convertUnpairedToDiscordant();
180 }
181 doneDiscord_ = true;
182 exitDiscord_ =
183 ((ndiscord_ > 0) ?
184 ReportingState::EXIT_WITH_ALIGNMENTS :
185 ReportingState::EXIT_NO_ALIGNMENTS);
186 }
187 assert(!paired_ || exitDiscord_ > ReportingState::EXIT_DID_NOT_EXIT);
188 doneUnpair_ = done_ = true;
189 assert(done());
190 }
191
192 /**
193 * Populate given counters with the number of various kinds of alignments
194 * to report for this read. Concordant alignments are preferable to (and
195 * mutually exclusive with) discordant alignments, and paired-end
196 * alignments are preferable to unpaired alignments.
197 *
198 * The caller also needs some additional information for the case where a
199 * pair or unpaired read aligns repetitively. If the read is paired-end
200 * and the paired-end has repetitive concordant alignments, that should be
201 * reported, and 'pairMax' is set to true to indicate this. If the read is
202 * paired-end, does not have any conordant alignments, but does have
203 * repetitive alignments for one or both mates, then that should be
204 * reported, and 'unpair1Max' and 'unpair2Max' are set accordingly.
205 *
206 * Note that it's possible in the case of a paired-end read for the read to
207 * have repetitive concordant alignments, but for one mate to have a unique
208 * unpaired alignment.
209 */
getReport(uint64_t & nconcordAln,uint64_t & ndiscordAln,uint64_t & nunpair1Aln,uint64_t & nunpair2Aln,bool & pairMax,bool & unpair1Max,bool & unpair2Max) const210 void ReportingState::getReport(
211 uint64_t& nconcordAln, // # concordant alignments to report
212 uint64_t& ndiscordAln, // # discordant alignments to report
213 uint64_t& nunpair1Aln, // # unpaired alignments for mate #1 to report
214 uint64_t& nunpair2Aln, // # unpaired alignments for mate #2 to report
215 bool& pairMax, // repetitive concordant alignments
216 bool& unpair1Max, // repetitive alignments for mate #1
217 bool& unpair2Max) // repetitive alignments for mate #2
218 const
219 {
220 nconcordAln = ndiscordAln = nunpair1Aln = nunpair2Aln = 0;
221 pairMax = unpair1Max = unpair2Max = false;
222 assert_gt(p_.khits, 0);
223 assert_gt(p_.mhits, 0);
224 if(paired_) {
225 // Do we have 1 or more concordant alignments to report?
226 if(exitConcord_ == ReportingState::EXIT_SHORT_CIRCUIT_k) {
227 // k at random
228 assert_geq(nconcord_, (uint64_t)p_.khits);
229 nconcordAln = p_.khits;
230 return;
231 } else if(exitConcord_ == ReportingState::EXIT_SHORT_CIRCUIT_M) {
232 assert(p_.msample);
233 assert_gt(nconcord_, 0);
234 pairMax = true; // repetitive concordant alignments
235 if(p_.mixed) {
236 unpair1Max = nunpair1_ > (uint64_t)p_.mhits;
237 unpair2Max = nunpair2_ > (uint64_t)p_.mhits;
238 }
239 // Not sure if this is OK
240 nconcordAln = 1; // 1 at random
241 return;
242 } else if(exitConcord_ == ReportingState::EXIT_WITH_ALIGNMENTS) {
243 assert_gt(nconcord_, 0);
244 // <= k at random
245 nconcordAln = min<uint64_t>(nconcord_, p_.khits);
246 return;
247 }
248 assert(!p_.mhitsSet() || nconcord_ <= (uint64_t)p_.mhits+1);
249
250 // Do we have a discordant alignment to report?
251 if(exitDiscord_ == ReportingState::EXIT_WITH_ALIGNMENTS) {
252 // Report discordant
253 assert(p_.discord);
254 ndiscordAln = 1;
255 return;
256 }
257 }
258
259 assert_neq(ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED, exitUnpair1_);
260 assert_neq(ReportingState::EXIT_SHORT_CIRCUIT_TRUMPED, exitUnpair2_);
261
262 if((paired_ && !p_.mixed) || nunpair1_ + nunpair2_ == 0) {
263 // Unpaired alignments either not reportable or non-existant
264 return;
265 }
266
267 // Do we have 1 or more alignments for mate #1 to report?
268 if(exitUnpair1_ == ReportingState::EXIT_SHORT_CIRCUIT_k) {
269 // k at random
270 assert_geq(nunpair1_, (uint64_t)p_.khits);
271 nunpair1Aln = p_.khits;
272 } else if(exitUnpair1_ == ReportingState::EXIT_SHORT_CIRCUIT_M) {
273 assert(p_.msample);
274 assert_gt(nunpair1_, 0);
275 unpair1Max = true; // repetitive alignments for mate #1
276 nunpair1Aln = 1; // 1 at random
277 } else if(exitUnpair1_ == ReportingState::EXIT_WITH_ALIGNMENTS) {
278 assert_gt(nunpair1_, 0);
279 // <= k at random
280 nunpair1Aln = min<uint64_t>(nunpair1_, (uint64_t)p_.khits);
281 }
282 assert(!p_.mhitsSet() || paired_ || nunpair1_ <= (uint64_t)p_.mhits+1);
283
284 // Do we have 2 or more alignments for mate #2 to report?
285 if(exitUnpair2_ == ReportingState::EXIT_SHORT_CIRCUIT_k) {
286 // k at random
287 nunpair2Aln = p_.khits;
288 } else if(exitUnpair2_ == ReportingState::EXIT_SHORT_CIRCUIT_M) {
289 assert(p_.msample);
290 assert_gt(nunpair2_, 0);
291 unpair2Max = true; // repetitive alignments for mate #1
292 nunpair2Aln = 1; // 1 at random
293 } else if(exitUnpair2_ == ReportingState::EXIT_WITH_ALIGNMENTS) {
294 assert_gt(nunpair2_, 0);
295 // <= k at random
296 nunpair2Aln = min<uint64_t>(nunpair2_, (uint64_t)p_.khits);
297 }
298 assert(!p_.mhitsSet() || paired_ || nunpair2_ <= (uint64_t)p_.mhits+1);
299 }
300
301 /**
302 * Given the number of alignments in a category, check whether we
303 * short-circuited out of the category. Set the done and exit arguments to
304 * indicate whether and how we short-circuited.
305 */
areDone(uint64_t cnt,bool & done,int & exit) const306 inline void ReportingState::areDone(
307 uint64_t cnt, // # alignments in category
308 bool& done, // out: whether we short-circuited out of category
309 int& exit) const // out: if done, how we short-circuited (-k? -m? etc)
310 {
311 assert(!done);
312 // Have we exceeded the -k limit?
313 assert_gt(p_.khits, 0);
314 assert_gt(p_.mhits, 0);
315 if(cnt >= (uint64_t)p_.khits && !p_.mhitsSet()) {
316 done = true;
317 exit = ReportingState::EXIT_SHORT_CIRCUIT_k;
318 }
319 // Have we exceeded the -m or -M limit?
320 else if(p_.mhitsSet() && cnt > (uint64_t)p_.mhits) {
321 done = true;
322 assert(p_.msample);
323 exit = ReportingState::EXIT_SHORT_CIRCUIT_M;
324 }
325 }
326
printPct(std::ostream & os,uint64_t num,uint64_t denom)327 static std::ostream& printPct(
328 std::ostream& os,
329 uint64_t num,
330 uint64_t denom)
331 {
332 double pct = 0.0f;
333 if(denom != 0) { pct = 100.0 * (double)num / (double)denom; }
334 os << fixed << setprecision(2) << pct << '%';
335 return os;
336 }
337 /**
338 * Print a friendly summary of:
339 *
340 * 1. How many reads were aligned and had one or more alignments
341 * reported
342 * 2. How many reads exceeded the -m or -M ceiling and therefore had
343 * their alignments suppressed or sampled
344 * 3. How many reads failed to align entirely
345 *
346 * Optionally print a series of Hadoop streaming-style counter updates
347 * with similar information.
348 */
printAlSumm(const ReportingMetrics & met,size_t repThresh,bool discord,bool mixed,bool hadoopOut)349 void AlnSink::printAlSumm(
350 const ReportingMetrics& met,
351 size_t repThresh, // threshold for uniqueness, or max if no thresh
352 bool discord, // looked for discordant alignments
353 bool mixed, // looked for unpaired alignments where paired failed?
354 bool hadoopOut) // output Hadoop counters?
355 {
356 // NOTE: there's a filtering step at the very beginning, so everything
357 // being reported here is post filtering
358
359 bool canRep = repThresh != MAX_SIZE_T;
360 if(hadoopOut) {
361 cerr << "reporter:counter:Bowtie,Reads processed," << met.nread << endl;
362 }
363 uint64_t totread = met.nread;
364 if(totread > 0) {
365 cerr << "" << met.nread << " reads; of these:" << endl;
366 } else {
367 assert_eq(0, met.npaired);
368 assert_eq(0, met.nunpaired);
369 cerr << "" << totread << " reads" << endl;
370 }
371 uint64_t totpair = met.npaired;
372 if(totpair > 0) {
373 // Paired output
374 cerr << " " << totpair << " (";
375 printPct(cerr, totpair, totread);
376 cerr << ") were paired; of these:" << endl;
377
378 // Concordants
379 cerr << " " << met.nconcord_0 << " (";
380 printPct(cerr, met.nconcord_0, met.npaired);
381 cerr << ") aligned concordantly 0 times" << endl;
382 if(canRep) {
383 // Print the number that aligned concordantly exactly once
384 assert_eq(met.nconcord_uni, met.nconcord_uni1+met.nconcord_uni2);
385 cerr << " " << met.nconcord_uni1 << " (";
386 printPct(cerr, met.nconcord_uni1, met.npaired);
387 cerr << ") aligned concordantly exactly 1 time" << endl;
388
389 // Print the number that aligned concordantly more than once but
390 // fewer times than the limit
391
392 cerr << " " << met.nconcord_uni2+met.nconcord_rep << " (";
393 printPct(cerr, met.nconcord_uni2+met.nconcord_rep, met.npaired);
394 cerr << ") aligned concordantly >1 times" << endl;
395 } else {
396 // Print the number that aligned concordantly exactly once
397 assert_eq(met.nconcord_uni, met.nconcord_uni1+met.nconcord_uni2);
398 cerr << " " << met.nconcord_uni1 << " (";
399 printPct(cerr, met.nconcord_uni1, met.npaired);
400 cerr << ") aligned concordantly exactly 1 time" << endl;
401
402 // Print the number that aligned concordantly more than once
403 cerr << " " << met.nconcord_uni2 << " (";
404 printPct(cerr, met.nconcord_uni2, met.npaired);
405 cerr << ") aligned concordantly >1 times" << endl;
406 }
407 if(discord) {
408 // TODO: what about discoardant and on separate chromosomes?
409
410 // Bring out the unaligned pair total so we can subtract discordants
411 cerr << " ----" << endl;
412 cerr << " " << met.nconcord_0
413 << " pairs aligned concordantly 0 times; of these:" << endl;
414 // Discordants
415 cerr << " " << met.ndiscord << " (";
416 printPct(cerr, met.ndiscord, met.nconcord_0);
417 cerr << ") aligned discordantly 1 time" << endl;
418 }
419 uint64_t ncondiscord_0 = met.nconcord_0 - met.ndiscord;
420 if(mixed) {
421 // Bring out the unaligned pair total so we can subtract discordants
422 cerr << " ----" << endl;
423 cerr << " " << ncondiscord_0
424 << " pairs aligned 0 times concordantly or discordantly; of these:" << endl;
425 cerr << " " << (ncondiscord_0 * 2) << " mates make up the pairs; of these:" << endl;
426 cerr << " " << met.nunp_0_0 << " " << "(";
427 printPct(cerr, met.nunp_0_0, ncondiscord_0 * 2);
428 cerr << ") aligned 0 times" << endl;
429 if(canRep) {
430 // Print the number that aligned exactly once
431 assert_eq(met.nunp_0_uni, met.nunp_0_uni1+met.nunp_0_uni2);
432 cerr << " " << met.nunp_0_uni1 << " (";
433 printPct(cerr, met.nunp_0_uni1, ncondiscord_0 * 2);
434 cerr << ") aligned exactly 1 time" << endl;
435
436 // Print the number that aligned more than once but fewer times
437 // than the limit
438 cerr << " " << met.nunp_0_uni2+met.nunp_0_rep << " (";
439 printPct(cerr, met.nunp_0_uni2+met.nunp_0_rep, ncondiscord_0 * 2);
440 cerr << ") aligned >1 times" << endl;
441 } else {
442 // Print the number that aligned exactly once
443 assert_eq(met.nunp_0_uni, met.nunp_0_uni1+met.nunp_0_uni2);
444 cerr << " " << met.nunp_0_uni1 << " (";
445 printPct(cerr, met.nunp_0_uni1, ncondiscord_0 * 2);
446 cerr << ") aligned exactly 1 time" << endl;
447
448 // Print the number that aligned more than once but fewer times
449 // than the limit
450 cerr << " " << met.nunp_0_uni2 << " (";
451 printPct(cerr, met.nunp_0_uni2, ncondiscord_0 * 2);
452 cerr << ") aligned >1 times" << endl;
453 }
454
455 //if(canRep) {
456 // // Bring out the repetitively aligned pair total so we can subtract discordants
457 // cerr << " ----" << endl;
458 // cerr << " " << met.nconcord_rep
459 // << " pairs aligned concordantly >" << repThresh
460 // << " times; of these:" << endl;
461 // cerr << " " << (met.nconcord_rep * 2) << " mates make up the pairs; of these:" << endl;
462 //
463 // cerr << " " << met.nunp_rep_0 << " (";
464 // printPct(cerr, met.nunp_rep_0, met.nconcord_rep * 2);
465 // cerr << ") aligned 0 times" << endl;
466 //
467 // cerr << " " << met.nunp_rep_uni << " (";
468 // printPct(cerr, met.nunp_rep_uni, met.nconcord_rep * 2);
469 // cerr << ") aligned >0 and <=" << repThresh << " times" << endl;
470 //
471 // cerr << " " << met.nunp_rep_rep << " (";
472 // printPct(cerr, met.nunp_rep_rep, met.nconcord_rep * 2);
473 // cerr << ") aligned >" << repThresh << " times" << endl;
474 //}
475 }
476 }
477 uint64_t totunpair = met.nunpaired;
478 if(totunpair > 0) {
479 // Unpaired output
480 cerr << " " << totunpair << " (";
481 printPct(cerr, totunpair, totread);
482 cerr << ") were unpaired; of these:" << endl;
483
484 cerr << " " << met.nunp_0 << " (";
485 printPct(cerr, met.nunp_0, met.nunpaired);
486 cerr << ") aligned 0 times" << endl;
487 if(hadoopOut) {
488 cerr << "reporter:counter:Bowtie 2,Unpaired reads with 0 alignments,"
489 << met.nunpaired << endl;
490 }
491
492 if(canRep) {
493 // Print the number that aligned exactly once
494 assert_eq(met.nunp_uni, met.nunp_uni1+met.nunp_uni2);
495 cerr << " " << met.nunp_uni1 << " (";
496 printPct(cerr, met.nunp_uni1, met.nunpaired);
497 cerr << ") aligned exactly 1 time" << endl;
498
499 // Print the number that aligned more than once but fewer times
500 // than the limit
501 cerr << " " << met.nunp_uni2+met.nunp_rep << " (";
502 printPct(cerr, met.nunp_uni2+met.nunp_rep, met.nunpaired);
503 cerr << ") aligned >1 times" << endl;
504 } else {
505 // Print the number that aligned exactly once
506 assert_eq(met.nunp_uni, met.nunp_uni1+met.nunp_uni2);
507 cerr << " " << met.nunp_uni1 << " (";
508 printPct(cerr, met.nunp_uni1, met.nunpaired);
509 cerr << ") aligned exactly 1 time" << endl;
510
511 // Print the number that aligned more than once
512 cerr << " " << met.nunp_uni2 << " (";
513 printPct(cerr, met.nunp_uni2, met.nunpaired);
514 cerr << ") aligned >1 times" << endl;
515 }
516 }
517 uint64_t tot_al_cand = totunpair + totpair*2;
518 uint64_t tot_al =
519 (met.nconcord_uni + met.nconcord_rep)*2 +
520 (met.ndiscord)*2 +
521 met.nunp_0_uni +
522 met.nunp_0_rep +
523 met.nunp_uni +
524 met.nunp_rep;
525 assert_leq(tot_al, tot_al_cand);
526 printPct(cerr, tot_al, tot_al_cand);
527 cerr << " overall alignment rate" << endl;
528 }
529
530 /**
531 * Return true iff the read in rd1/rd2 matches the last read handled, which
532 * should still be in rd1_/rd2_.
533 */
sameRead(const Read * rd1,const Read * rd2,bool qualitiesMatter)534 bool AlnSinkWrap::sameRead(
535 // One of the other of rd1, rd2 will = NULL if read is unpaired
536 const Read* rd1, // new mate #1
537 const Read* rd2, // new mate #2
538 bool qualitiesMatter) // aln policy distinguishes b/t quals?
539 {
540 bool same = false;
541 if(rd1_ != NULL || rd2_ != NULL) {
542 // This is not the first time the sink was initialized with
543 // a read. Check if new read/pair is identical to previous
544 // read/pair
545 if((rd1_ == NULL) == (rd1 == NULL) &&
546 (rd2_ == NULL) == (rd2 == NULL))
547 {
548 bool m1same = (rd1 == NULL && rd1_ == NULL);
549 if(!m1same) {
550 assert(rd1 != NULL);
551 assert(rd1_ != NULL);
552 m1same = Read::same(
553 rd1->patFw, // new seq
554 rd1->qual, // new quals
555 rd1_->patFw, // old seq
556 rd1_->qual, // old quals
557 qualitiesMatter);
558 }
559 if(m1same) {
560 bool m2same = (rd2 == NULL && rd2_ == NULL);
561 if(!m2same) {
562 m2same = Read::same(
563 rd2->patFw, // new seq
564 rd2->qual, // new quals
565 rd2_->patFw, // old seq
566 rd2_->qual, // old quals
567 qualitiesMatter);
568 }
569 same = m2same;
570 }
571 }
572 }
573 return same;
574 }
575
576 /**
577 * Initialize the wrapper with a new read pair and return an integer >= -1
578 * indicating which stage the aligner should start at. If -1 is returned, the
579 * aligner can skip the read entirely. Checks if the new read pair is
580 * identical to the previous pair. If it is, then we return the id of the
581 * first stage to run.
582 */
nextRead(const Read * rd1,const Read * rd2,TReadId rdid,bool qualitiesMatter)583 int AlnSinkWrap::nextRead(
584 // One of the other of rd1, rd2 will = NULL if read is unpaired
585 const Read* rd1, // new mate #1
586 const Read* rd2, // new mate #2
587 TReadId rdid, // read ID for new pair
588 bool qualitiesMatter) // aln policy distinguishes b/t quals?
589 {
590 assert(!init_);
591 assert(rd1 != NULL || rd2 != NULL);
592 init_ = true;
593 // Keep copy of new read, so that we can compare it with the
594 // next one
595 if(rd1 != NULL) {
596 rd1_ = rd1;
597 } else rd1_ = NULL;
598 if(rd2 != NULL) {
599 rd2_ = rd2;
600 } else rd2_ = NULL;
601 rdid_ = rdid;
602 // Caller must now align the read
603 maxed1_ = false;
604 maxed2_ = false;
605 maxedOverall_ = false;
606 bestPair_ = best2Pair_ =
607 bestUnp1_ = best2Unp1_ =
608 bestUnp2_ = best2Unp2_ = std::numeric_limits<THitInt>::min();
609 rs1_.clear(); // clear out paired-end alignments
610 rs2_.clear(); // clear out paired-end alignments
611 rs1u_.clear(); // clear out unpaired alignments for mate #1
612 rs2u_.clear(); // clear out unpaired alignments for mate #2
613 st_.nextRead(readIsPair()); // reset state
614 assert(empty());
615 assert(!maxed());
616 // Start from the first stage
617 return 0;
618 }
619
620 /**
621 * Inform global, shared AlnSink object that we're finished with this read.
622 * The global AlnSink is responsible for updating counters, creating the output
623 * record, and delivering the record to the appropriate output stream.
624 *
625 * What gets reported for a paired-end alignment?
626 *
627 * 1. If there are reportable concordant alignments, report those and stop
628 * 2. If there are reportable discordant alignments, report those and stop
629 * 3. If unpaired alignments can be reported:
630 * 3a. Report
631 #
632 * Update metrics. Only ambiguity is: what if a pair aligns repetitively and
633 * one of its mates aligns uniquely?
634 *
635 * uint64_t al; // # mates w/ >= 1 reported alignment
636 * uint64_t unal; // # mates w/ 0 alignments
637 * uint64_t max; // # mates withheld for exceeding -M/-m ceiling
638 * uint64_t al_concord; // # pairs w/ >= 1 concordant alignment
639 * uint64_t al_discord; // # pairs w/ >= 1 discordant alignment
640 * uint64_t max_concord; // # pairs maxed out
641 * uint64_t unal_pair; // # pairs where neither mate aligned
642 */
finishRead(const SeedResults * sr1,const SeedResults * sr2,bool exhaust1,bool exhaust2,bool nfilt1,bool nfilt2,bool scfilt1,bool scfilt2,bool lenfilt1,bool lenfilt2,bool qcfilt1,bool qcfilt2,RandomSource & rnd,ReportingMetrics & met,const PerReadMetrics & prm,const Scoring & sc,bool suppressSeedSummary,bool suppressAlignments,bool scUnMapped,bool xeq)643 void AlnSinkWrap::finishRead(
644 const SeedResults *sr1, // seed alignment results for mate 1
645 const SeedResults *sr2, // seed alignment results for mate 2
646 bool exhaust1, // mate 1 exhausted?
647 bool exhaust2, // mate 2 exhausted?
648 bool nfilt1, // mate 1 N-filtered?
649 bool nfilt2, // mate 2 N-filtered?
650 bool scfilt1, // mate 1 score-filtered?
651 bool scfilt2, // mate 2 score-filtered?
652 bool lenfilt1, // mate 1 length-filtered?
653 bool lenfilt2, // mate 2 length-filtered?
654 bool qcfilt1, // mate 1 qc-filtered?
655 bool qcfilt2, // mate 2 qc-filtered?
656 RandomSource& rnd, // pseudo-random generator
657 ReportingMetrics& met, // reporting metrics
658 const PerReadMetrics& prm, // per-read metrics
659 const Scoring& sc, // scoring scheme
660 bool suppressSeedSummary, // = true
661 bool suppressAlignments, // = false
662 bool scUnMapped, // = false
663 bool xeq) // = false
664 {
665 obuf_.clear();
666 OutputQueueMark qqm(g_.outq(), obuf_, rdid_, threadid_);
667 assert(init_);
668 if(!suppressSeedSummary) {
669 if(sr1 != NULL) {
670 assert(rd1_ != NULL);
671 // Mate exists and has non-empty SeedResults
672 g_.reportSeedSummary(obuf_, *rd1_, rdid_, threadid_, *sr1, true);
673 } else if(rd1_ != NULL) {
674 // Mate exists but has NULL SeedResults
675 g_.reportEmptySeedSummary(obuf_, *rd1_, rdid_, true);
676 }
677 if(sr2 != NULL) {
678 assert(rd2_ != NULL);
679 // Mate exists and has non-empty SeedResults
680 g_.reportSeedSummary(obuf_, *rd2_, rdid_, threadid_, *sr2, true);
681 } else if(rd2_ != NULL) {
682 // Mate exists but has NULL SeedResults
683 g_.reportEmptySeedSummary(obuf_, *rd2_, rdid_, true);
684 }
685 }
686 if(!suppressAlignments) {
687 // Ask the ReportingState what to report
688 st_.finish();
689 uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
690 bool pairMax = false, unpair1Max = false, unpair2Max = false;
691 st_.getReport(
692 nconcord,
693 ndiscord,
694 nunpair1,
695 nunpair2,
696 pairMax,
697 unpair1Max,
698 unpair2Max);
699 assert_leq(nconcord, rs1_.size());
700 assert_leq(nunpair1, rs1u_.size());
701 assert_leq(nunpair2, rs2u_.size());
702 assert_leq(ndiscord, 1);
703 assert_gt(rp_.khits, 0);
704 assert_gt(rp_.mhits, 0);
705 assert(!pairMax || rs1_.size() >= (uint64_t)rp_.mhits);
706 assert(!unpair1Max || rs1u_.size() >= (uint64_t)rp_.mhits);
707 assert(!unpair2Max || rs2u_.size() >= (uint64_t)rp_.mhits);
708 met.nread++;
709 if(readIsPair()) {
710 met.npaired++;
711 } else {
712 met.nunpaired++;
713 }
714 // Report concordant paired-end alignments if possible
715 if(nconcord > 0) {
716 AlnSetSumm concordSumm(
717 rd1_, rd2_, &rs1_, &rs2_, &rs1u_, &rs2u_,
718 exhaust1, exhaust2, -1, -1);
719 // Sort by score then pick from low to high
720 AlnScore bestUScore, bestP1Score, bestP2Score, bestCScore;
721 AlnScore bestUDist, bestP1Dist, bestP2Dist, bestCDist;
722 AlnScore bestUnchosenUScore, bestUnchosenP1Score, bestUnchosenP2Score, bestUnchosenCScore;
723 AlnScore bestUnchosenUDist, bestUnchosenP1Dist, bestUnchosenP2Dist, bestUnchosenCDist;
724 // TODO: should probably package these variables up so it's not
725 // such a pain to pass them around
726 size_t off = selectByScore(
727 &rs1_, &rs2_,
728 nconcord, select1_,
729 &rs1u_, &rs2u_,
730 bestUScore,
731 bestUDist,
732 bestP1Score,
733 bestP1Dist,
734 bestP2Score,
735 bestP2Dist,
736 bestCScore,
737 bestCDist,
738 bestUnchosenUScore,
739 bestUnchosenUDist,
740 bestUnchosenP1Score,
741 bestUnchosenP1Dist,
742 bestUnchosenP2Score,
743 bestUnchosenP2Dist,
744 bestUnchosenCScore,
745 bestUnchosenCDist,
746 rnd);
747 concordSumm.setBest(
748 bestUScore,
749 bestUDist,
750 bestP1Score,
751 bestP1Dist,
752 bestP2Score,
753 bestP2Dist,
754 bestCScore,
755 bestCDist,
756 bestUnchosenUScore,
757 bestUnchosenUDist,
758 bestUnchosenP1Score,
759 bestUnchosenP1Dist,
760 bestUnchosenP2Score,
761 bestUnchosenP2Dist,
762 bestUnchosenCScore,
763 bestUnchosenCDist);
764 assert(concordSumm.bestScore(true).valid());
765 assert(concordSumm.bestScore(false).valid());
766 assert_lt(off, rs1_.size());
767 const AlnRes *rs1 = &rs1_[off];
768 const AlnRes *rs2 = &rs2_[off];
769 AlnFlags flags1(
770 ALN_FLAG_PAIR_CONCORD_MATE1,
771 st_.params().mhitsSet(),
772 unpair1Max,
773 pairMax,
774 nfilt1,
775 scfilt1,
776 lenfilt1,
777 qcfilt1,
778 st_.params().mixed,
779 true, // primary
780 true, // opp aligned
781 rs2->fw(), // opp fw
782 scUnMapped,
783 xeq);
784 AlnFlags flags2(
785 ALN_FLAG_PAIR_CONCORD_MATE2,
786 st_.params().mhitsSet(),
787 unpair2Max,
788 pairMax,
789 nfilt2,
790 scfilt2,
791 lenfilt2,
792 qcfilt2,
793 st_.params().mixed,
794 false, // primary
795 true, // opp aligned
796 rs1->fw(), // opp fw
797 scUnMapped,
798 xeq);
799 // Issue: we only set the flags once, but some of the flags might
800 // vary from pair to pair among the pairs we're reporting. For
801 // instance, whether the a given mate aligns to the forward strand.
802 SeedAlSumm ssm1, ssm2;
803 sr1->toSeedAlSumm(ssm1);
804 sr2->toSeedAlSumm(ssm2);
805 for(size_t i = 0; i < rs1_.size(); i++) {
806 rs1_[i].setMateParams(ALN_RES_TYPE_MATE1, &rs2_[i], flags1);
807 rs2_[i].setMateParams(ALN_RES_TYPE_MATE2, &rs1_[i], flags2);
808 assert_eq(abs(rs1_[i].fragmentLength()), abs(rs2_[i].fragmentLength()));
809 }
810 assert(!select1_.empty());
811 g_.reportHits(
812 obuf_,
813 staln_,
814 threadid_,
815 rd1_,
816 rd2_,
817 rdid_,
818 select1_,
819 NULL,
820 &rs1_,
821 &rs2_,
822 pairMax,
823 concordSumm,
824 ssm1,
825 ssm2,
826 &flags1,
827 &flags2,
828 prm,
829 mapq_,
830 sc,
831 false);
832 if(pairMax) {
833 met.nconcord_rep++;
834 } else {
835 met.nconcord_uni++;
836 assert(!rs1_.empty());
837 if(rs1_.size() == 1) {
838 met.nconcord_uni1++;
839 } else {
840 met.nconcord_uni2++;
841 }
842 }
843 init_ = false;
844 //g_.outq().finishRead(obuf_, rdid_, threadid_);
845 return;
846 }
847 // Report disconcordant paired-end alignments if possible
848 else if(ndiscord > 0) {
849 ASSERT_ONLY(bool ret =) prepareDiscordants();
850 assert(ret);
851 assert_eq(1, rs1_.size());
852 assert_eq(1, rs2_.size());
853 AlnSetSumm discordSumm(
854 rd1_, rd2_, &rs1_, &rs2_, &rs1u_, &rs2u_,
855 exhaust1, exhaust2, -1, -1);
856 const AlnRes *rs1 = &rs1_[0];
857 const AlnRes *rs2 = &rs2_[0];
858 AlnFlags flags1(
859 ALN_FLAG_PAIR_DISCORD_MATE1,
860 st_.params().mhitsSet(),
861 false,
862 pairMax,
863 nfilt1,
864 scfilt1,
865 lenfilt1,
866 qcfilt1,
867 st_.params().mixed,
868 true, // primary
869 true, // opp aligned
870 rs2->fw(), // opp fw
871 scUnMapped,
872 xeq);
873 AlnFlags flags2(
874 ALN_FLAG_PAIR_DISCORD_MATE2,
875 st_.params().mhitsSet(),
876 false,
877 pairMax,
878 nfilt2,
879 scfilt2,
880 lenfilt2,
881 qcfilt2,
882 st_.params().mixed,
883 false, // primary
884 true, // opp aligned
885 rs1->fw(), // opp fw
886 scUnMapped,
887 xeq);
888 SeedAlSumm ssm1, ssm2;
889 sr1->toSeedAlSumm(ssm1);
890 sr2->toSeedAlSumm(ssm2);
891 for(size_t i = 0; i < rs1_.size(); i++) {
892 rs1_[i].setMateParams(ALN_RES_TYPE_MATE1, &rs2_[i], flags1);
893 rs2_[i].setMateParams(ALN_RES_TYPE_MATE2, &rs1_[i], flags2);
894 assert(rs1_[i].isFraglenSet() == rs2_[i].isFraglenSet());
895 assert(!rs1_[i].isFraglenSet() || abs(rs1_[i].fragmentLength()) == abs(rs2_[i].fragmentLength()));
896 }
897 AlnScore bestUScore, bestP1Score, bestP2Score, bestCScore;
898 AlnScore bestUDist, bestP1Dist, bestP2Dist, bestCDist;
899 AlnScore bestUnchosenUScore, bestUnchosenP1Score, bestUnchosenP2Score, bestUnchosenCScore;
900 AlnScore bestUnchosenUDist, bestUnchosenP1Dist, bestUnchosenP2Dist, bestUnchosenCDist;
901 ASSERT_ONLY(size_t off =) selectByScore(
902 &rs1_, &rs2_,
903 ndiscord, select1_,
904 &rs1u_, &rs2u_,
905 bestUScore,
906 bestUDist,
907 bestP1Score,
908 bestP1Dist,
909 bestP2Score,
910 bestP2Dist,
911 bestCScore,
912 bestCDist,
913 bestUnchosenUScore,
914 bestUnchosenUDist,
915 bestUnchosenP1Score,
916 bestUnchosenP1Dist,
917 bestUnchosenP2Score,
918 bestUnchosenP2Dist,
919 bestUnchosenCScore,
920 bestUnchosenCDist,
921 rnd);
922 discordSumm.setBest(
923 bestUScore,
924 bestUDist,
925 bestP1Score,
926 bestP1Dist,
927 bestP2Score,
928 bestP2Dist,
929 bestCScore,
930 bestCDist,
931 bestUnchosenUScore,
932 bestUnchosenUDist,
933 bestUnchosenP1Score,
934 bestUnchosenP1Dist,
935 bestUnchosenP2Score,
936 bestUnchosenP2Dist,
937 bestUnchosenCScore,
938 bestUnchosenCDist);
939 assert_eq(0, off);
940 assert(!select1_.empty());
941 g_.reportHits(
942 obuf_,
943 staln_,
944 threadid_,
945 rd1_,
946 rd2_,
947 rdid_,
948 select1_,
949 NULL,
950 &rs1_,
951 &rs2_,
952 pairMax,
953 discordSumm,
954 ssm1,
955 ssm2,
956 &flags1,
957 &flags2,
958 prm,
959 mapq_,
960 sc,
961 false);
962 met.nconcord_0++;
963 met.ndiscord++;
964 init_ = false;
965 //g_.outq().finishRead(obuf_, rdid_, threadid_);
966 return;
967 }
968 // If we're at this point, at least one mate failed to align.
969 // BTL: That's not true. It could be that there are no concordant
970 // alignments but both mates have unpaired alignments, with one of
971 // the mates having more than one.
972 //assert(nunpair1 == 0 || nunpair2 == 0);
973 assert(!pairMax);
974
975 // Update counters given that one mate didn't align
976 if(readIsPair()) {
977 met.nconcord_0++;
978 }
979 if(rd1_ != NULL) {
980 if(nunpair1 > 0) {
981 // Update counters
982 if(readIsPair()) {
983 if(unpair1Max) met.nunp_0_rep++;
984 else {
985 met.nunp_0_uni++;
986 assert(!rs1u_.empty());
987 if(rs1u_.size() == 1) {
988 met.nunp_0_uni1++;
989 } else {
990 met.nunp_0_uni2++;
991 }
992 }
993 } else {
994 if(unpair1Max) met.nunp_rep++;
995 else {
996 met.nunp_uni++;
997 assert(!rs1u_.empty());
998 if(rs1u_.size() == 1) {
999 met.nunp_uni1++;
1000 } else {
1001 met.nunp_uni2++;
1002 }
1003 }
1004 }
1005 } else if(unpair1Max) {
1006 // Update counters
1007 if(readIsPair()) met.nunp_0_rep++;
1008 else met.nunp_rep++;
1009 } else {
1010 // Update counters
1011 if(readIsPair()) met.nunp_0_0++;
1012 else met.nunp_0++;
1013 }
1014 }
1015 if(rd2_ != NULL) {
1016 if(nunpair2 > 0) {
1017 // Update counters
1018 if(readIsPair()) {
1019 if(unpair2Max) met.nunp_0_rep++;
1020 else {
1021 assert(!rs2u_.empty());
1022 met.nunp_0_uni++;
1023 if(rs2u_.size() == 1) {
1024 met.nunp_0_uni1++;
1025 } else {
1026 met.nunp_0_uni2++;
1027 }
1028 }
1029 } else {
1030 if(unpair2Max) met.nunp_rep++;
1031 else {
1032 assert(!rs2u_.empty());
1033 met.nunp_uni++;
1034 if(rs2u_.size() == 1) {
1035 met.nunp_uni1++;
1036 } else {
1037 met.nunp_uni2++;
1038 }
1039 }
1040 }
1041 } else if(unpair2Max) {
1042 // Update counters
1043 if(readIsPair()) met.nunp_0_rep++;
1044 else met.nunp_rep++;
1045 } else {
1046 // Update counters
1047 if(readIsPair()) met.nunp_0_0++;
1048 else met.nunp_0++;
1049 }
1050 }
1051
1052 const AlnRes *repRs1 = NULL, *repRs2 = NULL;
1053 AlnSetSumm summ1, summ2;
1054 AlnFlags flags1, flags2;
1055 TRefId refid = -1; TRefOff refoff = -1;
1056 bool rep1 = rd1_ != NULL && nunpair1 > 0;
1057 bool rep2 = rd2_ != NULL && nunpair2 > 0;
1058
1059 // This is the preliminary if statement for mate 1 - here we're
1060 // gathering some preliminary information, making it possible to call
1061 // g_.reportHits(...) with information about both mates potentially
1062 if(rep1) {
1063 // Mate 1 aligned at least once
1064 summ1.init(
1065 rd1_, NULL, NULL, NULL, &rs1u_, NULL,
1066 exhaust1, exhaust2, -1, -1);
1067 // Sort by score then pick from low to high
1068 AlnScore bestUScore, bestP1Score, bestP2Score, bestCScore;
1069 AlnScore bestUDist, bestP1Dist, bestP2Dist, bestCDist;
1070 AlnScore bestUnchosenUScore, bestUnchosenP1Score, bestUnchosenP2Score, bestUnchosenCScore;
1071 AlnScore bestUnchosenUDist, bestUnchosenP1Dist, bestUnchosenP2Dist, bestUnchosenCDist;
1072 size_t off = selectByScore(
1073 &rs1u_, NULL, nunpair1, select1_, NULL, NULL,
1074 bestUScore,
1075 bestUDist,
1076 bestP1Score,
1077 bestP1Dist,
1078 bestP2Score,
1079 bestP2Dist,
1080 bestCScore,
1081 bestCDist,
1082 bestUnchosenUScore,
1083 bestUnchosenUDist,
1084 bestUnchosenP1Score,
1085 bestUnchosenP1Dist,
1086 bestUnchosenP2Score,
1087 bestUnchosenP2Dist,
1088 bestUnchosenCScore,
1089 bestUnchosenCDist,
1090 rnd);
1091 summ1.setBest(
1092 bestUScore,
1093 bestUDist,
1094 bestP1Score,
1095 bestP1Dist,
1096 bestP2Score,
1097 bestP2Dist,
1098 bestCScore,
1099 bestCDist,
1100 bestUnchosenUScore,
1101 bestUnchosenUDist,
1102 bestUnchosenP1Score,
1103 bestUnchosenP1Dist,
1104 bestUnchosenP2Score,
1105 bestUnchosenP2Dist,
1106 bestUnchosenCScore,
1107 bestUnchosenCDist);
1108 repRs1 = &rs1u_[off];
1109 } else if(rd1_ != NULL) {
1110 // Mate 1 failed to align - don't do anything yet. First we want
1111 // to collect information on mate 2 in case that factors into the
1112 // summary
1113 assert(!unpair1Max);
1114 }
1115
1116 if(rep2) {
1117 summ2.init(
1118 NULL, rd2_, NULL, NULL, NULL, &rs2u_,
1119 exhaust1, exhaust2, -1, -1);
1120 // Sort by score then pick from low to high
1121 AlnScore bestUScore, bestP1Score, bestP2Score, bestCScore;
1122 AlnScore bestUDist, bestP1Dist, bestP2Dist, bestCDist;
1123 AlnScore bestUnchosenUScore, bestUnchosenP1Score, bestUnchosenP2Score, bestUnchosenCScore;
1124 AlnScore bestUnchosenUDist, bestUnchosenP1Dist, bestUnchosenP2Dist, bestUnchosenCDist;
1125 size_t off = selectByScore(
1126 &rs2u_, NULL, nunpair2, select2_, NULL, NULL,
1127 bestUScore,
1128 bestUDist,
1129 bestP1Score,
1130 bestP1Dist,
1131 bestP2Score,
1132 bestP2Dist,
1133 bestCScore,
1134 bestCDist,
1135 bestUnchosenUScore,
1136 bestUnchosenUDist,
1137 bestUnchosenP1Score,
1138 bestUnchosenP1Dist,
1139 bestUnchosenP2Score,
1140 bestUnchosenP2Dist,
1141 bestUnchosenCScore,
1142 bestUnchosenCDist,
1143 rnd);
1144 summ2.setBest(
1145 bestUScore,
1146 bestUDist,
1147 bestP1Score,
1148 bestP1Dist,
1149 bestP2Score,
1150 bestP2Dist,
1151 bestCScore,
1152 bestCDist,
1153 bestUnchosenUScore,
1154 bestUnchosenUDist,
1155 bestUnchosenP1Score,
1156 bestUnchosenP1Dist,
1157 bestUnchosenP2Score,
1158 bestUnchosenP2Dist,
1159 bestUnchosenCScore,
1160 bestUnchosenCDist);
1161 repRs2 = &rs2u_[off];
1162 } else if(rd2_ != NULL) {
1163 // Mate 2 failed to align - don't do anything yet. First we want
1164 // to collect information on mate 1 in case that factors into the
1165 // summary
1166 assert(!unpair2Max);
1167 }
1168
1169 // Now set up flags
1170 if(rep1) {
1171 // Initialize flags. Note: We want to have information about how
1172 // the other mate aligned (if it did) at this point
1173 flags1.init(
1174 readIsPair() ?
1175 ALN_FLAG_PAIR_UNPAIRED_MATE1 :
1176 ALN_FLAG_PAIR_UNPAIRED,
1177 st_.params().mhitsSet(),
1178 unpair1Max,
1179 pairMax,
1180 nfilt1,
1181 scfilt1,
1182 lenfilt1,
1183 qcfilt1,
1184 st_.params().mixed,
1185 true, // primary
1186 repRs2 != NULL, // opp aligned
1187 repRs2 == NULL || repRs2->fw(), // opp fw
1188 scUnMapped,
1189 xeq);
1190 for(size_t i = 0; i < rs1u_.size(); i++) {
1191 rs1u_[i].setMateParams(ALN_RES_TYPE_UNPAIRED_MATE1, NULL, flags1);
1192 }
1193 }
1194 if(rep2) {
1195 // Initialize flags. Note: We want to have information about how
1196 // the other mate aligned (if it did) at this point
1197 flags2.init(
1198 readIsPair() ?
1199 ALN_FLAG_PAIR_UNPAIRED_MATE2 :
1200 ALN_FLAG_PAIR_UNPAIRED,
1201 st_.params().mhitsSet(),
1202 unpair2Max,
1203 pairMax,
1204 nfilt2,
1205 scfilt2,
1206 lenfilt2,
1207 qcfilt2,
1208 st_.params().mixed,
1209 true, // primary
1210 repRs1 != NULL, // opp aligned
1211 repRs1 == NULL || repRs1->fw(), // opp fw
1212 scUnMapped,
1213 xeq);
1214 for(size_t i = 0; i < rs2u_.size(); i++) {
1215 rs2u_[i].setMateParams(ALN_RES_TYPE_UNPAIRED_MATE2, NULL, flags2);
1216 }
1217 }
1218
1219 // Now report mate 1
1220 if(rep1) {
1221 SeedAlSumm ssm1, ssm2;
1222 if(sr1 != NULL) sr1->toSeedAlSumm(ssm1);
1223 if(sr2 != NULL) sr2->toSeedAlSumm(ssm2);
1224 assert(!select1_.empty());
1225 g_.reportHits(
1226 obuf_,
1227 staln_,
1228 threadid_,
1229 rd1_,
1230 repRs2 != NULL ? rd2_ : NULL,
1231 rdid_,
1232 select1_,
1233 repRs2 != NULL ? &select2_ : NULL,
1234 &rs1u_,
1235 repRs2 != NULL ? &rs2u_ : NULL,
1236 unpair1Max,
1237 summ1,
1238 ssm1,
1239 ssm2,
1240 &flags1,
1241 repRs2 != NULL ? &flags2 : NULL,
1242 prm,
1243 mapq_,
1244 sc,
1245 false);
1246 assert_lt(select1_[0], rs1u_.size());
1247 refid = rs1u_[select1_[0]].refid();
1248 refoff = rs1u_[select1_[0]].refoff();
1249 }
1250
1251 // Now report mate 2
1252 //if(rep2 && !rep1) {
1253 if(rep2) {
1254 SeedAlSumm ssm1, ssm2;
1255 if(sr1 != NULL) sr1->toSeedAlSumm(ssm1);
1256 if(sr2 != NULL) sr2->toSeedAlSumm(ssm2);
1257 assert(!select2_.empty());
1258 g_.reportHits(
1259 obuf_,
1260 staln_,
1261 threadid_,
1262 rd2_,
1263 repRs1 != NULL ? rd1_ : NULL,
1264 rdid_,
1265 select2_,
1266 repRs1 != NULL ? &select1_ : NULL,
1267 &rs2u_,
1268 repRs1 != NULL ? &rs1u_ : NULL,
1269 unpair2Max,
1270 summ2,
1271 ssm1,
1272 ssm2,
1273 &flags2,
1274 repRs1 != NULL ? &flags1 : NULL,
1275 prm,
1276 mapq_,
1277 sc,
1278 false);
1279 assert_lt(select2_[0], rs2u_.size());
1280 refid = rs2u_[select2_[0]].refid();
1281 refoff = rs2u_[select2_[0]].refoff();
1282 }
1283
1284 if(rd1_ != NULL && nunpair1 == 0) {
1285 if(nunpair2 > 0) {
1286 assert_neq(-1, refid);
1287 summ1.init(
1288 rd1_, NULL, NULL, NULL, NULL, NULL,
1289 exhaust1, exhaust2, refid, refoff);
1290 } else {
1291 summ1.init(
1292 rd1_, NULL, NULL, NULL, NULL, NULL,
1293 exhaust1, exhaust2, -1, -1);
1294 }
1295 SeedAlSumm ssm1, ssm2;
1296 if(sr1 != NULL) sr1->toSeedAlSumm(ssm1);
1297 if(sr2 != NULL) sr2->toSeedAlSumm(ssm2);
1298 flags1.init(
1299 readIsPair() ?
1300 ALN_FLAG_PAIR_UNPAIRED_MATE1 :
1301 ALN_FLAG_PAIR_UNPAIRED,
1302 st_.params().mhitsSet(),
1303 false,
1304 false,
1305 nfilt1,
1306 scfilt1,
1307 lenfilt1,
1308 qcfilt1,
1309 st_.params().mixed,
1310 true, // primary
1311 repRs2 != NULL, // opp aligned
1312 (repRs2 != NULL) ? repRs2->fw() : false, // opp fw
1313 scUnMapped,
1314 xeq);
1315 g_.reportUnaligned(
1316 obuf_, // string to write output to
1317 staln_,
1318 threadid_,
1319 rd1_, // read 1
1320 NULL, // read 2
1321 rdid_, // read id
1322 summ1, // summ
1323 ssm1, //
1324 ssm2,
1325 &flags1, // flags 1
1326 NULL, // flags 2
1327 prm, // per-read metrics
1328 mapq_, // MAPQ calculator
1329 sc, // scoring scheme
1330 true); // get lock?
1331 }
1332 if(rd2_ != NULL && nunpair2 == 0) {
1333 if(nunpair1 > 0) {
1334 assert_neq(-1, refid);
1335 summ2.init(
1336 NULL, rd2_, NULL, NULL, NULL, NULL,
1337 exhaust1, exhaust2, refid, refoff);
1338 } else {
1339 summ2.init(
1340 NULL, rd2_, NULL, NULL, NULL, NULL,
1341 exhaust1, exhaust2, -1, -1);
1342 }
1343 SeedAlSumm ssm1, ssm2;
1344 if(sr1 != NULL) sr1->toSeedAlSumm(ssm1);
1345 if(sr2 != NULL) sr2->toSeedAlSumm(ssm2);
1346 flags2.init(
1347 readIsPair() ?
1348 ALN_FLAG_PAIR_UNPAIRED_MATE2 :
1349 ALN_FLAG_PAIR_UNPAIRED,
1350 st_.params().mhitsSet(),
1351 false,
1352 false,
1353 nfilt2,
1354 scfilt2,
1355 lenfilt2,
1356 qcfilt2,
1357 st_.params().mixed,
1358 true, // primary
1359 repRs1 != NULL, // opp aligned
1360 (repRs1 != NULL) ? repRs1->fw() : false, // opp fw
1361 scUnMapped,
1362 xeq);
1363 g_.reportUnaligned(
1364 obuf_, // string to write output to
1365 staln_,
1366 threadid_,
1367 rd2_, // read 1
1368 NULL, // read 2
1369 rdid_, // read id
1370 summ2, // summ
1371 ssm1,
1372 ssm2,
1373 &flags2, // flags 1
1374 NULL, // flags 2
1375 prm, // per-read metrics
1376 mapq_, // MAPQ calculator
1377 sc, // scoring scheme
1378 true); // get lock?
1379 }
1380 } // if(suppress alignments)
1381 init_ = false;
1382 return;
1383 }
1384
1385 /**
1386 * Called by the aligner when a new unpaired or paired alignment is
1387 * discovered in the given stage. This function checks whether the
1388 * addition of this alignment causes the reporting policy to be
1389 * violated (by meeting or exceeding the limits set by -k, -m, -M),
1390 * in which case true is returned immediately and the aligner is
1391 * short circuited. Otherwise, the alignment is tallied and false
1392 * is returned.
1393 */
report(int stage,const AlnRes * rs1,const AlnRes * rs2)1394 bool AlnSinkWrap::report(
1395 int stage,
1396 const AlnRes* rs1,
1397 const AlnRes* rs2)
1398 {
1399 assert(init_);
1400 assert(rs1 != NULL || rs2 != NULL);
1401 assert(rs1 == NULL || !rs1->empty());
1402 assert(rs2 == NULL || !rs2->empty());
1403 assert(rs1 == NULL || rs1->repOk());
1404 assert(rs2 == NULL || rs2->repOk());
1405 bool paired = (rs1 != NULL && rs2 != NULL);
1406 bool one = (rs1 != NULL);
1407 const AlnRes* rsa = one ? rs1 : rs2;
1408 const AlnRes* rsb = one ? rs2 : rs1;
1409 if(paired) {
1410 assert(readIsPair());
1411 st_.foundConcordant();
1412 rs1_.push_back(*rs1);
1413 rs2_.push_back(*rs2);
1414 } else {
1415 st_.foundUnpaired(one);
1416 if(one) {
1417 rs1u_.push_back(*rs1);
1418 } else {
1419 rs2u_.push_back(*rs2);
1420 }
1421 }
1422 // Tally overall alignment score
1423 TAlScore score = rsa->score().score();
1424 if(rsb != NULL) score += rsb->score().score();
1425 // Update best score so far
1426 if(paired) {
1427 if(score > bestPair_) {
1428 best2Pair_ = bestPair_;
1429 bestPair_ = score;
1430 } else if(score > best2Pair_) {
1431 best2Pair_ = score;
1432 }
1433 } else {
1434 if(one) {
1435 if(score > bestUnp1_) {
1436 best2Unp1_ = bestUnp1_;
1437 bestUnp1_ = score;
1438 } else if(score > best2Unp1_) {
1439 best2Unp1_ = score;
1440 }
1441 } else {
1442 if(score > bestUnp2_) {
1443 best2Unp2_ = bestUnp2_;
1444 bestUnp2_ = score;
1445 } else if(score > best2Unp2_) {
1446 best2Unp2_ = score;
1447 }
1448 }
1449 }
1450 return st_.done();
1451 }
1452
1453 /**
1454 * If there is a configuration of unpaired alignments that fits our
1455 * criteria for there being one or more discordant alignments, then
1456 * shift the discordant alignments over to the rs1_/rs2_ lists, clear the
1457 * rs1u_/rs2u_ lists and return true. Otherwise, return false.
1458 */
prepareDiscordants()1459 bool AlnSinkWrap::prepareDiscordants() {
1460 if(rs1u_.size() == 1 && rs2u_.size() == 1) {
1461 assert(rs1_.empty());
1462 assert(rs2_.empty());
1463 rs1_.push_back(rs1u_[0]);
1464 rs2_.push_back(rs2u_[0]);
1465 return true;
1466 }
1467 return false;
1468 }
1469
1470 /**
1471 * rs1 (possibly together with rs2 if reads are paired) are populated with
1472 * alignments. Here we prioritize them according to alignment score, and
1473 * some randomness to break ties. Priorities are returned in the 'select'
1474 * list.
1475 */
selectByScore(const EList<AlnRes> * rs1,const EList<AlnRes> * rs2,uint64_t num,EList<size_t> & select,const EList<AlnRes> * rs1u,const EList<AlnRes> * rs2u,AlnScore & bestUScore,AlnScore & bestUDist,AlnScore & bestP1Score,AlnScore & bestP1Dist,AlnScore & bestP2Score,AlnScore & bestP2Dist,AlnScore & bestCScore,AlnScore & bestCDist,AlnScore & bestUnchosenUScore,AlnScore & bestUnchosenUDist,AlnScore & bestUnchosenP1Score,AlnScore & bestUnchosenP1Dist,AlnScore & bestUnchosenP2Score,AlnScore & bestUnchosenP2Dist,AlnScore & bestUnchosenCScore,AlnScore & bestUnchosenCDist,RandomSource & rnd) const1476 size_t AlnSinkWrap::selectByScore(
1477 const EList<AlnRes>* rs1, // alignments to select from (mate 1)
1478 const EList<AlnRes>* rs2, // alignments to select from (mate 2, or NULL)
1479 uint64_t num, // number of alignments to select
1480 EList<size_t>& select, // prioritized list to put results in
1481 const EList<AlnRes>* rs1u, // alignments to select from (mate 1)
1482 const EList<AlnRes>* rs2u, // alignments to select from (mate 2, or NULL)
1483 AlnScore& bestUScore,
1484 AlnScore& bestUDist,
1485 AlnScore& bestP1Score,
1486 AlnScore& bestP1Dist,
1487 AlnScore& bestP2Score,
1488 AlnScore& bestP2Dist,
1489 AlnScore& bestCScore,
1490 AlnScore& bestCDist,
1491 AlnScore& bestUnchosenUScore,
1492 AlnScore& bestUnchosenUDist,
1493 AlnScore& bestUnchosenP1Score,
1494 AlnScore& bestUnchosenP1Dist,
1495 AlnScore& bestUnchosenP2Score,
1496 AlnScore& bestUnchosenP2Dist,
1497 AlnScore& bestUnchosenCScore,
1498 AlnScore& bestUnchosenCDist,
1499 RandomSource& rnd)
1500 const
1501 {
1502 assert(init_);
1503 assert(repOk());
1504 assert_gt(num, 0);
1505 assert(rs1 != NULL);
1506 assert(rs2 == NULL || rs1u != NULL);
1507 assert(rs2 == NULL || rs2u != NULL);
1508
1509 bestUScore.invalidate();
1510 bestUDist.invalidate();
1511 bestUnchosenUScore.invalidate();
1512 bestUnchosenUDist.invalidate();
1513
1514 bestCScore.invalidate();
1515 bestP1Score.invalidate();
1516 bestP2Score.invalidate();
1517 bestCDist.invalidate();
1518 bestP1Dist.invalidate();
1519 bestP2Dist.invalidate();
1520 bestUnchosenCScore.invalidate();
1521 bestUnchosenP1Score.invalidate();
1522 bestUnchosenP2Score.invalidate();
1523 bestUnchosenCDist.invalidate();
1524 bestUnchosenP1Dist.invalidate();
1525 bestUnchosenP2Dist.invalidate();
1526
1527 size_t sz = rs1->size(); // sz = # alignments found
1528 assert_leq(num, sz);
1529 if(sz < num) {
1530 num = sz;
1531 }
1532 // num = # to select
1533 if(sz == 0) {
1534 return 0;
1535 }
1536 select.resize((size_t)num);
1537 // Use 'selectBuf_' as a temporary list for sorting purposes
1538 EList<std::pair<AlnScore, size_t> >& buf =
1539 const_cast<EList<std::pair<AlnScore, size_t> >& >(selectBuf_);
1540 buf.resize(sz);
1541 // Sort by score. If reads are pairs, sort by sum of mate scores.
1542 for(size_t i = 0; i < sz; i++) {
1543 buf[i].first = (*rs1)[i].score();
1544 if(rs2 != NULL) {
1545 buf[i].first += (*rs2)[i].score();
1546 }
1547 buf[i].second = i; // original offset
1548 }
1549 buf.sort(); buf.reverse(); // sort in descending order by score
1550
1551 // Randomize streaks of alignments that are equal by score
1552 size_t streak = 0;
1553 for(size_t i = 1; i < buf.size(); i++) {
1554 if(buf[i].first == buf[i-1].first) {
1555 if(streak == 0) { streak = 1; }
1556 streak++;
1557 } else {
1558 if(streak > 1) {
1559 assert_geq(i, streak);
1560 buf.shufflePortion(i-streak, streak, rnd);
1561 }
1562 streak = 0;
1563 }
1564 }
1565 if(streak > 1) {
1566 buf.shufflePortion(buf.size() - streak, streak, rnd);
1567 }
1568
1569 // Copy the permutation into the 'select' list
1570 for(size_t i = 0; i < num; i++) { select[i] = buf[i].second; }
1571
1572 if(rs2 == NULL) {
1573 bestUScore = bestUDist = (*rs1)[select[0]].score();
1574 }
1575
1576 // For paired-end read, find best alignment score among end
1577 // alignments not chosen, for both ends
1578 if(rs2 != NULL) {
1579 bestCScore = bestCDist = (*rs1)[select[0]].score() + (*rs2)[select[0]].score();
1580 bestP1Score = bestP1Dist = (*rs1)[select[0]].score();
1581 bestP2Score = bestP2Dist = (*rs2)[select[0]].score();
1582 for(size_t i = 0; i < rs1u->size(); i++) {
1583 if((*rs1u)[i].refcoord() == (*rs1)[select[0]].refcoord()) {
1584 continue;
1585 }
1586 if((*rs1u)[i].score() > bestUnchosenP1Score) {
1587 bestUnchosenP1Score = (*rs1u)[i].score();
1588 }
1589 if((*rs1u)[i].score().basesAligned() > bestUnchosenP1Dist.basesAligned()) {
1590 bestUnchosenP1Dist = (*rs1u)[i].score();
1591 }
1592 }
1593 for(size_t i = 0; i < rs2u->size(); i++) {
1594 if((*rs2u)[i].refcoord() == (*rs2)[select[0]].refcoord()) {
1595 continue;
1596 }
1597 if((*rs2u)[i].score() > bestUnchosenP2Score) {
1598 bestUnchosenP2Score = (*rs2u)[i].score();
1599 }
1600 if((*rs2u)[i].score().basesAligned() > bestUnchosenP2Dist.basesAligned()) {
1601 bestUnchosenP2Dist = (*rs2u)[i].score();
1602 }
1603 }
1604 if(buf.size() > 1) {
1605 bestUnchosenCScore = buf[1].first;
1606 for(size_t i = 1; i < buf.size(); i++) {
1607 AlnScore dist = (*rs1)[buf[i].second].score() +
1608 (*rs2)[buf[i].second].score();
1609 if(dist.basesAligned() > bestUnchosenCDist.basesAligned()) {
1610 bestUnchosenCDist = dist;
1611 }
1612 }
1613 }
1614 } else if(buf.size() > 1) {
1615 bestUnchosenUScore = (*rs1)[buf[1].second].score();
1616 for(size_t i = 1; i < buf.size(); i++) {
1617 if((*rs1)[buf[1].second].score().basesAligned() > bestUnchosenUDist.basesAligned()) {
1618 bestUnchosenUDist = (*rs1)[buf[1].second].score();
1619 }
1620 }
1621 }
1622
1623 // Returns index of the representative alignment, but in 'select' also
1624 // returns the indexes of the next best selected alignments in order by
1625 // score.
1626 return selectBuf_[0].second;
1627 }
1628
1629 /**
1630 * Given that rs is already populated with alignments, consider the
1631 * alignment policy and make random selections where necessary. E.g. if we
1632 * found 10 alignments and the policy is -k 2 -m 20, select 2 alignments at
1633 * random. We "select" an alignment by setting the parallel entry in the
1634 * 'select' list to true.
1635 *
1636 * Return the "representative" alignment. This is simply the first one
1637 * selected. That will also be what SAM calls the "primary" alignment.
1638 */
selectAlnsToReport(const EList<AlnRes> & rs,uint64_t num,EList<size_t> & select,RandomSource & rnd) const1639 size_t AlnSinkWrap::selectAlnsToReport(
1640 const EList<AlnRes>& rs, // alignments to select from
1641 uint64_t num, // number of alignments to select
1642 EList<size_t>& select, // list to put results in
1643 RandomSource& rnd)
1644 const
1645 {
1646 assert(init_);
1647 assert(repOk());
1648 assert_gt(num, 0);
1649 size_t sz = rs.size();
1650 if(sz < num) {
1651 num = sz;
1652 }
1653 if(sz < 1) {
1654 return 0;
1655 }
1656 select.resize((size_t)num);
1657 if(sz == 1) {
1658 assert_eq(1, num);
1659 select[0] = 0;
1660 return 0;
1661 }
1662 // Select a random offset into the list of alignments
1663 uint32_t off = rnd.nextU32() % (uint32_t)sz;
1664 uint32_t offOrig = off;
1665 // Now take elements starting at that offset, wrapping around to 0 if
1666 // necessary. Leave the rest.
1667 for(size_t i = 0; i < num; i++) {
1668 select[i] = off;
1669 off++;
1670 if(off == sz) {
1671 off = 0;
1672 }
1673 }
1674 return offOrig;
1675 }
1676
1677 #define NOT_SUPPRESSED !suppress_[field++]
1678 #define BEGIN_FIELD { \
1679 if(firstfield) firstfield = false; \
1680 else o.append('\t'); \
1681 }
1682 #define WRITE_TAB { \
1683 if(firstfield) firstfield = false; \
1684 else o.append('\t'); \
1685 }
1686 #define WRITE_NUM(o, x) { \
1687 itoa10(x, buf); \
1688 o.append(buf); \
1689 }
1690
1691 /**
1692 * Print a seed summary to the first output stream in the outs_ list.
1693 */
reportSeedSummary(BTString & o,const Read & rd,TReadId rdid,size_t threadId,const SeedResults & rs,bool getLock)1694 void AlnSink::reportSeedSummary(
1695 BTString& o,
1696 const Read& rd,
1697 TReadId rdid,
1698 size_t threadId,
1699 const SeedResults& rs,
1700 bool getLock)
1701 {
1702 appendSeedSummary(
1703 o, // string to write to
1704 rd, // read
1705 rdid, // read id
1706 rs.numOffs()*2, // # seeds tried
1707 rs.nonzeroOffsets(), // # seeds with non-empty results
1708 rs.numRanges(), // # ranges for all seed hits
1709 rs.numElts(), // # elements for all seed hits
1710 rs.numOffs(), // # seeds tried from fw read
1711 rs.nonzeroOffsetsFw(), // # seeds with non-empty results from fw read
1712 rs.numRangesFw(), // # ranges for seed hits from fw read
1713 rs.numEltsFw(), // # elements for seed hits from fw read
1714 rs.numOffs(), // # seeds tried from rc read
1715 rs.nonzeroOffsetsRc(), // # seeds with non-empty results from fw read
1716 rs.numRangesRc(), // # ranges for seed hits from fw read
1717 rs.numEltsRc()); // # elements for seed hits from fw read
1718 }
1719
1720 /**
1721 * Print an empty seed summary to the first output stream in the outs_ list.
1722 */
reportEmptySeedSummary(BTString & o,const Read & rd,TReadId rdid,size_t threadId,bool getLock)1723 void AlnSink::reportEmptySeedSummary(
1724 BTString& o,
1725 const Read& rd,
1726 TReadId rdid,
1727 size_t threadId,
1728 bool getLock)
1729 {
1730 appendSeedSummary(
1731 o, // string to append to
1732 rd, // read
1733 rdid, // read id
1734 0, // # seeds tried
1735 0, // # seeds with non-empty results
1736 0, // # ranges for all seed hits
1737 0, // # elements for all seed hits
1738 0, // # seeds tried from fw read
1739 0, // # seeds with non-empty results from fw read
1740 0, // # ranges for seed hits from fw read
1741 0, // # elements for seed hits from fw read
1742 0, // # seeds tried from rc read
1743 0, // # seeds with non-empty results from fw read
1744 0, // # ranges for seed hits from fw read
1745 0); // # elements for seed hits from fw read
1746 }
1747
1748 /**
1749 * Print the given string. If ws = true, print only up to and not
1750 * including the first space or tab. Useful for printing reference
1751 * names.
1752 */
1753 template<typename T>
printUptoWs(BTString & s,const T & str,bool chopws)1754 static inline void printUptoWs(
1755 BTString& s,
1756 const T& str,
1757 bool chopws)
1758 {
1759 size_t len = str.length();
1760 for(size_t i = 0; i < len; i++) {
1761 if(!chopws || (str[i] != ' ' && str[i] != '\t')) {
1762 s.append(str[i]);
1763 } else {
1764 break;
1765 }
1766 }
1767 }
1768
1769 /**
1770 * Append a batch of unresolved seed alignment summary results (i.e.
1771 * seed alignments where all we know is the reference sequence aligned
1772 * to and its SA range, not where it falls in the reference
1773 * sequence) to the given output stream in Bowtie's seed-sumamry
1774 * verbose-mode format.
1775 *
1776 * The seed summary format is:
1777 *
1778 * - One line per read
1779 * - A typical line consists of a set of tab-delimited fields:
1780 *
1781 * 1. Read name
1782 * 2. Total number of seeds extracted from the read
1783 * 3. Total number of seeds that aligned to the reference at
1784 * least once (always <= field 2)
1785 * 4. Total number of distinct BW ranges found in all seed hits
1786 * (always >= field 3)
1787 * 5. Total number of distinct BW elements found in all seed
1788 * hits (always >= field 4)
1789 * 6-9.: Like 2-5. but just for seeds extracted from the
1790 * forward representation of the read
1791 * 10-13.: Like 2-5. but just for seeds extracted from the
1792 * reverse-complement representation of the read
1793 *
1794 * Note that fields 6 and 10 should add to field 2, 7 and 11
1795 * should add to 3, etc.
1796 *
1797 * - Lines for reads that are filtered out for any reason (e.g. too
1798 * many Ns) have columns 2 through 13 set to 0.
1799 */
appendSeedSummary(BTString & o,const Read & rd,const TReadId rdid,size_t seedsTried,size_t nonzero,size_t ranges,size_t elts,size_t seedsTriedFw,size_t nonzeroFw,size_t rangesFw,size_t eltsFw,size_t seedsTriedRc,size_t nonzeroRc,size_t rangesRc,size_t eltsRc)1800 void AlnSink::appendSeedSummary(
1801 BTString& o,
1802 const Read& rd,
1803 const TReadId rdid,
1804 size_t seedsTried,
1805 size_t nonzero,
1806 size_t ranges,
1807 size_t elts,
1808 size_t seedsTriedFw,
1809 size_t nonzeroFw,
1810 size_t rangesFw,
1811 size_t eltsFw,
1812 size_t seedsTriedRc,
1813 size_t nonzeroRc,
1814 size_t rangesRc,
1815 size_t eltsRc)
1816 {
1817 char buf[1024];
1818 bool firstfield = true;
1819 //
1820 // Read name
1821 //
1822 BEGIN_FIELD;
1823 printUptoWs(o, rd.name, true);
1824
1825 //
1826 // Total number of seeds tried
1827 //
1828 BEGIN_FIELD;
1829 WRITE_NUM(o, seedsTried);
1830
1831 //
1832 // Total number of seeds tried where at least one range was found.
1833 //
1834 BEGIN_FIELD;
1835 WRITE_NUM(o, nonzero);
1836
1837 //
1838 // Total number of ranges found
1839 //
1840 BEGIN_FIELD;
1841 WRITE_NUM(o, ranges);
1842
1843 //
1844 // Total number of elements found
1845 //
1846 BEGIN_FIELD;
1847 WRITE_NUM(o, elts);
1848
1849 //
1850 // The same four numbers, but only for seeds extracted from the
1851 // forward read representation.
1852 //
1853 BEGIN_FIELD;
1854 WRITE_NUM(o, seedsTriedFw);
1855
1856 BEGIN_FIELD;
1857 WRITE_NUM(o, nonzeroFw);
1858
1859 BEGIN_FIELD;
1860 WRITE_NUM(o, rangesFw);
1861
1862 BEGIN_FIELD;
1863 WRITE_NUM(o, eltsFw);
1864
1865 //
1866 // The same four numbers, but only for seeds extracted from the
1867 // reverse complement read representation.
1868 //
1869 BEGIN_FIELD;
1870 WRITE_NUM(o, seedsTriedRc);
1871
1872 BEGIN_FIELD;
1873 WRITE_NUM(o, nonzeroRc);
1874
1875 BEGIN_FIELD;
1876 WRITE_NUM(o, rangesRc);
1877
1878 BEGIN_FIELD;
1879 WRITE_NUM(o, eltsRc);
1880
1881 o.append('\n');
1882 }
1883
1884 /**
1885 * Append a single hit to the given output stream in Bowtie's
1886 * verbose-mode format.
1887 */
appendMate(BTString & o,StackedAln & staln,const Read & rd,const Read * rdo,const TReadId rdid,AlnRes * rs,AlnRes * rso,const AlnSetSumm & summ,const SeedAlSumm & ssm,const SeedAlSumm & ssmo,const AlnFlags & flags,const PerReadMetrics & prm,const Mapq & mapqCalc,const Scoring & sc)1888 void AlnSinkSam::appendMate(
1889 BTString& o, // append to this string
1890 StackedAln& staln, // store stacked alignment struct here
1891 const Read& rd,
1892 const Read* rdo,
1893 const TReadId rdid,
1894 AlnRes* rs,
1895 AlnRes* rso,
1896 const AlnSetSumm& summ,
1897 const SeedAlSumm& ssm,
1898 const SeedAlSumm& ssmo,
1899 const AlnFlags& flags,
1900 const PerReadMetrics& prm,
1901 const Mapq& mapqCalc,
1902 const Scoring& sc)
1903 {
1904 if(rs == NULL && samc_.omitUnalignedReads()) {
1905 return;
1906 }
1907 char buf[1024];
1908 char mapqInps[1024];
1909 if(rs != NULL) {
1910 staln.reset();
1911 rs->initStacked(rd, staln);
1912 staln.leftAlign(false /* not past MMs */);
1913 }
1914 int offAdj = 0;
1915 // QNAME
1916 samc_.printReadName(o, rd.name, flags.partOfPair());
1917 o.append('\t');
1918 // FLAG
1919 int fl = 0;
1920 if(flags.partOfPair()) {
1921 fl |= SAM_FLAG_PAIRED;
1922 if(flags.alignedConcordant()) {
1923 fl |= SAM_FLAG_MAPPED_PAIRED;
1924 }
1925 if(!flags.mateAligned()) {
1926 // Other fragment is unmapped
1927 fl |= SAM_FLAG_MATE_UNMAPPED;
1928 }
1929 fl |= (flags.readMate1() ?
1930 SAM_FLAG_FIRST_IN_PAIR : SAM_FLAG_SECOND_IN_PAIR);
1931 if(flags.mateAligned()) {
1932 bool oppFw = (rso != NULL) ? rso->fw() : flags.isOppFw();
1933 if (!oppFw) {
1934 fl |= SAM_FLAG_MATE_STRAND;
1935 }
1936 }
1937 }
1938 if(!flags.isPrimary()) {
1939 fl |= SAM_FLAG_NOT_PRIMARY;
1940 }
1941 if(rs != NULL && !rs->fw()) {
1942 fl |= SAM_FLAG_QUERY_STRAND;
1943 }
1944 if(rs == NULL) {
1945 // Failed to align
1946 fl |= SAM_FLAG_UNMAPPED;
1947 }
1948 itoa10<int>(fl, buf);
1949 o.append(buf);
1950 o.append('\t');
1951 // RNAME
1952 if(rs != NULL) {
1953 samc_.printRefNameFromIndex(o, (size_t)rs->refid());
1954 o.append('\t');
1955 } else {
1956 if(summ.orefid() != -1) {
1957 // Opposite mate aligned but this one didn't - print the opposite
1958 // mate's RNAME and POS as is customary
1959 assert(flags.partOfPair());
1960 samc_.printRefNameFromIndex(o, (size_t)summ.orefid());
1961 } else {
1962 // No alignment
1963 o.append('*');
1964 }
1965 o.append('\t');
1966 }
1967 // POS
1968 // Note: POS is *after* soft clipping. I.e. POS points to the
1969 // upstream-most character *involved in the clipped alignment*.
1970 if(rs != NULL) {
1971 itoa10<int64_t>(rs->refoff()+1+offAdj, buf);
1972 o.append(buf);
1973 o.append('\t');
1974 } else {
1975 if(summ.orefid() != -1) {
1976 // Opposite mate aligned but this one didn't - print the opposite
1977 // mate's RNAME and POS as is customary
1978 assert(flags.partOfPair());
1979 itoa10<int64_t>(summ.orefoff()+1+offAdj, buf);
1980 o.append(buf);
1981 } else {
1982 // No alignment
1983 o.append('0');
1984 }
1985 o.append('\t');
1986 }
1987 // MAPQ
1988 mapqInps[0] = '\0';
1989 if(rs != NULL) {
1990 itoa10<TMapq>(mapqCalc.mapq(
1991 summ, flags, rd.mate < 2, rd.length(),
1992 rdo == NULL ? 0 : rdo->length(), mapqInps), buf);
1993 o.append(buf);
1994 o.append('\t');
1995 } else {
1996 // No alignment
1997 o.append("0\t");
1998 }
1999 // CIGAR
2000 if(rs != NULL) {
2001 staln.buildCigar(flags.xeq());
2002 staln.writeCigar(&o, NULL);
2003 o.append('\t');
2004 } else {
2005 // No alignment
2006 o.append("*\t");
2007 }
2008 // RNEXT
2009 if(rs != NULL && flags.partOfPair()) {
2010 if(rso != NULL && rs->refid() != rso->refid()) {
2011 samc_.printRefNameFromIndex(o, (size_t)rso->refid());
2012 o.append('\t');
2013 } else {
2014 o.append("=\t");
2015 }
2016 } else if(summ.orefid() != -1) {
2017 // The convention if this mate fails to align but the other doesn't is
2018 // to copy the mate's details into here
2019 o.append("=\t");
2020 } else {
2021 o.append("*\t");
2022 }
2023 // PNEXT
2024 if(rs != NULL && flags.partOfPair()) {
2025 if(rso != NULL) {
2026 itoa10<int64_t>(rso->refoff()+1, buf);
2027 o.append(buf);
2028 o.append('\t');
2029 } else {
2030 // The convenstion is that if this mate aligns but the opposite
2031 // doesn't, we print this mate's offset here
2032 itoa10<int64_t>(rs->refoff()+1, buf);
2033 o.append(buf);
2034 o.append('\t');
2035 }
2036 } else if(summ.orefid() != -1) {
2037 // The convention if this mate fails to align but the other doesn't is
2038 // to copy the mate's details into here
2039 itoa10<int64_t>(summ.orefoff()+1, buf);
2040 o.append(buf);
2041 o.append('\t');
2042 } else {
2043 o.append("0\t");
2044 }
2045 // ISIZE
2046 if(rs != NULL && rs->isFraglenSet()) {
2047 itoa10<int64_t>(rs->fragmentLength(), buf);
2048 o.append(buf);
2049 o.append('\t');
2050 } else {
2051 // No fragment
2052 o.append("0\t");
2053 }
2054 // SEQ
2055 if(!flags.isPrimary() && samc_.omitSecondarySeqQual()) {
2056 o.append('*');
2057 } else {
2058 // Print the read
2059 if(rd.patFw.length() == 0) {
2060 o.append('*');
2061 } else {
2062 if(rs == NULL || rs->fw()) {
2063 o.append(rd.patFw.toZBuf());
2064 } else {
2065 o.append(rd.patRc.toZBuf());
2066 }
2067 }
2068 }
2069 o.append('\t');
2070 // QUAL
2071 if(!flags.isPrimary() && samc_.omitSecondarySeqQual()) {
2072 o.append('*');
2073 } else {
2074 // Print the quals
2075 if(rd.qual.length() == 0) {
2076 o.append('*');
2077 } else {
2078 if(rs == NULL || rs->fw()) {
2079 o.append(rd.qual.toZBuf());
2080 } else {
2081 o.append(rd.qualRev.toZBuf());
2082 }
2083 }
2084 }
2085 o.append('\t');
2086 //
2087 // Optional fields
2088 //
2089 if(rs != NULL) {
2090 samc_.printAlignedOptFlags(
2091 o, // output buffer
2092 true, // first opt flag printed is first overall?
2093 rd, // read
2094 rdo, // opposite read
2095 *rs, // individual alignment result
2096 staln, // stacked alignment
2097 flags, // alignment flags
2098 summ, // summary of alignments for this read
2099 ssm, // seed alignment summary
2100 prm, // per-read metrics
2101 sc, // scoring scheme
2102 mapqInps); // inputs to MAPQ calculation
2103 } else {
2104 samc_.printEmptyOptFlags(
2105 o, // output buffer
2106 true, // first opt flag printed is first overall?
2107 rd, // read
2108 flags, // alignment flags
2109 summ, // summary of alignments for this read
2110 ssm, // seed alignment summary
2111 prm, // per-read metrics
2112 sc); // scoring scheme
2113 }
2114 samc_.printPreservedOptFlags(o, rd);
2115 samc_.printComment(o, rd.name);
2116 o.append('\n');
2117 }
2118
2119 #ifdef ALN_SINK_MAIN
2120
2121 #include <iostream>
2122
testDones(const ReportingState & st,bool done1,bool done2,bool done3,bool done4,bool done5,bool done6)2123 bool testDones(
2124 const ReportingState& st,
2125 bool done1,
2126 bool done2,
2127 bool done3,
2128 bool done4,
2129 bool done5,
2130 bool done6)
2131 {
2132 assert(st.doneConcordant() == done1);
2133 assert(st.doneDiscordant() == done2);
2134 assert(st.doneUnpaired(true) == done3);
2135 assert(st.doneUnpaired(false) == done4);
2136 assert(st.doneUnpaired() == done5);
2137 assert(st.done() == done6);
2138 assert(st.repOk());
2139 return true;
2140 }
2141
main(void)2142 int main(void) {
2143 cerr << "Case 1 (simple unpaired 1) ... ";
2144 {
2145 uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2146 bool pairMax = false, unpair1Max = false, unpair2Max = false;
2147 ReportingParams rp(
2148 2, // khits
2149 0, // mhits
2150 0, // pengap
2151 false, // msample
2152 false, // discord
2153 false); // mixed
2154 ReportingState st(rp);
2155 st.nextRead(false); // unpaired read
2156 assert(testDones(st, true, true, false, true, false, false));
2157 st.foundUnpaired(true);
2158 assert(testDones(st, true, true, false, true, false, false));
2159 st.foundUnpaired(true);
2160 assert(testDones(st, true, true, true, true, true, true));
2161 st.finish();
2162 assert(testDones(st, true, true, true, true, true, true));
2163 assert_eq(0, st.numConcordant());
2164 assert_eq(0, st.numDiscordant());
2165 assert_eq(2, st.numUnpaired1());
2166 assert_eq(0, st.numUnpaired2());
2167 assert(st.repOk());
2168 st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2169 pairMax, unpair1Max, unpair2Max);
2170 assert_eq(0, nconcord);
2171 assert_eq(0, ndiscord);
2172 assert_eq(2, nunpair1);
2173 assert_eq(0, nunpair2);
2174 assert(!pairMax);
2175 assert(!unpair1Max);
2176 assert(!unpair2Max);
2177 }
2178 cerr << "PASSED" << endl;
2179
2180 cerr << "Case 2 (simple unpaired 1) ... ";
2181 {
2182 uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2183 bool pairMax = false, unpair1Max = false, unpair2Max = false;
2184 ReportingParams rp(
2185 2, // khits
2186 3, // mhits
2187 0, // pengap
2188 false, // msample
2189 false, // discord
2190 false); // mixed
2191 ReportingState st(rp);
2192 st.nextRead(false); // unpaired read
2193 assert(testDones(st, true, true, false, true, false, false));
2194 st.foundUnpaired(true);
2195 assert(testDones(st, true, true, false, true, false, false));
2196 st.foundUnpaired(true);
2197 assert(testDones(st, true, true, false, true, false, false));
2198 st.foundUnpaired(true);
2199 assert(testDones(st, true, true, false, true, false, false));
2200 st.foundUnpaired(true);
2201 assert(testDones(st, true, true, true, true, true, true));
2202 assert_eq(0, st.numConcordant());
2203 assert_eq(0, st.numDiscordant());
2204 assert_eq(4, st.numUnpaired1());
2205 assert_eq(0, st.numUnpaired2());
2206 st.finish();
2207 assert(testDones(st, true, true, true, true, true, true));
2208 assert_eq(0, st.numConcordant());
2209 assert_eq(0, st.numDiscordant());
2210 assert_eq(4, st.numUnpaired1());
2211 assert_eq(0, st.numUnpaired2());
2212 assert(st.repOk());
2213 st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2214 pairMax, unpair1Max, unpair2Max);
2215 assert_eq(0, nconcord);
2216 assert_eq(0, ndiscord);
2217 assert_eq(0, nunpair1);
2218 assert_eq(0, nunpair2);
2219 assert(!pairMax);
2220 assert(unpair1Max);
2221 assert(!unpair2Max);
2222 }
2223 cerr << "PASSED" << endl;
2224
2225 cerr << "Case 3 (simple paired 1) ... ";
2226 {
2227 uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2228 bool pairMax = false, unpair1Max = false, unpair2Max = false;
2229 ReportingParams rp(
2230 2, // khits
2231 3, // mhits
2232 0, // pengap
2233 false, // msample
2234 false, // discord
2235 false); // mixed
2236 ReportingState st(rp);
2237 st.nextRead(true); // unpaired read
2238 assert(testDones(st, false, true, true, true, true, false));
2239 st.foundUnpaired(true);
2240 assert(testDones(st, false, true, true, true, true, false));
2241 st.foundUnpaired(true);
2242 assert(testDones(st, false, true, true, true, true, false));
2243 st.foundUnpaired(true);
2244 assert(testDones(st, false, true, true, true, true, false));
2245 st.foundUnpaired(true);
2246 assert(testDones(st, false, true, true, true, true, false));
2247 st.foundUnpaired(false);
2248 assert(testDones(st, false, true, true, true, true, false));
2249 st.foundUnpaired(false);
2250 assert(testDones(st, false, true, true, true, true, false));
2251 st.foundUnpaired(false);
2252 assert(testDones(st, false, true, true, true, true, false));
2253 st.foundUnpaired(false);
2254 assert(testDones(st, false, true, true, true, true, false));
2255 st.foundConcordant();
2256 assert(testDones(st, false, true, true, true, true, false));
2257 st.foundConcordant();
2258 assert(testDones(st, false, true, true, true, true, false));
2259 st.foundConcordant();
2260 assert(testDones(st, false, true, true, true, true, false));
2261 st.foundConcordant();
2262 assert(testDones(st, true, true, true, true, true, true));
2263 assert_eq(4, st.numConcordant());
2264 assert_eq(0, st.numDiscordant());
2265 assert_eq(4, st.numUnpaired1());
2266 assert_eq(4, st.numUnpaired2());
2267 st.finish();
2268 assert(testDones(st, true, true, true, true, true, true));
2269 assert_eq(4, st.numConcordant());
2270 assert_eq(0, st.numDiscordant());
2271 assert_eq(4, st.numUnpaired1());
2272 assert_eq(4, st.numUnpaired2());
2273 assert(st.repOk());
2274 st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2275 pairMax, unpair1Max, unpair2Max);
2276 assert_eq(0, nconcord);
2277 assert_eq(0, ndiscord);
2278 assert_eq(0, nunpair1);
2279 assert_eq(0, nunpair2);
2280 assert(pairMax);
2281 assert(!unpair1Max); // because !mixed
2282 assert(!unpair2Max); // because !mixed
2283 }
2284 cerr << "PASSED" << endl;
2285
2286 cerr << "Case 4 (simple paired 2) ... ";
2287 {
2288 uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2289 bool pairMax = false, unpair1Max = false, unpair2Max = false;
2290 ReportingParams rp(
2291 2, // khits
2292 3, // mhits
2293 0, // pengap
2294 false, // msample
2295 true, // discord
2296 true); // mixed
2297 ReportingState st(rp);
2298 st.nextRead(true); // unpaired read
2299 assert(testDones(st, false, false, false, false, false, false));
2300 st.foundUnpaired(true);
2301 assert(testDones(st, false, false, false, false, false, false));
2302 st.foundUnpaired(true);
2303 assert(testDones(st, false, true, false, false, false, false));
2304 st.foundUnpaired(true);
2305 assert(testDones(st, false, true, false, false, false, false));
2306 st.foundUnpaired(true);
2307 assert(testDones(st, false, true, true, false, false, false));
2308 st.foundUnpaired(false);
2309 assert(testDones(st, false, true, true, false, false, false));
2310 st.foundUnpaired(false);
2311 assert(testDones(st, false, true, true, false, false, false));
2312 st.foundUnpaired(false);
2313 assert(testDones(st, false, true, true, false, false, false));
2314 st.foundUnpaired(false);
2315 assert(testDones(st, false, true, true, true, true, false));
2316 st.foundConcordant();
2317 assert(testDones(st, false, true, true, true, true, false));
2318 st.foundConcordant();
2319 assert(testDones(st, false, true, true, true, true, false));
2320 st.foundConcordant();
2321 assert(testDones(st, false, true, true, true, true, false));
2322 st.foundConcordant();
2323 assert(testDones(st, true, true, true, true, true, true));
2324 assert_eq(4, st.numConcordant());
2325 assert_eq(0, st.numDiscordant());
2326 assert_eq(4, st.numUnpaired1());
2327 assert_eq(4, st.numUnpaired2());
2328 st.finish();
2329 assert(testDones(st, true, true, true, true, true, true));
2330 assert_eq(4, st.numConcordant());
2331 assert_eq(0, st.numDiscordant());
2332 assert_eq(4, st.numUnpaired1());
2333 assert_eq(4, st.numUnpaired2());
2334 assert(st.repOk());
2335 st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2336 pairMax, unpair1Max, unpair2Max);
2337 assert_eq(0, nconcord);
2338 assert_eq(0, ndiscord);
2339 assert_eq(0, nunpair1);
2340 assert_eq(0, nunpair2);
2341 assert(pairMax);
2342 assert(unpair1Max);
2343 assert(unpair2Max);
2344 }
2345 cerr << "PASSED" << endl;
2346
2347 cerr << "Case 5 (potential discordant after concordant) ... ";
2348 {
2349 uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2350 bool pairMax = false, unpair1Max = false, unpair2Max = false;
2351 ReportingParams rp(
2352 2, // khits
2353 3, // mhits
2354 0, // pengap
2355 false, // msample
2356 true, // discord
2357 true); // mixed
2358 ReportingState st(rp);
2359 st.nextRead(true);
2360 assert(testDones(st, false, false, false, false, false, false));
2361 st.foundUnpaired(true);
2362 st.foundUnpaired(false);
2363 st.foundConcordant();
2364 assert(testDones(st, false, true, false, false, false, false));
2365 st.finish();
2366 assert(testDones(st, true, true, true, true, true, true));
2367 assert_eq(1, st.numConcordant());
2368 assert_eq(0, st.numDiscordant());
2369 assert_eq(1, st.numUnpaired1());
2370 assert_eq(1, st.numUnpaired2());
2371 assert(st.repOk());
2372 st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2373 pairMax, unpair1Max, unpair2Max);
2374 assert_eq(1, nconcord);
2375 assert_eq(0, ndiscord);
2376 assert_eq(0, nunpair1);
2377 assert_eq(0, nunpair2);
2378 assert(!pairMax);
2379 assert(!unpair1Max);
2380 assert(!unpair2Max);
2381 }
2382 cerr << "PASSED" << endl;
2383
2384 cerr << "Case 6 (true discordant) ... ";
2385 {
2386 uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2387 bool pairMax = false, unpair1Max = false, unpair2Max = false;
2388 ReportingParams rp(
2389 2, // khits
2390 3, // mhits
2391 0, // pengap
2392 false, // msample
2393 true, // discord
2394 true); // mixed
2395 ReportingState st(rp);
2396 st.nextRead(true);
2397 assert(testDones(st, false, false, false, false, false, false));
2398 st.foundUnpaired(true);
2399 st.foundUnpaired(false);
2400 assert(testDones(st, false, false, false, false, false, false));
2401 st.finish();
2402 assert(testDones(st, true, true, true, true, true, true));
2403 assert_eq(0, st.numConcordant());
2404 assert_eq(1, st.numDiscordant());
2405 assert_eq(0, st.numUnpaired1());
2406 assert_eq(0, st.numUnpaired2());
2407 assert(st.repOk());
2408 st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2409 pairMax, unpair1Max, unpair2Max);
2410 assert_eq(0, nconcord);
2411 assert_eq(1, ndiscord);
2412 assert_eq(0, nunpair1);
2413 assert_eq(0, nunpair2);
2414 assert(!pairMax);
2415 assert(!unpair1Max);
2416 assert(!unpair2Max);
2417 }
2418 cerr << "PASSED" << endl;
2419
2420 cerr << "Case 7 (unaligned pair & uniquely aligned mate, mixed-mode) ... ";
2421 {
2422 uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2423 bool pairMax = false, unpair1Max = false, unpair2Max = false;
2424 ReportingParams rp(
2425 1, // khits
2426 1, // mhits
2427 0, // pengap
2428 false, // msample
2429 true, // discord
2430 true); // mixed
2431 ReportingState st(rp);
2432 st.nextRead(true); // unpaired read
2433 // assert(st.doneConcordant() == done1);
2434 // assert(st.doneDiscordant() == done2);
2435 // assert(st.doneUnpaired(true) == done3);
2436 // assert(st.doneUnpaired(false) == done4);
2437 // assert(st.doneUnpaired() == done5);
2438 // assert(st.done() == done6);
2439 st.foundUnpaired(true);
2440 assert(testDones(st, false, false, false, false, false, false));
2441 st.foundUnpaired(true);
2442 assert(testDones(st, false, true, true, false, false, false));
2443 assert_eq(0, st.numConcordant());
2444 assert_eq(0, st.numDiscordant());
2445 assert_eq(2, st.numUnpaired1());
2446 assert_eq(0, st.numUnpaired2());
2447 st.finish();
2448 st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2449 pairMax, unpair1Max, unpair2Max);
2450 assert_eq(0, nconcord);
2451 assert_eq(0, ndiscord);
2452 assert_eq(0, nunpair1);
2453 assert_eq(0, nunpair2);
2454 assert(!pairMax);
2455 assert(unpair1Max);
2456 assert(!unpair2Max);
2457 }
2458 cerr << "PASSED" << endl;
2459
2460 cerr << "Case 8 (unaligned pair & uniquely aligned mate, NOT mixed-mode) ... ";
2461 {
2462 uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2463 bool pairMax = false, unpair1Max = false, unpair2Max = false;
2464 ReportingParams rp(
2465 1, // khits
2466 1, // mhits
2467 0, // pengap
2468 false, // msample
2469 true, // discord
2470 false); // mixed
2471 ReportingState st(rp);
2472 st.nextRead(true); // unpaired read
2473 // assert(st.doneConcordant() == done1);
2474 // assert(st.doneDiscordant() == done2);
2475 // assert(st.doneUnpaired(true) == done3);
2476 // assert(st.doneUnpaired(false) == done4);
2477 // assert(st.doneUnpaired() == done5);
2478 // assert(st.done() == done6);
2479 st.foundUnpaired(true);
2480 assert(testDones(st, false, false, true, true, true, false));
2481 st.foundUnpaired(true);
2482 assert(testDones(st, false, true, true, true, true, false));
2483 assert_eq(0, st.numConcordant());
2484 assert_eq(0, st.numDiscordant());
2485 assert_eq(2, st.numUnpaired1());
2486 assert_eq(0, st.numUnpaired2());
2487 st.finish();
2488 st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2489 pairMax, unpair1Max, unpair2Max);
2490 assert_eq(0, nconcord);
2491 assert_eq(0, ndiscord);
2492 assert_eq(0, nunpair1);
2493 assert_eq(0, nunpair2);
2494 assert(!pairMax);
2495 assert(!unpair1Max); // not really relevant
2496 assert(!unpair2Max); // not really relevant
2497 }
2498 cerr << "PASSED" << endl;
2499
2500 cerr << "Case 9 (repetitive pair, only one mate repetitive) ... ";
2501 {
2502 uint64_t nconcord = 0, ndiscord = 0, nunpair1 = 0, nunpair2 = 0;
2503 bool pairMax = false, unpair1Max = false, unpair2Max = false;
2504 ReportingParams rp(
2505 1, // khits
2506 1, // mhits
2507 0, // pengap
2508 true, // msample
2509 true, // discord
2510 true); // mixed
2511 ReportingState st(rp);
2512 st.nextRead(true); // unpaired read
2513 // assert(st.doneConcordant() == done1);
2514 // assert(st.doneDiscordant() == done2);
2515 // assert(st.doneUnpaired(true) == done3);
2516 // assert(st.doneUnpaired(false) == done4);
2517 // assert(st.doneUnpaired() == done5);
2518 // assert(st.done() == done6);
2519 st.foundConcordant();
2520 assert(st.repOk());
2521 st.foundUnpaired(true);
2522 assert(st.repOk());
2523 st.foundUnpaired(false);
2524 assert(st.repOk());
2525 assert(testDones(st, false, true, false, false, false, false));
2526 assert(st.repOk());
2527 st.foundConcordant();
2528 assert(st.repOk());
2529 st.foundUnpaired(true);
2530 assert(st.repOk());
2531 assert(testDones(st, true, true, true, false, false, false));
2532 assert_eq(2, st.numConcordant());
2533 assert_eq(0, st.numDiscordant());
2534 assert_eq(2, st.numUnpaired1());
2535 assert_eq(1, st.numUnpaired2());
2536 st.foundUnpaired(false);
2537 assert(st.repOk());
2538 assert(testDones(st, true, true, true, true, true, true));
2539 assert_eq(2, st.numConcordant());
2540 assert_eq(0, st.numDiscordant());
2541 assert_eq(2, st.numUnpaired1());
2542 assert_eq(2, st.numUnpaired2());
2543 st.finish();
2544 st.getReport(nconcord, ndiscord, nunpair1, nunpair2,
2545 pairMax, unpair1Max, unpair2Max);
2546 assert_eq(1, nconcord);
2547 assert_eq(0, ndiscord);
2548 assert_eq(0, nunpair1);
2549 assert_eq(0, nunpair2);
2550 assert(pairMax);
2551 assert(unpair1Max); // not really relevant
2552 assert(unpair2Max); // not really relevant
2553 }
2554 cerr << "PASSED" << endl;
2555 }
2556
2557 #endif /*def ALN_SINK_MAIN*/
2558