1 /*
2 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3 *
4 * This file is part of Bowtie 2.
5 *
6 * Bowtie 2 is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Bowtie 2 is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "aligner_bt.h"
21 #include "mask.h"
22
23 using namespace std;
24
// Guard against re-tracing a cell that an earlier backtrace already visited.
// If (rowc, colc) is in range, try to record it in sawcell_[colc]; if the
// insert fails the cell was seen before, so set 'abort' and return from the
// ENCLOSING function (the macro expands a bare 'return', and relies on
// 'abort', 'sawcell_', 'local', 'prob_' and 'hefc' being in scope there).
#define CHECK_ROW_COL(rowc, colc) \
	if(rowc >= 0 && colc >= 0) { \
		if(!sawcell_[colc].insert(rowc)) { \
			/* was already in there */ \
			abort = true; \
			return; \
		} \
		assert(local || prob_.cper_->debugCell(rowc, colc, hefc)); \
	}
34
35 /**
36 * Fill in a triangle of the DP table and backtrace from the given cell to
37 * a cell in the previous checkpoint, or to the terminal cell.
38 */
void BtBranchTracer::triangleFill(
	int64_t rw,          // row of cell to backtrace from
	int64_t cl,          // column of cell to backtrace from
	int hef,             // cell to backtrace from is H (0), E (1), or F (2)
	TAlScore targ,       // score of cell to backtrace from
	TAlScore targ_final, // score of alignment we're looking for
	RandomSource& rnd,   // pseudo-random generator (not referenced in this body)
	int64_t& row_new,    // out: row we ended up in after backtrace
	int64_t& col_new,    // out: column we ended up in after backtrace
	int& hef_new,        // out: H/E/F after backtrace
	TAlScore& targ_new,  // out: score up to cell we ended up in
	bool& done,          // out: finished tracing out an alignment?
	bool& abort)         // out: aborted b/c cell was seen before?
{
	// Phase 1 below re-fills a triangle of DP cells ending at (rw, cl) from
	// the previous checkpointed diagonals; phase 2 backtraces through that
	// triangle, extending the branch list bs_ with matches/mismatches/gaps.
	// Each CpQuad cell holds sc[0]=H, sc[1]=E, sc[2]=F scores and sc[3]=the
	// backtrace mask (see the mask-bit legend further down).
	assert_geq(rw, 0);
	assert_geq(cl, 0);
	assert_range(0, 2, hef);
	assert_lt(rw, (int64_t)prob_.qrylen_);
	assert_lt(cl, (int64_t)prob_.reflen_);
	assert(prob_.usecp_ && prob_.fill_);
	int64_t row = rw, col = cl;
	const int64_t colmin = 0;
	const int64_t rowmin = 0;
	const int64_t colmax = prob_.reflen_ - 1;
	const int64_t rowmax = prob_.qrylen_ - 1;
	assert_leq(prob_.reflen_, (TRefOff)sawcell_.size());
	assert_leq(col, (int64_t)prob_.cper_->hicol());
	assert_geq(col, (int64_t)prob_.cper_->locol());
	assert_geq(prob_.cper_->per(), 2);
	// mod = distance (in anti-diagonals) from the previous checkpointed
	// diagonal; the triangle spans mod+1 diagonals ending at (row, col)
	size_t mod = (row + col) & prob_.cper_->lomask();
	assert_lt(mod, prob_.cper_->per());
	// Allocate room for diags
	size_t depth = mod+1;
	assert_leq(depth, prob_.cper_->per());
	size_t breadth = depth;
	tri_.resize(depth);
	// Allocate room for each diag; successive diagonals shrink by one cell
	for(size_t i = 0; i < depth; i++) {
		tri_[i].resize(breadth - i);
	}
	// upperleft == true means there is no checkpointed diagonal before the
	// triangle, i.e. the triangle abuts the upper-left corner of the matrix
	bool upperleft = false;
	size_t off = (row + col) >> prob_.cper_->perpow2();
	if(off == 0) {
		upperleft = true;
	} else {
		off--;
	}
	const TAlScore sc_rdo = prob_.sc_->readGapOpen();
	const TAlScore sc_rde = prob_.sc_->readGapExtend();
	const TAlScore sc_rfo = prob_.sc_->refGapOpen();
	const TAlScore sc_rfe = prob_.sc_->refGapExtend();
	const bool local = !prob_.sc_->monotone;
	int64_t row_lo = row - (int64_t)mod;
	const CpQuad *prev2 = NULL, *prev1 = NULL;
	if(!upperleft) {
		// Read-only pointer to cells in diagonal -2.  Start one row above the
		// target row.
		prev2 = prob_.cper_->qdiag1sPtr() + (off * prob_.cper_->nrow() + row_lo - 1);
		// Read-only pointer to cells in diagonal -1.  Start one row above the
		// target row
		prev1 = prob_.cper_->qdiag2sPtr() + (off * prob_.cper_->nrow() + row_lo - 1);
#ifndef NDEBUG
		// Sanity-check the checkpointed scores we start from; MIN_I16 is the
		// 16-bit "minus infinity" sentinel, widened to MIN_I64 for comparison
		if(row >= (int64_t)mod) {
			size_t rowc = row - mod, colc = col;
			if(rowc > 0 && prob_.cper_->isCheckpointed(rowc-1, colc)) {
				TAlScore al = prev1[0].sc[0];
				if(al == MIN_I16) al = MIN_I64;
				assert_eq(prob_.cper_->scoreTriangle(rowc-1, colc, 0), al);
			}
			if(rowc > 0 && colc > 0 && prob_.cper_->isCheckpointed(rowc-1, colc-1)) {
				TAlScore al = prev2[0].sc[0];
				if(al == MIN_I16) al = MIN_I64;
				assert_eq(prob_.cper_->scoreTriangle(rowc-1, colc-1, 0), al);
			}
		}
#endif
	}
	// Pointer to cells in current diagonal
	// For each diagonal we need to fill in
	for(size_t i = 0; i < depth; i++) {
		CpQuad * cur = tri_[i].ptr();
		CpQuad * curc = cur;
		size_t doff = mod - i; // # diagonals we are away from target diag
		//assert_geq(row, (int64_t)doff);
		int64_t rowc = row - doff;
		int64_t colc = col;
		size_t neval = 0; // # cells evaluated in this diag
		ASSERT_ONLY(const CpQuad *last = NULL);
		// Fill this diagonal from upper right to lower left
		for(size_t j = 0; j < breadth; j++) {
			if(rowc >= rowmin && rowc <= rowmax &&
			   colc >= colmin && colc <= colmax)
			{
				neval++;
				// allowGaps: gaps are forbidden within 'gapbar' positions of
				// either end of the read
				int64_t fromend = prob_.qrylen_ - rowc - 1;
				bool allowGaps = fromend >= prob_.sc_->gapbar && rowc >= prob_.sc_->gapbar;
				// Fill this cell
				// Some things we might want to calculate about this cell up front:
				// 1. How many matches are possible from this cell to the cell in
				//    row, col, in case this allows us to prune
				// Get character from read
				int qc = prob_.qry_[rowc];
				// Get quality value from read
				int qq = prob_.qual_[rowc];
				assert_geq(qq, 33); // qualities are Phred+33 encoded
				// Get character from reference
				int rc = prob_.ref_[colc];
				assert_range(0, 16, rc); // reference char is a 4-bit mask
				int16_t sc_diag = prob_.sc_->score(qc, rc, qq - 33);
				// Candidate incoming scores; MIN_I16 == "no way in"
				int16_t sc_h_up = MIN_I16;
				int16_t sc_f_up = MIN_I16;
				int16_t sc_h_lf = MIN_I16;
				int16_t sc_e_lf = MIN_I16;
				if(allowGaps) {
					if(rowc > 0) {
						// Vertical (reference-gap) moves from the cell above
						assert(local || prev1[j+0].sc[2] < 0);
						if(prev1[j+0].sc[0] > MIN_I16) {
							sc_h_up = prev1[j+0].sc[0] - sc_rfo;
							if(local) sc_h_up = max<int16_t>(sc_h_up, 0);
						}
						if(prev1[j+0].sc[2] > MIN_I16) {
							sc_f_up = prev1[j+0].sc[2] - sc_rfe;
							if(local) sc_f_up = max<int16_t>(sc_f_up, 0);
						}
#ifndef NDEBUG
						TAlScore hup = prev1[j+0].sc[0];
						TAlScore fup = prev1[j+0].sc[2];
						if(hup == MIN_I16) hup = MIN_I64;
						if(fup == MIN_I16) fup = MIN_I64;
						if(local) {
							hup = max<int16_t>(hup, 0);
							fup = max<int16_t>(fup, 0);
						}
						if(prob_.cper_->isCheckpointed(rowc-1, colc)) {
							assert_eq(hup, prob_.cper_->scoreTriangle(rowc-1, colc, 0));
							assert_eq(fup, prob_.cper_->scoreTriangle(rowc-1, colc, 2));
						}
#endif
					}
					if(colc > 0) {
						// Horizontal (read-gap) moves from the cell to the left
						assert(local || prev1[j+1].sc[1] < 0);
						if(prev1[j+1].sc[0] > MIN_I16) {
							sc_h_lf = prev1[j+1].sc[0] - sc_rdo;
							if(local) sc_h_lf = max<int16_t>(sc_h_lf, 0);
						}
						if(prev1[j+1].sc[1] > MIN_I16) {
							sc_e_lf = prev1[j+1].sc[1] - sc_rde;
							if(local) sc_e_lf = max<int16_t>(sc_e_lf, 0);
						}
#ifndef NDEBUG
						TAlScore hlf = prev1[j+1].sc[0];
						TAlScore elf = prev1[j+1].sc[1];
						if(hlf == MIN_I16) hlf = MIN_I64;
						if(elf == MIN_I16) elf = MIN_I64;
						if(local) {
							hlf = max<int16_t>(hlf, 0);
							elf = max<int16_t>(elf, 0);
						}
						if(prob_.cper_->isCheckpointed(rowc, colc-1)) {
							assert_eq(hlf, prob_.cper_->scoreTriangle(rowc, colc-1, 0));
							assert_eq(elf, prob_.cper_->scoreTriangle(rowc, colc-1, 1));
						}
#endif
					}
				}
				assert(rowc <= 1 || colc <= 0 || prev2 != NULL);
				// Diagonal move: from H of the cell up-left; first row/column
				// cases get the boundary value instead
				int16_t sc_h_dg = ((rowc > 0 && colc > 0) ? prev2[j+0].sc[0] : 0);
				if(colc == 0 && rowc > 0 && !local) {
					sc_h_dg = MIN_I16;
				}
				if(sc_h_dg > MIN_I16) {
					sc_h_dg += sc_diag;
				}
				if(local) sc_h_dg = max<int16_t>(sc_h_dg, 0);
				// cerr << sc_diag << " " << sc_h_dg << " " << sc_h_up << " " << sc_f_up << " " << sc_h_lf << " " << sc_e_lf << endl;
				int mask = 0;
				// Calculate best ways into H, E, F cells starting with H.
				// Mask bits:
				// H: 1=diag, 2=hhoriz, 4=ehoriz, 8=hvert, 16=fvert
				// E: 32=hhoriz, 64=ehoriz
				// F: 128=hvert, 256=fvert
				int16_t sc_best = sc_h_dg;
				if(sc_h_dg > MIN_I64) {
					mask = 1;
				}
				// Ties keep multiple mask bits; a strictly better score
				// clears the previously-set bits first
				if(colc > 0 && sc_h_lf >= sc_best && sc_h_lf > MIN_I64) {
					if(sc_h_lf > sc_best) mask = 0;
					mask |= 2;
					sc_best = sc_h_lf;
				}
				if(colc > 0 && sc_e_lf >= sc_best && sc_e_lf > MIN_I64) {
					if(sc_e_lf > sc_best) mask = 0;
					mask |= 4;
					sc_best = sc_e_lf;
				}
				if(rowc > 0 && sc_h_up >= sc_best && sc_h_up > MIN_I64) {
					if(sc_h_up > sc_best) mask = 0;
					mask |= 8;
					sc_best = sc_h_up;
				}
				if(rowc > 0 && sc_f_up >= sc_best && sc_f_up > MIN_I64) {
					if(sc_f_up > sc_best) mask = 0;
					mask |= 16;
					sc_best = sc_f_up;
				}
				// Calculate best way into E cell
				int16_t sc_e_best = sc_h_lf;
				if(colc > 0) {
					if(sc_h_lf >= sc_e_lf && sc_h_lf > MIN_I64) {
						if(sc_h_lf == sc_e_lf) {
							mask |= 64;
						}
						mask |= 32;
					} else if(sc_e_lf > MIN_I64) {
						sc_e_best = sc_e_lf;
						mask |= 64;
					}
				}
				if(sc_e_best > sc_best) {
					sc_best = sc_e_best;
					mask &= ~31; // don't go diagonal
				}
				// Calculate best way into F cell
				int16_t sc_f_best = sc_h_up;
				if(rowc > 0) {
					if(sc_h_up >= sc_f_up && sc_h_up > MIN_I64) {
						if(sc_h_up == sc_f_up) {
							mask |= 256;
						}
						mask |= 128;
					} else if(sc_f_up > MIN_I64) {
						sc_f_best = sc_f_up;
						mask |= 256;
					}
				}
				if(sc_f_best > sc_best) {
					sc_best = sc_f_best;
					mask &= ~127; // don't go horizontal or diagonal
				}
				// Install results in cur
				assert(!prob_.sc_->monotone || sc_best <= 0);
				assert(!prob_.sc_->monotone || sc_e_best <= 0);
				assert(!prob_.sc_->monotone || sc_f_best <= 0);
				curc->sc[0] = sc_best;
				assert( local || sc_e_best < 0);
				assert( local || sc_f_best < 0);
				assert(!local || sc_e_best >= 0 || sc_e_best == MIN_I16);
				assert(!local || sc_f_best >= 0 || sc_f_best == MIN_I16);
				curc->sc[1] = sc_e_best;
				curc->sc[2] = sc_f_best;
				curc->sc[3] = mask;
				// cerr << curc->sc[0] << " " << curc->sc[1] << " " << curc->sc[2] << " " << curc->sc[3] << endl;
				ASSERT_ONLY(last = curc);
#ifndef NDEBUG
				// Cross-check freshly computed scores against checkpointed ones
				if(prob_.cper_->isCheckpointed(rowc, colc)) {
					if(local) {
						sc_e_best = max<int16_t>(sc_e_best, 0);
						sc_f_best = max<int16_t>(sc_f_best, 0);
					}
					TAlScore sc_best64   = sc_best;   if(sc_best   == MIN_I16) sc_best64   = MIN_I64;
					TAlScore sc_e_best64 = sc_e_best; if(sc_e_best == MIN_I16) sc_e_best64 = MIN_I64;
					TAlScore sc_f_best64 = sc_f_best; if(sc_f_best == MIN_I16) sc_f_best64 = MIN_I64;
					assert_eq(prob_.cper_->scoreTriangle(rowc, colc, 0), sc_best64);
					assert_eq(prob_.cper_->scoreTriangle(rowc, colc, 1), sc_e_best64);
					assert_eq(prob_.cper_->scoreTriangle(rowc, colc, 2), sc_f_best64);
				}
#endif
			}
			// Update row, col
			assert_lt(rowc, (int64_t)prob_.qrylen_);
			rowc++;
			colc--;
			curc++;
		} // for(size_t j = 0; j < breadth; j++)
		if(i == depth-1) {
			// Final iteration: only the single target cell should have been
			// evaluated, and its score must match the requested 'targ'
			assert(last != NULL);
			assert_eq(1, neval);
			assert_neq(0, last->sc[3]);
			assert_eq(targ, last->sc[hef]);
		} else {
			// Advance the sliding diagonal pointers for the next iteration
			breadth--;
			prev2 = prev1 + 1;
			prev1 = cur;
		}
	} // for(size_t i = 0; i < depth; i++)
	//
	// Now backtrack through the triangle.  Abort as soon as we enter a cell
	// that was visited by a previous backtrace.
	//
	int64_t rowc = row, colc = col;
	size_t curid;
	int hefc = hef;
	if(bs_.empty()) {
		// Start an initial branch
		CHECK_ROW_COL(rowc, colc);
		curid = bs_.alloc();
		assert_eq(0, curid);
		Edit e;
		bs_[curid].init(
			prob_,
			0,      // parent ID
			0,      // penalty
			0,      // score_en
			rowc,   // row
			colc,   // col
			e,      // edit
			0,      // hef
			true,   // I am the root
			false); // don't try to extend with exact matches
		bs_[curid].len_ = 0;
	} else {
		// Continue the most recently allocated branch
		curid = bs_.size()-1;
	}
	size_t idx_orig = (row + col) >> prob_.cper_->perpow2();
	while(true) {
		// What depth are we?
		size_t mod = (rowc + colc) & prob_.cper_->lomask();
		assert_lt(mod, prob_.cper_->per());
		CpQuad * cur = tri_[mod].ptr();
		int64_t row_off = rowc - row_lo - mod;
		assert(!local || cur[row_off].sc[0] > 0);
		assert_geq(row_off, 0);
		int mask = cur[row_off].sc[3];
		assert_gt(mask, 0);
		int sel = -1;
		// Select what type of move to make, which depends on whether we're
		// currently in H, E, F:
		if(hefc == 0) {
			if(       (mask & 1) != 0) {
				// diagonal
				sel = 0;
			} else if((mask & 8) != 0) {
				// up to H
				sel = 3;
			} else if((mask & 16) != 0) {
				// up to F
				sel = 4;
			} else if((mask & 2) != 0) {
				// left to H
				sel = 1;
			} else if((mask & 4) != 0) {
				// left to E
				sel = 2;
			}
		} else if(hefc == 1) {
			if(       (mask & 32) != 0) {
				// left to H
				sel = 5;
			} else if((mask & 64) != 0) {
				// left to E
				sel = 6;
			}
		} else {
			assert_eq(2, hefc);
			if(       (mask & 128) != 0) {
				// up to H
				sel = 7;
			} else if((mask & 256) != 0) {
				// up to F
				sel = 8;
			}
		}
		assert_geq(sel, 0);
		// Get character from read
		int qc = prob_.qry_[rowc], qq = prob_.qual_[rowc];
		// Get character from reference
		int rc = prob_.ref_[colc];
		assert_range(0, 16, rc);
		// Now that we know what type of move to make, make it, updating our
		// row and column and moving updating the branch.
		if(sel == 0) {
			// Diagonal move: either a match or a mismatch edit
			assert_geq(rowc, 0);
			assert_geq(colc, 0);
			TAlScore scd = prob_.sc_->score(qc, rc, qq - 33);
			if((rc & (1 << qc)) == 0) {
				// Mismatch
				size_t id = curid;
				// Check if the previous branch was the initial (bottommost)
				// branch with no matches.  If so, the mismatch should be added
				// to the initial branch, instead of starting a new branch.
				bool empty = (bs_[curid].len_ == 0 && curid == 0);
				if(!empty) {
					id = bs_.alloc();
				}
				Edit e((int)rowc, mask2dna[rc], "ACGTN"[qc], EDIT_TYPE_MM);
				assert_lt(scd, 0);
				TAlScore score_en = bs_[curid].score_st_ + scd;
				bs_[id].init(
					prob_,
					curid,    // parent ID
					-scd,     // penalty
					score_en, // score_en
					rowc,     // row
					colc,     // col
					e,        // edit
					hefc,     // hef
					empty,    // root?
					false);   // don't try to extend with exact matches
				//assert(!local || bs_[id].score_st_ >= 0);
				curid = id;
			} else {
				// Match
				bs_[curid].score_st_ += prob_.sc_->match();
				bs_[curid].len_++;
				assert_leq((int64_t)bs_[curid].len_, bs_[curid].row_ + 1);
			}
			rowc--;
			colc--;
			assert(local || bs_[curid].score_st_ >= targ_final);
			hefc = 0;
		} else if((sel >= 1 && sel <= 2) || (sel >= 5 && sel <= 6)) {
			assert_gt(colc, 0);
			// Read gap
			size_t id = bs_.alloc();
			Edit e((int)rowc+1, mask2dna[rc], '-', EDIT_TYPE_READ_GAP);
			// Extension penalty applies when the previous branch is an
			// immediately preceding read-gap edit; otherwise open penalty
			TAlScore gapp = prob_.sc_->readGapOpen();
			if(bs_[curid].len_ == 0 && bs_[curid].e_.inited() && bs_[curid].e_.isReadGap()) {
				gapp = prob_.sc_->readGapExtend();
			}
			TAlScore score_en = bs_[curid].score_st_ - gapp;
			bs_[id].init(
				prob_,
				curid,    // parent ID
				gapp,     // penalty
				score_en, // score_en
				rowc,     // row
				colc-1,   // col
				e,        // edit
				hefc,     // hef
				false,    // root?
				false);   // don't try to extend with exact matches
			colc--;
			curid = id;
			assert( local || bs_[curid].score_st_ >= targ_final);
			//assert(!local || bs_[curid].score_st_ >= 0);
			if(sel == 1 || sel == 5) {
				hefc = 0;
			} else {
				hefc = 1;
			}
		} else {
			assert_gt(rowc, 0);
			// Reference gap
			size_t id = bs_.alloc();
			Edit e((int)rowc, '-', "ACGTN"[qc], EDIT_TYPE_REF_GAP);
			// Extension penalty applies when the previous branch is an
			// immediately preceding reference-gap edit; otherwise open penalty
			TAlScore gapp = prob_.sc_->refGapOpen();
			if(bs_[curid].len_ == 0 && bs_[curid].e_.inited() && bs_[curid].e_.isRefGap()) {
				gapp = prob_.sc_->refGapExtend();
			}
			TAlScore score_en = bs_[curid].score_st_ - gapp;
			bs_[id].init(
				prob_,
				curid,    // parent ID
				gapp,     // penalty
				score_en, // score_en
				rowc-1,   // row
				colc,     // col
				e,        // edit
				hefc,     // hef
				false,    // root?
				false);   // don't try to extend with exact matches
			rowc--;
			curid = id;
			//assert(!local || bs_[curid].score_st_ >= 0);
			if(sel == 3 || sel == 7) {
				hefc = 0;
			} else {
				hefc = 2;
			}
		}
		CHECK_ROW_COL(rowc, colc);
		// Figure out whether the new cell is still inside this triangle
		// (idx == idx_orig), or whether we crossed into the previous
		// checkpointed stripe (idx < idx_orig)
		size_t mod_new = (rowc + colc) & prob_.cper_->lomask();
		size_t idx = (rowc + colc) >> prob_.cper_->perpow2();
		assert_lt(mod_new, prob_.cper_->per());
		int64_t row_off_new = rowc - row_lo - mod_new;
		CpQuad * cur_new = NULL;
		if(colc >= 0 && rowc >= 0 && idx == idx_orig) {
			cur_new = tri_[mod_new].ptr();
		}
		bool hit_new_tri = (idx < idx_orig && colc >= 0 && rowc >= 0);
		// Check whether we made it to the top row or to a cell with score 0
		if(colc < 0 || rowc < 0 ||
		   (cur_new != NULL && (local && cur_new[row_off_new].sc[0] == 0)))
		{
			done = true;
			assert(bs_[curid].isSolution(prob_));
			addSolution(curid);
#ifndef NDEBUG
			// A check to see if any two adjacent branches in the backtrace
			// overlap.  If they do, the whole alignment will be filtered out
			// in trySolution(...)
			size_t cur = curid;
			if(!bs_[cur].root_) {
				size_t next = bs_[cur].parentId_;
				while(!bs_[next].root_) {
					assert_neq(cur, next);
					if(bs_[next].len_ != 0 || bs_[cur].len_ == 0) {
						assert(!bs_[cur].overlap(prob_, bs_[next]));
					}
					cur = next;
					next = bs_[cur].parentId_;
				}
			}
#endif
			return;
		}
		if(hit_new_tri) {
			// Crossed into the previous checkpoint stripe: report where the
			// caller should resume the backtrace from
			assert(rowc < 0 || colc < 0 || prob_.cper_->isCheckpointed(rowc, colc));
			row_new = rowc; col_new = colc;
			hef_new = hefc;
			done = false;
			if(rowc < 0 || colc < 0) {
				assert(local);
				targ_new = 0;
			} else {
				targ_new = prob_.cper_->scoreTriangle(rowc, colc, hefc);
			}
			if(local && targ_new == 0) {
				done = true;
				assert(bs_[curid].isSolution(prob_));
				addSolution(curid);
			}
			assert((row_new >= 0 && col_new >= 0) || done);
			return;
		}
	}
	assert(false); // loop exits only via the returns above
}
568
#ifndef NDEBUG
// Debug-only: compare a just-computed 16-bit score 'ss' against the value the
// checkpointer recorded for cell (row, col, hef).  Before comparing, widen
// the MIN_I16 sentinel to MIN_I64 and, in local-alignment mode, clamp both
// sides up to 0.  Compiles away entirely when NDEBUG is defined.
#define DEBUG_CHECK(ss, row, col, hef) { \
	if(prob_.cper_->debug() && row >= 0 && col >= 0) { \
		TAlScore s = ss; \
		if(s == MIN_I16) s = MIN_I64; \
		if(local && s < 0) s = 0; \
		TAlScore deb = prob_.cper_->debugCell(row, col, hef); \
		if(local && deb < 0) deb = 0; \
		assert_eq(s, deb); \
	} \
}
#else
#define DEBUG_CHECK(ss, row, col, hef)
#endif
583
584
585 /**
586 * Fill in a square of the DP table and backtrace from the given cell to
587 * a cell in the previous checkpoint, or to the terminal cell.
588 */
squareFill(int64_t rw,int64_t cl,int hef,TAlScore targ,TAlScore targ_final,RandomSource & rnd,int64_t & row_new,int64_t & col_new,int & hef_new,TAlScore & targ_new,bool & done,bool & abort)589 void BtBranchTracer::squareFill(
590 int64_t rw, // row of cell to backtrace from
591 int64_t cl, // column of cell to backtrace from
592 int hef, // cell to backtrace from is H (0), E (1), or F (2)
593 TAlScore targ, // score of cell to backtrace from
594 TAlScore targ_final, // score of alignment we're looking for
595 RandomSource& rnd, // pseudo-random generator
596 int64_t& row_new, // out: row we ended up in after backtrace
597 int64_t& col_new, // out: column we ended up in after backtrace
598 int& hef_new, // out: H/E/F after backtrace
599 TAlScore& targ_new, // out: score up to cell we ended up in
600 bool& done, // out: finished tracing out an alignment?
601 bool& abort) // out: aborted b/c cell was seen before?
602 {
603 assert_geq(rw, 0);
604 assert_geq(cl, 0);
605 assert_range(0, 2, hef);
606 assert_lt(rw, (int64_t)prob_.qrylen_);
607 assert_lt(cl, (int64_t)prob_.reflen_);
608 assert(prob_.usecp_ && prob_.fill_);
609 const bool is8_ = prob_.cper_->is8_;
610 int64_t row = rw, col = cl;
611 assert_leq(prob_.reflen_, (TRefOff)sawcell_.size());
612 assert_leq(col, (int64_t)prob_.cper_->hicol());
613 assert_geq(col, (int64_t)prob_.cper_->locol());
614 assert_geq(prob_.cper_->per(), 2);
615 size_t xmod = col & prob_.cper_->lomask();
616 size_t ymod = row & prob_.cper_->lomask();
617 size_t xdiv = col >> prob_.cper_->perpow2();
618 size_t ydiv = row >> prob_.cper_->perpow2();
619 size_t sq_ncol = xmod+1, sq_nrow = ymod+1;
620 sq_.resize(sq_ncol * sq_nrow);
621 bool upper = ydiv == 0;
622 bool left = xdiv == 0;
623 const TAlScore sc_rdo = prob_.sc_->readGapOpen();
624 const TAlScore sc_rde = prob_.sc_->readGapExtend();
625 const TAlScore sc_rfo = prob_.sc_->refGapOpen();
626 const TAlScore sc_rfe = prob_.sc_->refGapExtend();
627 const bool local = !prob_.sc_->monotone;
628 const CpQuad *qup = NULL;
629 const __m128i *qlf = NULL;
630 size_t per = prob_.cper_->per_;
631 ASSERT_ONLY(size_t nrow = prob_.cper_->nrow());
632 size_t ncol = prob_.cper_->ncol();
633 assert_eq(prob_.qrylen_, nrow);
634 assert_eq(prob_.reflen_, (TRefOff)ncol);
635 size_t niter = prob_.cper_->niter_;
636 if(!upper) {
637 qup = prob_.cper_->qrows_.ptr() + (ncol * (ydiv-1)) + xdiv * per;
638 }
639 if(!left) {
640 // Set up the column pointers to point to the first __m128i word in the
641 // relevant column
642 size_t off = (niter << 2) * (xdiv-1);
643 qlf = prob_.cper_->qcols_.ptr() + off;
644 }
645 size_t xedge = xdiv * per; // absolute offset of leftmost cell in square
646 size_t yedge = ydiv * per; // absolute offset of topmost cell in square
647 size_t xi = xedge, yi = yedge; // iterators for columns, rows
648 size_t ii = 0; // iterator into packed square
649 // Iterate over rows, then over columns
650 size_t m128mod = yi % prob_.cper_->niter_;
651 size_t m128div = yi / prob_.cper_->niter_;
652 int16_t sc_h_dg_lastrow = MIN_I16;
653 for(size_t i = 0; i <= ymod; i++, yi++) {
654 assert_lt(yi, nrow);
655 xi = xedge;
656 // Handling for first column is done outside the loop
657 size_t fromend = prob_.qrylen_ - yi - 1;
658 bool allowGaps = fromend >= (size_t)prob_.sc_->gapbar && yi >= (size_t)prob_.sc_->gapbar;
659 // Get character, quality from read
660 int qc = prob_.qry_[yi], qq = prob_.qual_[yi];
661 assert_geq(qq, 33);
662 int16_t sc_h_lf_last = MIN_I16;
663 int16_t sc_e_lf_last = MIN_I16;
664 for(size_t j = 0; j <= xmod; j++, xi++) {
665 assert_lt(xi, ncol);
666 // Get character from reference
667 int rc = prob_.ref_[xi];
668 assert_range(0, 16, rc);
669 int16_t sc_diag = prob_.sc_->score(qc, rc, qq - 33);
670 int16_t sc_h_up = MIN_I16, sc_f_up = MIN_I16,
671 sc_h_lf = MIN_I16, sc_e_lf = MIN_I16,
672 sc_h_dg = MIN_I16;
673 int16_t sc_h_up_c = MIN_I16, sc_f_up_c = MIN_I16,
674 sc_h_lf_c = MIN_I16, sc_e_lf_c = MIN_I16,
675 sc_h_dg_c = MIN_I16;
676 if(yi == 0) {
677 // If I'm in the first first row or column set it to 0
678 sc_h_dg = 0;
679 } else if(xi == 0) {
680 // Do nothing; leave it at min
681 if(local) {
682 sc_h_dg = 0;
683 }
684 } else if(i == 0 && j == 0) {
685 // Otherwise, if I'm in the upper-left square corner, I can get
686 // it from the checkpoint
687 sc_h_dg = qup[-1].sc[0];
688 } else if(j == 0) {
689 // Otherwise, if I'm in the leftmost cell of this row, I can
690 // get it from sc_h_lf in first column of previous row
691 sc_h_dg = sc_h_dg_lastrow;
692 } else {
693 // Otherwise, I can get it from qup
694 sc_h_dg = qup[j-1].sc[0];
695 }
696 if(yi > 0 && xi > 0) DEBUG_CHECK(sc_h_dg, yi-1, xi-1, 2);
697
698 // If we're in the leftmost column, calculate sc_h_lf regardless of
699 // allowGaps.
700 if(j == 0 && xi > 0) {
701 // Get values for left neighbors from the checkpoint
702 if(is8_) {
703 size_t vecoff = (m128mod << 6) + m128div;
704 sc_e_lf = ((uint8_t*)(qlf + 0))[vecoff];
705 sc_h_lf = ((uint8_t*)(qlf + 2))[vecoff];
706 if(local) {
707 // No adjustment
708 } else {
709 if(sc_h_lf == 0) sc_h_lf = MIN_I16;
710 else sc_h_lf -= 0xff;
711 if(sc_e_lf == 0) sc_e_lf = MIN_I16;
712 else sc_e_lf -= 0xff;
713 }
714 } else {
715 size_t vecoff = (m128mod << 5) + m128div;
716 sc_e_lf = ((int16_t*)(qlf + 0))[vecoff];
717 sc_h_lf = ((int16_t*)(qlf + 2))[vecoff];
718 if(local) {
719 sc_h_lf += 0x8000; assert_geq(sc_h_lf, 0);
720 sc_e_lf += 0x8000; assert_geq(sc_e_lf, 0);
721 } else {
722 if(sc_h_lf != MIN_I16) sc_h_lf -= 0x7fff;
723 if(sc_e_lf != MIN_I16) sc_e_lf -= 0x7fff;
724 }
725 }
726 DEBUG_CHECK(sc_e_lf, yi, xi-1, 0);
727 DEBUG_CHECK(sc_h_lf, yi, xi-1, 2);
728 sc_h_dg_lastrow = sc_h_lf;
729 }
730
731 if(allowGaps) {
732 if(j == 0 /* at left edge */ && xi > 0 /* not extreme */) {
733 sc_h_lf_c = sc_h_lf;
734 sc_e_lf_c = sc_e_lf;
735 if(sc_h_lf_c != MIN_I16) sc_h_lf_c -= sc_rdo;
736 if(sc_e_lf_c != MIN_I16) sc_e_lf_c -= sc_rde;
737 assert_leq(sc_h_lf_c, prob_.cper_->perf_);
738 assert_leq(sc_e_lf_c, prob_.cper_->perf_);
739 } else if(xi > 0) {
740 // Get values for left neighbors from the previous iteration
741 if(sc_h_lf_last != MIN_I16) {
742 sc_h_lf = sc_h_lf_last;
743 sc_h_lf_c = sc_h_lf - sc_rdo;
744 }
745 if(sc_e_lf_last != MIN_I16) {
746 sc_e_lf = sc_e_lf_last;
747 sc_e_lf_c = sc_e_lf - sc_rde;
748 }
749 }
750 if(yi > 0 /* not extreme */) {
751 // Get column values
752 assert(qup != NULL);
753 assert(local || qup[j].sc[2] < 0);
754 if(qup[j].sc[0] > MIN_I16) {
755 DEBUG_CHECK(qup[j].sc[0], yi-1, xi, 2);
756 sc_h_up = qup[j].sc[0];
757 sc_h_up_c = sc_h_up - sc_rfo;
758 }
759 if(qup[j].sc[2] > MIN_I16) {
760 DEBUG_CHECK(qup[j].sc[2], yi-1, xi, 1);
761 sc_f_up = qup[j].sc[2];
762 sc_f_up_c = sc_f_up - sc_rfe;
763 }
764 }
765 if(local) {
766 sc_h_up_c = max<int16_t>(sc_h_up_c, 0);
767 sc_f_up_c = max<int16_t>(sc_f_up_c, 0);
768 sc_h_lf_c = max<int16_t>(sc_h_lf_c, 0);
769 sc_e_lf_c = max<int16_t>(sc_e_lf_c, 0);
770 }
771 }
772
773 if(sc_h_dg > MIN_I16) {
774 sc_h_dg_c = sc_h_dg + sc_diag;
775 }
776 if(local) sc_h_dg_c = max<int16_t>(sc_h_dg_c, 0);
777
778 int mask = 0;
779 // Calculate best ways into H, E, F cells starting with H.
780 // Mask bits:
781 // H: 1=diag, 2=hhoriz, 4=ehoriz, 8=hvert, 16=fvert
782 // E: 32=hhoriz, 64=ehoriz
783 // F: 128=hvert, 256=fvert
784 int16_t sc_best = sc_h_dg_c;
785 if(sc_h_dg_c > MIN_I64) {
786 mask = 1;
787 }
788 if(xi > 0 && sc_h_lf_c >= sc_best && sc_h_lf_c > MIN_I64) {
789 if(sc_h_lf_c > sc_best) mask = 0;
790 mask |= 2;
791 sc_best = sc_h_lf_c;
792 }
793 if(xi > 0 && sc_e_lf_c >= sc_best && sc_e_lf_c > MIN_I64) {
794 if(sc_e_lf_c > sc_best) mask = 0;
795 mask |= 4;
796 sc_best = sc_e_lf_c;
797 }
798 if(yi > 0 && sc_h_up_c >= sc_best && sc_h_up_c > MIN_I64) {
799 if(sc_h_up_c > sc_best) mask = 0;
800 mask |= 8;
801 sc_best = sc_h_up_c;
802 }
803 if(yi > 0 && sc_f_up_c >= sc_best && sc_f_up_c > MIN_I64) {
804 if(sc_f_up_c > sc_best) mask = 0;
805 mask |= 16;
806 sc_best = sc_f_up_c;
807 }
808 // Calculate best way into E cell
809 int16_t sc_e_best = sc_h_lf_c;
810 if(xi > 0) {
811 if(sc_h_lf_c >= sc_e_lf_c && sc_h_lf_c > MIN_I64) {
812 if(sc_h_lf_c == sc_e_lf_c) {
813 mask |= 64;
814 }
815 mask |= 32;
816 } else if(sc_e_lf_c > MIN_I64) {
817 sc_e_best = sc_e_lf_c;
818 mask |= 64;
819 }
820 }
821 if(sc_e_best > sc_best) {
822 sc_best = sc_e_best;
823 mask &= ~31; // don't go diagonal
824 }
825 // Calculate best way into F cell
826 int16_t sc_f_best = sc_h_up_c;
827 if(yi > 0) {
828 if(sc_h_up_c >= sc_f_up_c && sc_h_up_c > MIN_I64) {
829 if(sc_h_up_c == sc_f_up_c) {
830 mask |= 256;
831 }
832 mask |= 128;
833 } else if(sc_f_up_c > MIN_I64) {
834 sc_f_best = sc_f_up_c;
835 mask |= 256;
836 }
837 }
838 if(sc_f_best > sc_best) {
839 sc_best = sc_f_best;
840 mask &= ~127; // don't go horizontal or diagonal
841 }
842 // Install results in cur
843 assert( local || sc_best <= 0);
844 sq_[ii+j].sc[0] = sc_best;
845 assert( local || sc_e_best < 0);
846 assert( local || sc_f_best < 0);
847 assert(!local || sc_e_best >= 0 || sc_e_best == MIN_I16);
848 assert(!local || sc_f_best >= 0 || sc_f_best == MIN_I16);
849 sq_[ii+j].sc[1] = sc_e_best;
850 sq_[ii+j].sc[2] = sc_f_best;
851 sq_[ii+j].sc[3] = mask;
852 DEBUG_CHECK(sq_[ii+j].sc[0], yi, xi, 2); // H
853 DEBUG_CHECK(sq_[ii+j].sc[1], yi, xi, 0); // E
854 DEBUG_CHECK(sq_[ii+j].sc[2], yi, xi, 1); // F
855 // Update sc_h_lf_last, sc_e_lf_last
856 sc_h_lf_last = sc_best;
857 sc_e_lf_last = sc_e_best;
858 }
859 // Update m128mod, m128div
860 m128mod++;
861 if(m128mod == prob_.cper_->niter_) {
862 m128mod = 0;
863 m128div++;
864 }
865 // update qup
866 ii += sq_ncol;
867 // dimensions of sq_
868 qup = sq_.ptr() + sq_ncol * i;
869 }
870 assert_eq(targ, sq_[ymod * sq_ncol + xmod].sc[hef]);
871 //
872 // Now backtrack through the triangle. Abort as soon as we enter a cell
873 // that was visited by a previous backtrace.
874 //
875 int64_t rowc = row, colc = col;
876 size_t curid;
877 int hefc = hef;
878 if(bs_.empty()) {
879 // Start an initial branch
880 CHECK_ROW_COL(rowc, colc);
881 curid = bs_.alloc();
882 assert_eq(0, curid);
883 Edit e;
884 bs_[curid].init(
885 prob_,
886 0, // parent ID
887 0, // penalty
888 0, // score_en
889 rowc, // row
890 colc, // col
891 e, // edit
892 0, // hef
893 true, // root?
894 false); // don't try to extend with exact matches
895 bs_[curid].len_ = 0;
896 } else {
897 curid = bs_.size()-1;
898 }
899 size_t ymodTimesNcol = ymod * sq_ncol;
900 while(true) {
901 // What depth are we?
902 assert_eq(ymodTimesNcol, ymod * sq_ncol);
903 CpQuad * cur = sq_.ptr() + ymodTimesNcol + xmod;
904 int mask = cur->sc[3];
905 assert_gt(mask, 0);
906 int sel = -1;
907 // Select what type of move to make, which depends on whether we're
908 // currently in H, E, F:
909 if(hefc == 0) {
910 if( (mask & 1) != 0) {
911 // diagonal
912 sel = 0;
913 } else if((mask & 8) != 0) {
914 // up to H
915 sel = 3;
916 } else if((mask & 16) != 0) {
917 // up to F
918 sel = 4;
919 } else if((mask & 2) != 0) {
920 // left to H
921 sel = 1;
922 } else if((mask & 4) != 0) {
923 // left to E
924 sel = 2;
925 }
926 } else if(hefc == 1) {
927 if( (mask & 32) != 0) {
928 // left to H
929 sel = 5;
930 } else if((mask & 64) != 0) {
931 // left to E
932 sel = 6;
933 }
934 } else {
935 assert_eq(2, hefc);
936 if( (mask & 128) != 0) {
937 // up to H
938 sel = 7;
939 } else if((mask & 256) != 0) {
940 // up to F
941 sel = 8;
942 }
943 }
944 assert_geq(sel, 0);
945 // Get character from read
946 int qc = prob_.qry_[rowc], qq = prob_.qual_[rowc];
947 // Get character from reference
948 int rc = prob_.ref_[colc];
949 assert_range(0, 16, rc);
950 bool xexit = false, yexit = false;
951 // Now that we know what type of move to make, make it, updating our
952 // row and column and moving updating the branch.
953 if(sel == 0) {
954 assert_geq(rowc, 0);
955 assert_geq(colc, 0);
956 TAlScore scd = prob_.sc_->score(qc, rc, qq - 33);
957 if((rc & (1 << qc)) == 0) {
958 // Mismatch
959 size_t id = curid;
960 // Check if the previous branch was the initial (bottommost)
961 // branch with no matches. If so, the mismatch should be added
962 // to the initial branch, instead of starting a new branch.
963 bool empty = (bs_[curid].len_ == 0 && curid == 0);
964 if(!empty) {
965 id = bs_.alloc();
966 }
967 Edit e((int)rowc, mask2dna[rc], "ACGTN"[qc], EDIT_TYPE_MM);
968 assert_lt(scd, 0);
969 TAlScore score_en = bs_[curid].score_st_ + scd;
970 bs_[id].init(
971 prob_,
972 curid, // parent ID
973 -scd, // penalty
974 score_en, // score_en
975 rowc, // row
976 colc, // col
977 e, // edit
978 hefc, // hef
979 empty, // root?
980 false); // don't try to extend with exact matches
981 curid = id;
982 //assert(!local || bs_[curid].score_st_ >= 0);
983 } else {
984 // Match
985 bs_[curid].score_st_ += prob_.sc_->match();
986 bs_[curid].len_++;
987 assert_leq((int64_t)bs_[curid].len_, bs_[curid].row_ + 1);
988 }
989 if(xmod == 0) xexit = true;
990 if(ymod == 0) yexit = true;
991 rowc--; ymod--; ymodTimesNcol -= sq_ncol;
992 colc--; xmod--;
993 assert(local || bs_[curid].score_st_ >= targ_final);
994 hefc = 0;
995 } else if((sel >= 1 && sel <= 2) || (sel >= 5 && sel <= 6)) {
996 assert_gt(colc, 0);
997 // Read gap
998 size_t id = bs_.alloc();
999 Edit e((int)rowc+1, mask2dna[rc], '-', EDIT_TYPE_READ_GAP);
1000 TAlScore gapp = prob_.sc_->readGapOpen();
1001 if(bs_[curid].len_ == 0 && bs_[curid].e_.inited() && bs_[curid].e_.isReadGap()) {
1002 gapp = prob_.sc_->readGapExtend();
1003 }
1004 //assert(!local || bs_[curid].score_st_ >= gapp);
1005 TAlScore score_en = bs_[curid].score_st_ - gapp;
1006 bs_[id].init(
1007 prob_,
1008 curid, // parent ID
1009 gapp, // penalty
1010 score_en, // score_en
1011 rowc, // row
1012 colc-1, // col
1013 e, // edit
1014 hefc, // hef
1015 false, // root?
1016 false); // don't try to extend with exact matches
1017 if(xmod == 0) xexit = true;
1018 colc--; xmod--;
1019 curid = id;
1020 assert( local || bs_[curid].score_st_ >= targ_final);
1021 //assert(!local || bs_[curid].score_st_ >= 0);
1022 if(sel == 1 || sel == 5) {
1023 hefc = 0;
1024 } else {
1025 hefc = 1;
1026 }
1027 } else {
1028 assert_gt(rowc, 0);
1029 // Reference gap
1030 size_t id = bs_.alloc();
1031 Edit e((int)rowc, '-', "ACGTN"[qc], EDIT_TYPE_REF_GAP);
1032 TAlScore gapp = prob_.sc_->refGapOpen();
1033 if(bs_[curid].len_ == 0 && bs_[curid].e_.inited() && bs_[curid].e_.isRefGap()) {
1034 gapp = prob_.sc_->refGapExtend();
1035 }
1036 //assert(!local || bs_[curid].score_st_ >= gapp);
1037 TAlScore score_en = bs_[curid].score_st_ - gapp;
1038 bs_[id].init(
1039 prob_,
1040 curid, // parent ID
1041 gapp, // penalty
1042 score_en, // score_en
1043 rowc-1, // row
1044 colc, // col
1045 e, // edit
1046 hefc, // hef
1047 false, // root?
1048 false); // don't try to extend with exact matches
1049 if(ymod == 0) yexit = true;
1050 rowc--; ymod--; ymodTimesNcol -= sq_ncol;
1051 curid = id;
1052 assert( local || bs_[curid].score_st_ >= targ_final);
1053 //assert(!local || bs_[curid].score_st_ >= 0);
1054 if(sel == 3 || sel == 7) {
1055 hefc = 0;
1056 } else {
1057 hefc = 2;
1058 }
1059 }
1060 CHECK_ROW_COL(rowc, colc);
1061 CpQuad * cur_new = NULL;
1062 if(!xexit && !yexit) {
1063 cur_new = sq_.ptr() + ymodTimesNcol + xmod;
1064 }
1065 // Check whether we made it to the top row or to a cell with score 0
1066 if(colc < 0 || rowc < 0 ||
1067 (cur_new != NULL && local && cur_new->sc[0] == 0))
1068 {
1069 done = true;
1070 assert(bs_[curid].isSolution(prob_));
1071 addSolution(curid);
1072 #ifndef NDEBUG
1073 // A check to see if any two adjacent branches in the backtrace
1074 // overlap. If they do, the whole alignment will be filtered out
1075 // in trySolution(...)
1076 size_t cur = curid;
1077 if(!bs_[cur].root_) {
1078 size_t next = bs_[cur].parentId_;
1079 while(!bs_[next].root_) {
1080 assert_neq(cur, next);
1081 if(bs_[next].len_ != 0 || bs_[cur].len_ == 0) {
1082 assert(!bs_[cur].overlap(prob_, bs_[next]));
1083 }
1084 cur = next;
1085 next = bs_[cur].parentId_;
1086 }
1087 }
1088 #endif
1089 return;
1090 }
1091 assert(!xexit || hefc == 0 || hefc == 1);
1092 assert(!yexit || hefc == 0 || hefc == 2);
1093 if(xexit || yexit) {
1094 //assert(rowc < 0 || colc < 0 || prob_.cper_->isCheckpointed(rowc, colc));
1095 row_new = rowc; col_new = colc;
1096 hef_new = hefc;
1097 done = false;
1098 if(rowc < 0 || colc < 0) {
1099 assert(local);
1100 targ_new = 0;
1101 } else {
1102 // TODO: Don't use scoreSquare
1103 targ_new = prob_.cper_->scoreSquare(rowc, colc, hefc);
1104 assert(local || targ_new >= targ);
1105 assert(local || targ_new >= targ_final);
1106 }
1107 if(local && targ_new == 0) {
1108 assert_eq(0, hefc);
1109 done = true;
1110 assert(bs_[curid].isSolution(prob_));
1111 addSolution(curid);
1112 }
1113 assert((row_new >= 0 && col_new >= 0) || done);
1114 return;
1115 }
1116 }
1117 assert(false);
1118 }
1119
1120 /**
1121 * Caller gives us score_en, row and col. We figure out score_st and len_
1122 * by comparing characters from the strings.
1123 *
1124 * If this branch comes after a mismatch, (row, col) describe the cell that the
1125 * mismatch occurs in. len_ is initially set to 1, and the next cell we test
1126 * is the next cell up and to the left (row-1, col-1).
1127 *
1128 * If this branch comes after a read gap, (row, col) describe the leftmost cell
1129 * involved in the gap. len_ is initially set to 0, and the next cell we test
1130 * is the current cell (row, col).
1131 *
1132 * If this branch comes after a reference gap, (row, col) describe the upper
1133 * cell involved in the gap. len_ is initially set to 0, and the next cell we
1134 * test is the current cell (row, col).
1135 */
init(const BtBranchProblem & prob,size_t parentId,TAlScore penalty,TAlScore score_en,int64_t row,int64_t col,Edit e,int hef,bool root,bool extend)1136 void BtBranch::init(
1137 const BtBranchProblem& prob,
1138 size_t parentId,
1139 TAlScore penalty,
1140 TAlScore score_en,
1141 int64_t row,
1142 int64_t col,
1143 Edit e,
1144 int hef,
1145 bool root,
1146 bool extend)
1147 {
1148 score_en_ = score_en;
1149 penalty_ = penalty;
1150 score_st_ = score_en_;
1151 row_ = row;
1152 col_ = col;
1153 parentId_ = parentId;
1154 e_ = e;
1155 root_ = root;
1156 assert(!root_ || parentId == 0);
1157 assert_lt(row, (int64_t)prob.qrylen_);
1158 assert_lt(col, (int64_t)prob.reflen_);
1159 // First match to check is diagonally above and to the left of the cell
1160 // where the edit occurs
1161 int64_t rowc = row;
1162 int64_t colc = col;
1163 len_ = 0;
1164 if(e.inited() && e.isMismatch()) {
1165 rowc--; colc--;
1166 len_ = 1;
1167 }
1168 int64_t match = prob.sc_->match();
1169 bool cp = prob.usecp_;
1170 size_t iters = 0;
1171 curtailed_ = false;
1172 if(extend) {
1173 while(rowc >= 0 && colc >= 0) {
1174 int rfm = prob.ref_[colc];
1175 assert_range(0, 16, rfm);
1176 int rdc = prob.qry_[rowc];
1177 bool matches = (rfm & (1 << rdc)) != 0;
1178 if(!matches) {
1179 // What's the mismatch penalty?
1180 break;
1181 }
1182 // Get score from checkpointer
1183 score_st_ += match;
1184 if(cp && rowc - 1 >= 0 && colc - 1 >= 0 &&
1185 prob.cper_->isCheckpointed(rowc - 1, colc - 1))
1186 {
1187 // Possibly prune
1188 int16_t cpsc;
1189 cpsc = prob.cper_->scoreTriangle(rowc - 1, colc - 1, hef);
1190 if(cpsc + score_st_ < prob.targ_) {
1191 curtailed_ = true;
1192 break;
1193 }
1194 }
1195 iters++;
1196 rowc--; colc--;
1197 }
1198 }
1199 assert_geq(rowc, -1);
1200 assert_geq(colc, -1);
1201 len_ = (int64_t)row - rowc;
1202 assert_leq((int64_t)len_, row_+1);
1203 assert_leq((int64_t)len_, col_+1);
1204 assert_leq((int64_t)score_st_, (int64_t)prob.qrylen_ * match);
1205 }
1206
1207 /**
1208 * Given a potential branch to add to the queue, see if we can follow the
1209 * branch a little further first. If it's still valid, or if we reach a
1210 * choice between valid outgoing paths, go ahead and add it to the queue.
1211 */
examineBranch(int64_t row,int64_t col,const Edit & e,TAlScore pen,TAlScore sc,size_t parentId)1212 void BtBranchTracer::examineBranch(
1213 int64_t row,
1214 int64_t col,
1215 const Edit& e,
1216 TAlScore pen, // penalty associated with edit
1217 TAlScore sc,
1218 size_t parentId)
1219 {
1220 size_t id = bs_.alloc();
1221 bs_[id].init(prob_, parentId, pen, sc, row, col, e, 0, false, true);
1222 if(bs_[id].isSolution(prob_)) {
1223 assert(bs_[id].isValid(prob_));
1224 addSolution(id);
1225 } else {
1226 // Check if this branch is legit
1227 if(bs_[id].isValid(prob_)) {
1228 add(id);
1229 } else {
1230 bs_.pop();
1231 }
1232 }
1233 }
1234
1235 /**
1236 * Take all possible ways of leaving the given branch and add them to the
1237 * branch queue.
1238 */
addOffshoots(size_t bid)1239 void BtBranchTracer::addOffshoots(size_t bid) {
1240 BtBranch& b = bs_[bid];
1241 TAlScore sc = b.score_en_;
1242 int64_t match = prob_.sc_->match();
1243 int64_t scoreFloor = prob_.sc_->monotone ? MIN_I64 : 0;
1244 bool cp = prob_.usecp_; // Are there are any checkpoints?
1245 ASSERT_ONLY(TAlScore perfectScore = prob_.sc_->perfectScore(prob_.qrylen_));
1246 assert_leq(prob_.targ_, perfectScore);
1247 // For each cell in the branch
1248 for(size_t i = 0 ; i < b.len_; i++) {
1249 assert_leq((int64_t)i, b.row_+1);
1250 assert_leq((int64_t)i, b.col_+1);
1251 int64_t row = b.row_ - i, col = b.col_ - i;
1252 int64_t bonusLeft = (row + 1) * match;
1253 int64_t fromend = prob_.qrylen_ - row - 1;
1254 bool allowGaps = fromend >= prob_.sc_->gapbar && row >= prob_.sc_->gapbar;
1255 if(allowGaps && row >= 0 && col >= 0) {
1256 if(col > 0) {
1257 // Try a read gap - it's either an extension or an open
1258 bool extend = b.e_.inited() && b.e_.isReadGap() && i == 0;
1259 TAlScore rdgapPen = extend ?
1260 prob_.sc_->readGapExtend() : prob_.sc_->readGapOpen();
1261 bool prune = false;
1262 assert_gt(rdgapPen, 0);
1263 if(cp && prob_.cper_->isCheckpointed(row, col - 1)) {
1264 // Possibly prune
1265 int16_t cpsc = (int16_t)prob_.cper_->scoreTriangle(row, col - 1, 0);
1266 assert_leq(cpsc, perfectScore);
1267 assert_geq(prob_.sc_->readGapOpen(), prob_.sc_->readGapExtend());
1268 TAlScore bonus = prob_.sc_->readGapOpen() - prob_.sc_->readGapExtend();
1269 assert_geq(bonus, 0);
1270 if(cpsc + bonus + sc - rdgapPen < prob_.targ_) {
1271 prune = true;
1272 }
1273 }
1274 if(prune) {
1275 if(extend) { nrdexPrune_++; } else { nrdopPrune_++; }
1276 } else if(sc - rdgapPen >= scoreFloor && sc - rdgapPen + bonusLeft >= prob_.targ_) {
1277 // Yes, we can introduce a read gap here
1278 Edit e((int)row + 1, mask2dna[(int)prob_.ref_[col]], '-', EDIT_TYPE_READ_GAP);
1279 assert(e.isReadGap());
1280 examineBranch(row, col - 1, e, rdgapPen, sc - rdgapPen, bid);
1281 if(extend) { nrdex_++; } else { nrdop_++; }
1282 }
1283 }
1284 if(row > 0) {
1285 // Try a reference gap - it's either an extension or an open
1286 bool extend = b.e_.inited() && b.e_.isRefGap() && i == 0;
1287 TAlScore rfgapPen = (b.e_.inited() && b.e_.isRefGap()) ?
1288 prob_.sc_->refGapExtend() : prob_.sc_->refGapOpen();
1289 bool prune = false;
1290 assert_gt(rfgapPen, 0);
1291 if(cp && prob_.cper_->isCheckpointed(row - 1, col)) {
1292 // Possibly prune
1293 int16_t cpsc = (int16_t)prob_.cper_->scoreTriangle(row - 1, col, 0);
1294 assert_leq(cpsc, perfectScore);
1295 assert_geq(prob_.sc_->refGapOpen(), prob_.sc_->refGapExtend());
1296 TAlScore bonus = prob_.sc_->refGapOpen() - prob_.sc_->refGapExtend();
1297 assert_geq(bonus, 0);
1298 if(cpsc + bonus + sc - rfgapPen < prob_.targ_) {
1299 prune = true;
1300 }
1301 }
1302 if(prune) {
1303 if(extend) { nrfexPrune_++; } else { nrfopPrune_++; }
1304 } else if(sc - rfgapPen >= scoreFloor && sc - rfgapPen + bonusLeft >= prob_.targ_) {
1305 // Yes, we can introduce a ref gap here
1306 Edit e((int)row, '-', "ACGTN"[(int)prob_.qry_[row]], EDIT_TYPE_REF_GAP);
1307 assert(e.isRefGap());
1308 examineBranch(row - 1, col, e, rfgapPen, sc - rfgapPen, bid);
1309 if(extend) { nrfex_++; } else { nrfop_++; }
1310 }
1311 }
1312 }
1313 // If we're at the top of the branch but not yet at the top of
1314 // the DP table, a mismatch branch is also possible.
1315 if(i == b.len_ && !b.curtailed_ && row >= 0 && col >= 0) {
1316 int rfm = prob_.ref_[col];
1317 assert_lt(row, (int64_t)prob_.qrylen_);
1318 int rdc = prob_.qry_[row];
1319 int rdq = prob_.qual_[row];
1320 int scdiff = prob_.sc_->score(rdc, rfm, rdq - 33);
1321 assert_lt(scdiff, 0); // at end of branch, so can't match
1322 bool prune = false;
1323 if(cp && row > 0 && col > 0 && prob_.cper_->isCheckpointed(row - 1, col - 1)) {
1324 // Possibly prune
1325 int16_t cpsc = prob_.cper_->scoreTriangle(row - 1, col - 1, 0);
1326 assert_leq(cpsc, perfectScore);
1327 assert_leq(cpsc + scdiff + sc, perfectScore);
1328 if(cpsc + scdiff + sc < prob_.targ_) {
1329 prune = true;
1330 }
1331 }
1332 if(prune) {
1333 nmm_++;
1334 } else {
1335 // Yes, we can introduce a mismatch here
1336 if(sc + scdiff >= scoreFloor && sc + scdiff + bonusLeft >= prob_.targ_) {
1337 Edit e((int)row, mask2dna[rfm], "ACGTN"[rdc], EDIT_TYPE_MM);
1338 bool nmm = (mask2dna[rfm] == 'N' || rdc > 4);
1339 assert_neq(e.chr, e.qchr);
1340 assert_lt(scdiff, 0);
1341 examineBranch(row - 1, col - 1, e, -scdiff, sc + scdiff, bid);
1342 if(nmm) { nnmm_++; } else { nmm_++; }
1343 }
1344 }
1345 }
1346 sc += match;
1347 }
1348 }
1349
1350 /**
1351 * Sort unsorted branches, merge them with master sorted list.
1352 */
flushUnsorted()1353 void BtBranchTracer::flushUnsorted() {
1354 if(unsorted_.empty()) {
1355 return;
1356 }
1357 unsorted_.sort();
1358 unsorted_.reverse();
1359 #ifndef NDEBUG
1360 for(size_t i = 1; i < unsorted_.size(); i++) {
1361 assert_leq(bs_[unsorted_[i].second].score_st_, bs_[unsorted_[i-1].second].score_st_);
1362 }
1363 #endif
1364 EList<size_t> *src2 = sortedSel_ ? &sorted1_ : &sorted2_;
1365 EList<size_t> *dest = sortedSel_ ? &sorted2_ : &sorted1_;
1366 // Merge src1 and src2 into dest
1367 dest->clear();
1368 size_t cur1 = 0, cur2 = cur_;
1369 while(cur1 < unsorted_.size() || cur2 < src2->size()) {
1370 // Take from 1 or 2 next?
1371 bool take1 = true;
1372 if(cur1 == unsorted_.size()) {
1373 take1 = false;
1374 } else if(cur2 == src2->size()) {
1375 take1 = true;
1376 } else {
1377 assert_neq(unsorted_[cur1].second, (*src2)[cur2]);
1378 take1 = bs_[unsorted_[cur1].second] < bs_[(*src2)[cur2]];
1379 }
1380 if(take1) {
1381 dest->push_back(unsorted_[cur1++].second); // Take from list 1
1382 } else {
1383 dest->push_back((*src2)[cur2++]); // Take from list 2
1384 }
1385 }
1386 assert_eq(cur1, unsorted_.size());
1387 assert_eq(cur2, src2->size());
1388 sortedSel_ = !sortedSel_;
1389 cur_ = 0;
1390 unsorted_.clear();
1391 }
1392
1393 /**
1394 * Try all the solutions accumulated so far. Solutions might be rejected
1395 * if they, for instance, overlap a previous solution, have too many Ns,
1396 * fail to overlap a core diagonal, etc.
1397 */
trySolutions(bool lookForOlap,SwResult & res,size_t & off,size_t & nrej,RandomSource & rnd,bool & success)1398 bool BtBranchTracer::trySolutions(
1399 bool lookForOlap,
1400 SwResult& res,
1401 size_t& off,
1402 size_t& nrej,
1403 RandomSource& rnd,
1404 bool& success)
1405 {
1406 if(solutions_.size() > 0) {
1407 for(size_t i = 0; i < solutions_.size(); i++) {
1408 int ret = trySolution(solutions_[i], lookForOlap, res, off, nrej, rnd);
1409 if(ret == BT_FOUND) {
1410 success = true;
1411 return true; // there were solutions and one was good
1412 }
1413 }
1414 solutions_.clear();
1415 success = false;
1416 return true; // there were solutions but none were good
1417 }
1418 return false; // there were no solutions to check
1419 }
1420
1421 /**
1422 * Given the id of a branch that completes a successful backtrace, turn the
1423 * chain of branches into
1424 */
int BtBranchTracer::trySolution(
	size_t id,          // id of leftmost branch in the solution chain
	bool lookForOlap,   // reject if it overlaps a previous solution?
	SwResult& res,      // out: filled-in alignment result
	size_t& off,        // out: leftmost reference offset of the alignment
	size_t& nrej,       // out: incremented whenever a solution is rejected
	RandomSource& rnd)  // pseudo-random generator (unused here)
{
	AlnScore score;
	BtBranch *br = &bs_[id];
	// 'br' corresponds to the leftmost edit in a right-to-left
	// chain of edits.
	EList<Edit>& ned = res.alres.ned();
	const BtBranch *cur = br, *prev = NULL;
	size_t ns = 0, nrefns = 0;
	size_t ngap = 0;
	// Pass 1: walk from the leftmost branch back to the root, collecting
	// the edits and tallying Ns and gaps as we go
	while(true) {
		if(cur->e_.inited()) {
			if(cur->e_.isMismatch()) {
				// Count mismatches involving an N on read or reference
				if(cur->e_.qchr == 'N' || cur->e_.chr == 'N') {
					ns++;
				}
			} else if(cur->e_.isGap()) {
				ngap++;
			}
			if(cur->e_.chr == 'N') {
				// Edit involves a reference N
				nrefns++;
			}
			ned.push_back(cur->e_);
		}
		if(cur->root_) {
			break;
		}
		cur = &bs_[cur->parentId_];
	}
	if(ns > prob_.nceil_) {
		// Alignment has too many Ns in it!
		res.reset();
		assert(res.alres.ned().empty());
		nrej++;
		return BT_REJECTED_N;
	}
	// Pass 2: walk the chain again to update 'seenPaths_' (per-diagonal
	// intervals of rows already traversed) and to check the core diagonals
	cur = br;
	bool rejSeen = false; // set =true if we overlap prev path
	bool rejCore = true; // set =true if we don't touch core diag
	while(true) {
		// Consider row, col, len, then do something
		int64_t row = cur->row_, col = cur->col_;
		assert_lt(row, (int64_t)prob_.qrylen_);
		size_t fromend = prob_.qrylen_ - row - 1;
		// Anti-diagonal index used to bucket intervals in seenPaths_
		size_t diag = fromend + col;
		// Calculate the diagonal within the *trimmed* rectangle,
		// i.e. the rectangle we dealt with in align, gather and
		// backtrack.
		int64_t diagi = col - row;
		// Now adjust to the diagonal within the *untrimmed*
		// rectangle by adding on the amount trimmed from the left.
		diagi += prob_.rect_->triml;
		assert_lt(diag, seenPaths_.size());
		// Does it overlap a core diagonal?
		if(diagi >= 0) {
			// Note: intentionally shadows the outer 'diag'
			size_t diag = (size_t)diagi;
			if(diag >= prob_.rect_->corel &&
			   diag <= prob_.rect_->corer)
			{
				// Yes it does - it's OK
				rejCore = false;
			}
		}
		if(lookForOlap) {
			// Compute the half-open row interval [newlo, newhi) that this
			// branch occupies on its diagonal
			int64_t newlo, newhi;
			if(cur->len_ == 0) {
				if(prev != NULL && prev->len_ > 0) {
					// If there's a gap at the base of a non-0 length branch, the
					// gap will appear to overlap the branch if we give it length 1.
					newhi = newlo = 0;
				} else {
					// Read or ref gap with no matches coming off of it
					newlo = row;
					newhi = row + 1;
				}
			} else {
				// Diagonal with matches
				newlo = row - (cur->len_ - 1);
				newhi = row + 1;
			}
			assert_geq(newlo, 0);
			assert_geq(newhi, 0);
			// Does the diagonal cover cells?
			if(newhi > newlo) {
				// Check whether there is any overlap with previously traversed
				// cells
				bool added = false;
				const size_t sz = seenPaths_[diag].size();
				for(size_t i = 0; i < sz; i++) {
					// Does the new interval overlap this already-seen
					// interval? Also of interest: does it abut this
					// already-seen interval? If so, we should merge them.
					size_t lo = seenPaths_[diag][i].first;
					size_t hi = seenPaths_[diag][i].second;
					assert_lt(lo, hi);
					size_t lo_sm = newlo, hi_sm = newhi;
					// Ensure (lo, hi) is the longer interval and
					// (lo_sm, hi_sm) the shorter one
					if(hi - lo < hi_sm - lo_sm) {
						swap(lo, lo_sm);
						swap(hi, hi_sm);
					}
					if((lo <= lo_sm && hi > lo_sm) ||
					   (lo < hi_sm && hi >= hi_sm))
					{
						// One or both of the shorter interval's end points
						// are contained in the longer interval - so they
						// overlap.
						rejSeen = true;
						// Merge them into one longer interval
						seenPaths_[diag][i].first = min(lo, lo_sm);
						seenPaths_[diag][i].second = max(hi, hi_sm);
#ifndef NDEBUG
						for(int64_t ii = seenPaths_[diag][i].first;
						    ii < (int64_t)seenPaths_[diag][i].second;
						    ii++)
						{
							//cerr << "trySolution rejected (" << ii << ", " << (ii + col - row) << ")" << endl;
						}
#endif
						added = true;
						break;
					} else if(hi == lo_sm || lo == hi_sm) {
						// Intervals abut (don't overlap), so merging is
						// safe and doesn't imply rejection
						// Merge them into one longer interval
						seenPaths_[diag][i].first = min(lo, lo_sm);
						seenPaths_[diag][i].second = max(hi, hi_sm);
#ifndef NDEBUG
						for(int64_t ii = seenPaths_[diag][i].first;
						    ii < (int64_t)seenPaths_[diag][i].second;
						    ii++)
						{
							//cerr << "trySolution rejected (" << ii << ", " << (ii + col - row) << ")" << endl;
						}
#endif
						added = true;
						// Keep going in case it overlaps one of the other
						// intervals
					}
				}
				if(!added) {
					// No overlap/abutment; record a fresh interval
					seenPaths_[diag].push_back(make_pair(newlo, newhi));
				}
			}
		}
		// After the merging that may have occurred above, it's no
		// longer guarnateed that all the overlapping intervals in
		// the list have been merged. That's OK though. We'll
		// still get correct answers to overlap queries.
		if(cur->root_) {
			assert_eq(0, cur->parentId_);
			break;
		}
		prev = cur;
		cur = &bs_[cur->parentId_];
	} // while(cur->e_.inited())
	if(rejSeen) {
		// Overlapped a previously reported solution's path
		res.reset();
		assert(res.alres.ned().empty());
		nrej++;
		return BT_NOT_FOUND;
	}
	if(rejCore) {
		// Never touched a core diagonal
		res.reset();
		assert(res.alres.ned().empty());
		nrej++;
		return BT_REJECTED_CORE_DIAG;
	}
	// Accepted: fill in score, trim and shape information
	off = br->leftmostCol();
	size_t trimBeg = br->uppermostRow();
	size_t trimEnd = prob_.qrylen_ - prob_.row_ - 1;
	score.score_ = prob_.targ_;
	score.basesAligned_ = (int)(prob_.qrylen_ - trimBeg - trimEnd - ned.size());
	score.edits_ = (int)ned.size();
	score.ns_ = ns;
	score.gaps_ = ngap;
	res.alres.setScore(score);
	res.alres.setRefNs(nrefns);
	assert_leq(trimBeg, prob_.qrylen_);
	assert_leq(trimEnd, prob_.qrylen_);
	// Convert rectangle-relative offset to a reference offset
	TRefOff refoff = off + prob_.refoff_ + prob_.rect_->refl;
	res.alres.setShape(
		prob_.refid_,                   // ref id
		refoff,                         // 0-based ref offset
		prob_.treflen(),                // ref length
		prob_.fw_,                      // aligned to Watson?
		prob_.qrylen_,                  // read length
		true,                           // pretrim soft?
		0,                              // pretrim 5' end
		0,                              // pretrim 3' end
		true,                           // alignment trim soft?
		prob_.fw_ ? trimBeg : trimEnd,  // alignment trim 5' end
		prob_.fw_ ? trimEnd : trimBeg); // alignment trim 3' end
	return BT_FOUND;
}
1624
1625 /**
1626 * Get the next valid alignment given a backtrace problem. Return false
1627 * if there is no valid solution. Use a backtracking search to find the
1628 * solution. This can be very slow.
1629 */
nextAlignmentBacktrace(size_t maxiter,SwResult & res,size_t & off,size_t & nrej,size_t & niter,RandomSource & rnd)1630 bool BtBranchTracer::nextAlignmentBacktrace(
1631 size_t maxiter,
1632 SwResult& res,
1633 size_t& off,
1634 size_t& nrej,
1635 size_t& niter,
1636 RandomSource& rnd)
1637 {
1638 assert(!empty() || !emptySolution());
1639 assert(prob_.inited());
1640 // There's a subtle case where we might fail to backtracing in
1641 // local-alignment mode. The basic fact to remember is that when we're
1642 // backtracing from the highest-scoring cell in the table, we're guaranteed
1643 // to be able to backtrace without ever dipping below 0. But if we're
1644 // backtracing from a cell other than the highest-scoring cell in the
1645 // table, we might dip below 0. Dipping below 0 implies that there's a
1646 // shorted local alignment with a better score. In which case, it's
1647 // perfectly fair for us to abandon any path that dips below the floor, and
1648 // this might result in the queue becoming empty before we finish.
1649 bool result = false;
1650 niter = 0;
1651 while(!empty()) {
1652 if(trySolutions(true, res, off, nrej, rnd, result)) {
1653 return result;
1654 }
1655 if(niter++ >= maxiter) {
1656 break;
1657 }
1658 size_t brid = best(rnd); // put best branch in 'br'
1659 assert(!seen_.contains(brid));
1660 ASSERT_ONLY(seen_.insert(brid));
1661 #if 0
1662 BtBranch *br = &bs_[brid];
1663 cerr << brid
1664 << ": targ:" << prob_.targ_
1665 << ", sc:" << br->score_st_
1666 << ", row:" << br->uppermostRow()
1667 << ", nmm:" << nmm_
1668 << ", nnmm:" << nnmm_
1669 << ", nrdop:" << nrdop_
1670 << ", nrfop:" << nrfop_
1671 << ", nrdex:" << nrdex_
1672 << ", nrfex:" << nrfex_
1673 << ", nrdop_pr: " << nrdopPrune_
1674 << ", nrfop_pr: " << nrfopPrune_
1675 << ", nrdex_pr: " << nrdexPrune_
1676 << ", nrfex_pr: " << nrfexPrune_
1677 << endl;
1678 #endif
1679 addOffshoots(brid);
1680 }
1681 if(trySolutions(true, res, off, nrej, rnd, result)) {
1682 return result;
1683 }
1684 return false;
1685 }
1686
1687 /**
1688 * Get the next valid alignment given a backtrace problem. Return false
1689 * if there is no valid solution. Use a triangle-fill backtrace to find
1690 * the solution. This is usually fast (it's O(m + n)).
1691 */
nextAlignmentFill(size_t maxiter,SwResult & res,size_t & off,size_t & nrej,size_t & niter,RandomSource & rnd)1692 bool BtBranchTracer::nextAlignmentFill(
1693 size_t maxiter,
1694 SwResult& res,
1695 size_t& off,
1696 size_t& nrej,
1697 size_t& niter,
1698 RandomSource& rnd)
1699 {
1700 assert(prob_.inited());
1701 assert(!emptySolution());
1702 bool result = false;
1703 if(trySolutions(false, res, off, nrej, rnd, result)) {
1704 return result;
1705 }
1706 return false;
1707 }
1708
1709 /**
1710 * Get the next valid alignment given the backtrace problem. Return false
1711 * if there is no valid solution, e.g., if
1712 */
nextAlignment(size_t maxiter,SwResult & res,size_t & off,size_t & nrej,size_t & niter,RandomSource & rnd)1713 bool BtBranchTracer::nextAlignment(
1714 size_t maxiter,
1715 SwResult& res,
1716 size_t& off,
1717 size_t& nrej,
1718 size_t& niter,
1719 RandomSource& rnd)
1720 {
1721 if(prob_.fill_) {
1722 return nextAlignmentFill(
1723 maxiter,
1724 res,
1725 off,
1726 nrej,
1727 niter,
1728 rnd);
1729 } else {
1730 return nextAlignmentBacktrace(
1731 maxiter,
1732 res,
1733 off,
1734 nrej,
1735 niter,
1736 rnd);
1737 }
1738 }
1739
1740 #ifdef MAIN_ALIGNER_BT
1741
1742 #include <iostream>
1743
main(int argc,char ** argv)1744 int main(int argc, char **argv) {
1745 size_t off = 0;
1746 RandomSource rnd(77);
1747 BtBranchTracer tr;
1748 Scoring sc = Scoring::base1();
1749 SwResult res;
1750 tr.init(
1751 "ACGTACGT", // in: read sequence
1752 "IIIIIIII", // in: quality sequence
1753 8, // in: read sequence length
1754 "ACGTACGT", // in: reference sequence
1755 8, // in: reference sequence length
1756 0, // in: reference id
1757 0, // in: reference offset
1758 true, // in: orientation
1759 sc, // in: scoring scheme
1760 0, // in: N ceiling
1761 8, // in: alignment score
1762 7, // start in this row
1763 7, // start in this column
1764 rnd); // random gen, to choose among equal paths
1765 size_t nrej = 0;
1766 tr.nextAlignment(
1767 res,
1768 off,
1769 nrej,
1770 rnd);
1771 }
1772
1773 #endif /*def MAIN_ALIGNER_BT*/
1774