1 /*
2  * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3  *
4  * This file is part of Bowtie 2.
5  *
6  * Bowtie 2 is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Bowtie 2 is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include <limits>
21 #include <ctype.h>
22 #include "aligner_seed2.h"
23 #include "assert_helpers.h"
24 #include "bt2_idx.h"
25 
26 /**
27  * Drive the process of descending from all search roots.
28  */
go(const Scoring & sc,const Ebwt & ebwtFw,const Ebwt & ebwtBw,DescentMetrics & met,PerReadMetrics & prm)29 void DescentDriver::go(
30     const Scoring& sc,    // scoring scheme
31     const Ebwt& ebwtFw,   // forward index
32     const Ebwt& ebwtBw,   // mirror index
33     DescentMetrics& met,  // metrics
34 	PerReadMetrics& prm)  // per-read metrics
35 {
36 	assert(q_.repOk());
37     // Convert DescentRoots to the initial Descents
38     for(size_t i = 0; i < roots_.size(); i++) {
39         size_t dfsz = df_.size();
40         size_t pfsz = pf_.size();
41         TDescentId id = df_.alloc();
42         Edit e_null;
43         assert(!e_null.inited());
44         bool succ = df_[id].init(
45             q_,        // read
46             i,         // root and conf id
47             sc,        // scoring scheme
48 			minsc_,    // minimum score
49 			maxpen_,   // maximum penalty
50             id,        // new Descent's id
51             ebwtFw,    // forward index
52             ebwtBw,    // mirror index
53 			re_,       // redundancy checker
54             df_,       // Descent factory
55             pf_,       // DescentPos factory
56             roots_,    // DescentRoots
57             confs_,    // DescentConfs
58             heap_,     // heap
59             alsink_,   // alignment sink
60             met,       // metrics
61 			prm);      // per-read metrics
62 		if(veryVerbose_) {
63 			bool fw = roots_[i].fw;
64 			tmpedit_.clear();
65 			df_[id].print(
66 				&cerr,
67 				"",
68 				q_,
69 				0,
70 				0,
71 				fw,
72 				tmpedit_,
73 				0,
74 				tmpedit_.size(),
75 				tmprfdnastr_);
76 		}
77         if(!succ) {
78             // Reclaim memory we had used for this descent and its DescentPos info
79             df_.resize(dfsz);
80             pf_.resize(pfsz);
81         }
82     }
83     // Advance until some stopping condition
84     bool stop = heap_.empty();
85     while(!stop) {
86 		// Pop off the highest-priority descent.  Note that some outgoing edges
87 		// might have since been explored, which could reduce the priority of
88 		// the descent once we .
89         TDescentPair p = heap_.pop();
90 		df_.alloc(); df_.pop();
91         df_[p.second].followBestOutgoing(
92             q_,        // read
93             ebwtFw,    // index over text
94             ebwtBw,    // index over reverse text
95             sc,        // scoring scheme
96 			minsc_,    // minimum score
97 			maxpen_,   // maximum penalty
98 			re_,       // redundancy checker
99             df_,       // Descent factory
100             pf_,       // DescentPos factory
101             roots_,    //
102             confs_,    //
103             heap_,     // priority queue for Descents
104             alsink_,   // alignment sink
105             met,       // metrics
106 			prm);      // per-read metrics
107         stop = heap_.empty();
108     }
109 }
110 
111 /**
112  * Perform seed alignment until some stopping condition is satisfied.
113  */
advance(const DescentStoppingConditions & stopc,const Scoring & sc,const Ebwt & ebwtFw,const Ebwt & ebwtBw,DescentMetrics & met,PerReadMetrics & prm)114 int DescentDriver::advance(
115 	const DescentStoppingConditions& stopc, // stopping conditions
116     const Scoring& sc,    // scoring scheme
117     const Ebwt& ebwtFw,   // forward index
118     const Ebwt& ebwtBw,   // mirror index
119     DescentMetrics& met,  // metrics
120 	PerReadMetrics& prm)  // per-read metrics
121 {
122 	size_t nbwop_i = met.bwops;
123 	while(rootsInited_ < roots_.size()) {
124 		size_t dfsz = df_.size();
125 		size_t pfsz = pf_.size();
126 		TDescentId id = df_.alloc();
127 		Edit e_null;
128 		assert(!e_null.inited());
129         bool succ = df_[id].init(
130             q_,        // query
131             rootsInited_, // root and conf id
132             sc,        // scoring scheme
133 			minsc_,    // minimum score
134 			maxpen_,   // maximum penalty
135             id,        // new Descent's id
136             ebwtFw,    // forward index
137             ebwtBw,    // mirror index
138 			re_,       // redundancy checker
139             df_,       // Descent factory
140             pf_,       // DescentPos factory
141             roots_,    // DescentRoots
142             confs_,    // DescentConfs
143             heap_,     // heap
144             alsink_,   // alignment sink
145             met,       // metrics
146 			prm);      // per-read metrics
147         if(!succ) {
148             // Reclaim memory we had used for this descent and its DescentPos info
149             df_.resize(dfsz);
150             pf_.resize(pfsz);
151         }
152 		rootsInited_++;
153 		TAlScore best = std::numeric_limits<TAlScore>::max();
154 		if(!heap_.empty()) {
155 			best = heap_.top().first.pen;
156 		}
157 		if(stopc.nfound > 0 && alsink_.nelt() > stopc.nfound) {
158 			return DESCENT_DRIVER_ALN;
159 		}
160 		if(alsink_.stratumDone(best)) {
161 			return DESCENT_DRIVER_STRATA;
162 		}
163 		if(stopc.nbwop > 0 && (met.bwops - nbwop_i) > stopc.nbwop) {
164 			return DESCENT_DRIVER_BWOPS;
165 		}
166 		if(stopc.totsz > 0 && totalSizeBytes() > stopc.totsz) {
167 			return DESCENT_DRIVER_MEM;
168 		}
169     }
170     // Advance until some stopping condition
171     bool stop = heap_.empty();
172     while(!stop) {
173 		// Pop off the highest-priority descent.
174         TDescentPair p = heap_.pop();
175 		df_.alloc(); df_.pop(); // Create new descent
176         df_[p.second].followBestOutgoing(
177             q_,        // read
178             ebwtFw,    // forward index
179             ebwtBw,    // backward index
180             sc,        // scoring scheme
181 			minsc_,    // minimum score
182 			maxpen_,   // maximum penalty
183 			re_,       // redundancy checker
184             df_,       // Descent factory
185             pf_,       // DescentPos factory
186             roots_,    // search roots
187             confs_,    // search root configurations
188             heap_,     // descent heap
189             alsink_,   // alignment sink
190             met,       // metrics
191 			prm);      // per-read metrics
192 		TAlScore best = std::numeric_limits<TAlScore>::max();
193 		if(!heap_.empty()) {
194 			best = heap_.top().first.pen;
195 		}
196 		if(stopc.nfound > 0 && alsink_.nelt() > stopc.nfound) {
197 			return DESCENT_DRIVER_ALN;
198 		}
199 		if(alsink_.stratumDone(best)) {
200 			return DESCENT_DRIVER_STRATA;
201 		}
202 		if(stopc.nbwop > 0 && (met.bwops - nbwop_i) > stopc.nbwop) {
203 			return DESCENT_DRIVER_BWOPS;
204 		}
205 		if(stopc.totsz > 0 && totalSizeBytes() > stopc.totsz) {
206 			return DESCENT_DRIVER_MEM;
207 		}
208         stop = heap_.empty();
209     }
210 	return DESCENT_DRIVER_DONE;
211 }
212 
213 /**
214  * If this is the final descent in a complete end-to-end alignment, report
215  * the alignment.
216  */
reportAlignment(const Read & q,const Ebwt & ebwtFw,const Ebwt & ebwtBw,TIndexOffU topf,TIndexOffU botf,TIndexOffU topb,TIndexOffU botb,TDescentId id,TRootId rid,const Edit & e,TScore pen,EFactory<Descent> & df,EFactory<DescentPos> & pf,const EList<DescentRoot> & rs,const EList<DescentConfig> & cs)217 bool DescentAlignmentSink::reportAlignment(
218 	const Read& q,                  // query string
219 	const Ebwt& ebwtFw,             // forward index
220 	const Ebwt& ebwtBw,             // mirror index
221 	TIndexOffU topf,                 // SA range top in forward index
222 	TIndexOffU botf,                 // SA range bottom in forward index
223 	TIndexOffU topb,                 // SA range top in backward index
224 	TIndexOffU botb,                 // SA range bottom in backward index
225 	TDescentId id,                  // id of leaf Descent
226 	TRootId rid,                    // id of search root
227 	const Edit& e,                  // final edit, if needed
228 	TScore pen,                     // total penalty
229 	EFactory<Descent>& df,          // factory with Descent
230 	EFactory<DescentPos>& pf,       // factory with DescentPoss
231 	const EList<DescentRoot>& rs,   // roots
232 	const EList<DescentConfig>& cs) // configs
233 {
234 	TDescentId cur = id;
235 	ASSERT_ONLY(const Descent& desc = df[id]);
236 	const bool fw = rs[rid].fw;
237 	ASSERT_ONLY(size_t len = q.length());
238 	assert(q.repOk());
239 	assert_lt(desc.al5pf(), len);
240 	// Adjust al5pi and al5pf to take the final edit into account (if
241 	// there is one)
242 	// Check if this is redundant with a previous reported alignment
243 	Triple<TIndexOffU, TIndexOffU, size_t> lhs(topf, botf, 0);
244 	Triple<TIndexOffU, TIndexOffU, size_t> rhs(topb, botb, q.length()-1);
245 	if(!lhs_.insert(lhs)) {
246 		rhs_.insert(rhs);
247 		return false; // Already there
248 	}
249 	if(!rhs_.insert(rhs)) {
250 		return false; // Already there
251 	}
252 	size_t ei = edits_.size();
253 	df[cur].collectEdits(edits_, &e, df);
254 	size_t en = edits_.size() - ei;
255 #ifndef NDEBUG
256 	{
257 		for(size_t i = 1; i < en; i++) {
258 			assert_geq(edits_[ei+i].pos, edits_[ei+i-1].pos);
259 		}
260 		// Now figure out how much we refrained from aligning on either
261 		// side.
262 		size_t trimLf = 0;
263 		size_t trimRg = 0;
264 		BTDnaString& rf = tmprfdnastr_;
265 		rf.clear();
266 		if(!fw) {
267 			// Edit offsets are w/r/t 5' end, but desc.print wants them w/r/t
268 			// the *left* end of the read sequence that aligned
269 			Edit::invertPoss(edits_, len, ei, en, true);
270 		}
271 		desc.print(NULL, "", q, trimLf, trimRg, fw, edits_, ei, en, rf);
272 		if(!fw) {
273 			// Invert them back to how they were before
274 			Edit::invertPoss(edits_, len, ei, en, true);
275 		}
276 		ASSERT_ONLY(TIndexOffU toptmp = 0);
277 		ASSERT_ONLY(TIndexOffU bottmp = 0);
278 		// Check that the edited string occurs in the reference
279 		if(!ebwtFw.contains(rf, &toptmp, &bottmp)) {
280 			std::cerr << rf << std::endl;
281 			assert(false);
282 		}
283 	}
284 #endif
285 	als_.expand();
286 	als_.back().init(pen, fw, topf, botf, ei, en);
287 	nelt_ += (botf - topf);
288 	if(bestPen_ == std::numeric_limits<TAlScore>::max() || pen < bestPen_) {
289 		bestPen_ = pen;
290 	}
291 	if(worstPen_ == std::numeric_limits<TAlScore>::max() || pen > worstPen_) {
292 		worstPen_ = pen;
293 	}
294 	return true;
295 }
296 
297 /**
298  * Initialize a new descent branching from the given descent via the given
299  * edit.  Return false if the Descent has no outgoing edges (and can
300  * therefore have its memory freed), true otherwise.
301  */
init(const Read & q,TRootId rid,const Scoring & sc,TAlScore minsc,TAlScore maxpen,TReadOff al5pi,TReadOff al5pf,TIndexOffU topf,TIndexOffU botf,TIndexOffU topb,TIndexOffU botb,bool l2r,size_t descid,TDescentId parent,TScore pen,const Edit & e,const Ebwt & ebwtFw,const Ebwt & ebwtBw,DescentRedundancyChecker & re,EFactory<Descent> & df,EFactory<DescentPos> & pf,const EList<DescentRoot> & rs,const EList<DescentConfig> & cs,EHeap<TDescentPair> & heap,DescentAlignmentSink & alsink,DescentMetrics & met,PerReadMetrics & prm)302 bool Descent::init(
303     const Read& q,                  // query
304     TRootId rid,                    // root id
305     const Scoring& sc,              // scoring scheme
306 	TAlScore minsc,                 // minimum score
307 	TAlScore maxpen,                // maximum penalty
308     TReadOff al5pi,                 // offset from 5' of 1st aligned char
309     TReadOff al5pf,                 // offset from 5' of last aligned char
310     TIndexOffU topf,                 // SA range top in FW index
311     TIndexOffU botf,                 // SA range bottom in FW index
312     TIndexOffU topb,                 // SA range top in BW index
313     TIndexOffU botb,                 // SA range bottom in BW index
314     bool l2r,                       // direction this descent will go in
315     size_t descid,                  // my ID
316     TDescentId parent,              // parent ID
317     TScore pen,                     // total penalties so far
318     const Edit& e,                  // edit for incoming edge
319     const Ebwt& ebwtFw,             // forward index
320     const Ebwt& ebwtBw,             // mirror index
321 	DescentRedundancyChecker& re,   // redundancy checker
322     EFactory<Descent>& df,          // Descent factory
323     EFactory<DescentPos>& pf,       // DescentPos factory
324     const EList<DescentRoot>& rs,   // roots
325     const EList<DescentConfig>& cs, // configs
326     EHeap<TDescentPair>& heap,      // heap
327     DescentAlignmentSink& alsink,   // alignment sink
328     DescentMetrics& met,            // metrics
329 	PerReadMetrics& prm)            // per-read metrics
330 {
331 	assert(q.repOk());
332     rid_ = rid;
333     al5pi_ = al5pi;
334     al5pf_ = al5pf;
335     l2r_ = l2r;
336     topf_ = topf;
337     botf_ = botf;
338     topb_ = topb;
339     botb_ = botb;
340     descid_ = descid;
341     parent_ = parent;
342     pen_ = pen;
343     posid_ = std::numeric_limits<size_t>::max();
344     len_ = 0;
345     out_.clear();
346     edit_ = e;
347     lastRecalc_ = true;
348 	gapadd_ = df[parent].gapadd_;
349 	if(e.inited()) {
350 		if(e.isReadGap()) {
351 			gapadd_++;
352 		} else if(e.isRefGap()) {
353 			gapadd_--;
354 		}
355 	}
356     bool branches = false, hitEnd = false, done = false;
357     TIndexOffU topf_new = 0, botf_new = 0, topb_new = 0, botb_new = 0;
358     off5p_i_ = 0;
359 #ifndef NDEBUG
360     size_t depth = al5pf_ - al5pi_ + 1;
361     TAlScore maxpend = cs[rid_].cons.get(depth, q.length(), maxpen);
362     assert_geq(maxpend, pen_);    // can't have already exceeded max penalty
363 #endif
364     bool matchSucc = followMatches(
365         q,
366 		sc,
367         ebwtFw,
368         ebwtBw,
369 		re,
370         df,
371         pf,
372         rs,
373         cs,
374         heap,
375         alsink,
376         met,
377 		prm,
378         branches,
379         hitEnd,
380         done,
381         off5p_i_,
382         topf_new,
383         botf_new,
384         topb_new,
385         botb_new);
386     bool bounceSucc = false;
387     if(matchSucc && hitEnd && !done) {
388 		assert(topf_new > 0 || botf_new > 0);
389         bounceSucc = bounce(
390             q,
391             topf_new,
392             botf_new,
393             topb_new,
394             botb_new,
395             ebwtFw,
396             ebwtBw,
397             sc,
398 			minsc,    // minimum score
399 			maxpen,   // maximum penalty
400 			re,
401             df,
402             pf,
403             rs,
404             cs,
405             heap,
406             alsink,
407             met,      // descent metrics
408 			prm);     // per-read metrics
409     }
410 	if(matchSucc) {
411 		// Calculate info about outgoing edges
412 		recalcOutgoing(q, sc, minsc, maxpen, re, pf, rs, cs, prm);
413 		if(!empty()) {
414 			heap.insert(make_pair(out_.bestPri(), descid)); // Add to heap
415 		}
416 	}
417     return !empty() || bounceSucc;
418 }
419 
420 /**
421  * Initialize a new descent beginning at the given root.  Return false if
422  * the Descent has no outgoing edges (and can therefore have its memory
423  * freed), true otherwise.
424  */
init(const Read & q,TRootId rid,const Scoring & sc,TAlScore minsc,TAlScore maxpen,size_t descid,const Ebwt & ebwtFw,const Ebwt & ebwtBw,DescentRedundancyChecker & re,EFactory<Descent> & df,EFactory<DescentPos> & pf,const EList<DescentRoot> & rs,const EList<DescentConfig> & cs,EHeap<TDescentPair> & heap,DescentAlignmentSink & alsink,DescentMetrics & met,PerReadMetrics & prm)425 bool Descent::init(
426     const Read& q,                  // query
427     TRootId rid,                    // root id
428     const Scoring& sc,              // scoring scheme
429 	TAlScore minsc,                 // minimum score
430 	TAlScore maxpen,                // maximum penalty
431     size_t descid,                  // id of this Descent
432     const Ebwt& ebwtFw,             // forward index
433     const Ebwt& ebwtBw,             // mirror index
434 	DescentRedundancyChecker& re,   // redundancy checker
435     EFactory<Descent>& df,          // Descent factory
436     EFactory<DescentPos>& pf,       // DescentPos factory
437     const EList<DescentRoot>& rs,   // roots
438     const EList<DescentConfig>& cs, // configs
439     EHeap<TDescentPair>& heap,      // heap
440     DescentAlignmentSink& alsink,   // alignment sink
441     DescentMetrics& met,            // metrics
442 	PerReadMetrics& prm)            // per-read metrics
443 {
444     rid_ = rid;
445     al5pi_ = rs[rid].off5p;
446     al5pf_ = rs[rid].off5p;
447 	assert_lt(al5pi_, q.length());
448 	assert_lt(al5pf_, q.length());
449     l2r_ = rs[rid].l2r;
450     topf_ = botf_ = topb_ = botb_ = 0;
451     descid_ = descid;
452     parent_ = std::numeric_limits<size_t>::max();
453     pen_ = 0;
454     posid_ = std::numeric_limits<size_t>::max();
455     len_ = 0;
456     out_.clear();
457     edit_.reset();
458     lastRecalc_ = true;
459 	gapadd_ = 0;
460     bool branches = false, hitEnd = false, done = false;
461     TIndexOffU topf_new = 0, botf_new = 0, topb_new = 0, botb_new = 0;
462     off5p_i_ = 0;
463     bool matchSucc = followMatches(
464         q,
465 		sc,
466         ebwtFw,
467         ebwtBw,
468 		re,
469         df,
470         pf,
471         rs,
472         cs,
473         heap,
474         alsink,
475         met,
476 		prm,
477         branches,
478         hitEnd,
479         done,
480         off5p_i_,
481         topf_new,
482         botf_new,
483         topb_new,
484         botb_new);
485     bool bounceSucc = false;
486     if(matchSucc && hitEnd && !done) {
487 		assert(topf_new > 0 || botf_new > 0);
488         bounceSucc = bounce(
489             q,
490             topf_new,
491             botf_new,
492             topb_new,
493             botb_new,
494             ebwtFw,
495             ebwtBw,
496             sc,
497 			minsc,    // minimum score
498 			maxpen,   // maximum penalty
499 			re,
500             df,
501             pf,
502             rs,
503             cs,
504             heap,
505             alsink,
506             met,      // descent metrics
507 			prm);     // per-read metrics
508     }
509     // Calculate info about outgoing edges
510     assert(empty());
511 	if(matchSucc) {
512 		recalcOutgoing(q, sc, minsc, maxpen, re, pf, rs, cs, prm);
513 		if(!empty()) {
514 			heap.insert(make_pair(out_.bestPri(), descid)); // Add to heap
515 		}
516 	}
517     return !empty() || bounceSucc;
518 }
519 
520 /**
521  * Recalculate our summary of the outgoing edges from this descent.  When
522  * deciding what outgoing edges are legal, we abide by constraints.
523  * Typically, they limit the total of the penalties accumulated so far, as
524  * a function of distance from the search root.  E.g. a constraint might
525  * disallow any gaps or mismatches within 20 ply of the search root, then
526  * allow 1 mismatch within 30 ply, then allow up to 1 mismatch or 1 gap
527  * within 40 ply, etc.
528  *
529  * Return the total number of valid outgoing edges found.
530  *
531  * TODO: Eliminate outgoing gap edges that are redundant with others owing to
532  *       the DNA sequence and the fact that we don't care to distinguish among
533  *       "equivalent" homopolymer extensinos and retractions.
534  */
recalcOutgoing(const Read & q,const Scoring & sc,TAlScore minsc,TAlScore maxpen,DescentRedundancyChecker & re,EFactory<DescentPos> & pf,const EList<DescentRoot> & rs,const EList<DescentConfig> & cs,PerReadMetrics & prm)535 size_t Descent::recalcOutgoing(
536     const Read& q,                   // query string
537     const Scoring& sc,               // scoring scheme
538 	TAlScore minsc,                  // minimum score
539 	TAlScore maxpen,                 // maximum penalty
540 	DescentRedundancyChecker& re,    // redundancy checker
541     EFactory<DescentPos>& pf,        // factory with DescentPoss
542     const EList<DescentRoot>& rs,    // roots
543     const EList<DescentConfig>& cs,  // configs
544 	PerReadMetrics& prm)             // per-read metrics
545 {
546     assert_eq(botf_ - topf_, botb_ - topb_);
547 	assert(out_.empty());
548 	assert(repOk(&q));
549 	// Get initial 5' and 3' offsets
550     bool fw = rs[rid_].fw;
551     float rootpri = rs[rid_].pri;
552 	bool toward3p = (l2r_ == fw);
553 	size_t off5p = off5p_i_;
554 	assert_geq(al5pf_, al5pi_);
555 	size_t off3p = q.length() - off5p - 1;
556 	// By "depth" we essentially mean the number of characters already aligned
557 	size_t depth, extrai = 0, extraf = 0;
558 	size_t cur5pi = al5pi_, cur5pf = al5pf_;
559     if(toward3p) {
560 		// Toward 3'
561 		cur5pf = off5p;
562         depth = off5p - al5pi_;
563 		// Failed to match out to the end?
564 		if(al5pf_ < q.length() - 1) {
565 			extraf = 1; // extra
566 		}
567     } else {
568 		// Toward 5'
569 		cur5pi = off5p;
570         depth = al5pf_ - off5p;
571 		if(al5pi_ > 0) {
572 			extrai = 1;
573 		}
574     }
575 	// Get gap penalties
576 	TScore pen_rdg_ex = sc.readGapExtend(), pen_rfg_ex = sc.refGapExtend();
577 	TScore pen_rdg_op = sc.readGapOpen(),   pen_rfg_op = sc.refGapOpen();
578 	// Top and bot in the direction of the descent
579 	TIndexOffU top  = l2r_ ? topb_ : topf_;
580 	TIndexOffU bot  = l2r_ ? botb_ : botf_;
581 	// Top and bot in the opposite direction
582 	TIndexOffU topp = l2r_ ? topf_ : topb_;
583 	TIndexOffU botp = l2r_ ? botf_ : botb_;
584 	assert_eq(botp - topp, bot - top);
585 	DescentEdge edge;
586 	size_t nout = 0;
587 	// Enumerate all outgoing edges, starting at the root and going out
588     size_t d = posid_;
589 	// At first glance, we might think we should be bounded by al5pi_ and
590 	// al5pf_, but those delimit the positions that matched between reference
591 	// and read.  If we hit a position that failed to match as part of
592 	// followMatches, then we also want to evaluate ways of leaving that
593 	// position, which adds one more position to viist.
594 	while(off5p >= al5pi_ - extrai && off5p <= al5pf_ + extraf) {
595         assert_lt(off5p, q.length());
596         assert_lt(off3p, q.length());
597 		TScore maxpend = cs[rid_].cons.get(depth, q.length(), maxpen);
598 		assert(depth > 0 || maxpend == 0);
599 		assert_geq(maxpend, pen_);    // can't have already exceeded max penalty
600 		TScore diff = maxpend - pen_; // room we have left
601 		// Get pointer to SA ranges in the direction of descent
602 		const TIndexOffU *t  = l2r_ ? pf[d].topb : pf[d].topf;
603 		const TIndexOffU *b  = l2r_ ? pf[d].botb : pf[d].botf;
604 		const TIndexOffU *tp = l2r_ ? pf[d].topf : pf[d].topb;
605 		const TIndexOffU *bp = l2r_ ? pf[d].botf : pf[d].botb;
606 		assert_eq(pf[d].botf - pf[d].topf, pf[d].botb - pf[d].topb);
607 		// What are the read char / quality?
608 		std::pair<int, int> p = q.get(off5p, fw);
609 		int c = p.first;
610 		assert_range(0, 4, c);
611 		// Only entertain edits if there is at least one type of edit left and
612 		// there is some penalty budget left
613 		if(!pf[d].flags.exhausted() && diff > 0) {
614 			// What would the penalty be if we mismatched at this position?
615 			// This includes the case where the mismatch is for an N in the
616 			// read.
617 			int qq = p.second;
618             assert_geq(qq, 0);
619 			TScore pen_mm = sc.mm(c, qq);
620 			if(pen_mm <= diff) {
621 				for(int j = 0; j < 4; j++) {
622 					if(j == c) continue; // Match, not mismatch
623 					if(b[j] <= t[j]) {
624 						continue; // No outgoing edge with this nucleotide
625 					}
626 					if(!pf[d].flags.mmExplore(j)) {
627 						continue; // Already been explored
628 					}
629 					TIndexOffU topf = pf[d].topf[j], botf = pf[d].botf[j];
630 					ASSERT_ONLY(TIndexOffU topb = pf[d].topb[j], botb = pf[d].botb[j]);
631 					if(re.contains(fw, l2r_, cur5pi, cur5pf, cur5pf - cur5pi + 1 + gapadd_, topf, botf, pen_ + pen_mm)) {
632 						prm.nRedSkip++;
633 						continue; // Redundant with a path already explored
634 					}
635 					prm.nRedFail++;
636 					TIndexOffU width = b[j] - t[j];
637 					Edit edit((uint32_t)off5p, (int)("ACGTN"[j]), (int)("ACGTN"[c]), EDIT_TYPE_MM);
638 					DescentPriority pri(pen_ + pen_mm, depth, width, rootpri);
639                     assert(topf != 0 || botf != 0);
640                     assert(topb != 0 || botb != 0);
641 					assert_eq(botb - topb, botf - topf);
642 					edge.init(edit, off5p, pri, d
643 #ifndef NDEBUG
644                     , d, topf, botf, topb, botb
645 #endif
646                     );
647 					out_.update(edge);
648 					nout++;
649 				}
650 			}
651 			bool gapsAllowed = (off5p >= (size_t)sc.gapbar && off3p >= (size_t)sc.gapbar);
652 			if(gapsAllowed) {
653 				assert_gt(depth, 0);
654 				// An easy redundancy check is: if all ways of proceeding are
655 				// matches, then there's no need to entertain gaps here.
656 				// Shifting the gap one position further downstream is
657 				// guarnteed not to be worse.
658 				size_t totwidth = (b[0] - t[0]) +
659 				                  (b[1] - t[1]) +
660 								  (b[2] - t[2]) +
661 								  (b[3] - t[3]);
662 				assert(c > 3 || b[c] - t[c] <= totwidth);
663 				bool allmatch = c < 4 && (totwidth == (b[c] - t[c]));
664 				bool rdex = false, rfex = false;
665 				size_t cur5pi_i = cur5pi, cur5pf_i = cur5pf;
666 				if(toward3p) {
667 					cur5pf_i--;
668 				} else {
669 					cur5pi_i++;
670 				}
671 				if(off5p == off5p_i_ && edit_.inited()) {
672 					// If we're at the root of the descent, and the descent
673 					// branched on a gap, then this could be scored as an
674 					// extension of that gap.
675 					if(pen_rdg_ex <= diff && edit_.isReadGap()) {
676 						// Extension of a read gap
677 						rdex = true;
678 						for(int j = 0; j < 4; j++) {
679 							if(b[j] <= t[j]) {
680 								continue; // No outgoing edge with this nucleotide
681 							}
682 							if(!pf[d].flags.rdgExplore(j)) {
683 								continue; // Already been explored
684 							}
685 							TIndexOffU topf = pf[d].topf[j], botf = pf[d].botf[j];
686 							ASSERT_ONLY(TIndexOffU topb = pf[d].topb[j], botb = pf[d].botb[j]);
687 							assert(topf != 0 || botf != 0);
688 							assert(topb != 0 || botb != 0);
689 							if(re.contains(fw, l2r_, cur5pi_i, cur5pf_i, cur5pf - cur5pi + 1 + gapadd_, topf, botf, pen_ + pen_rdg_ex)) {
690 								prm.nRedSkip++;
691 								continue; // Redundant with a path already explored
692 							}
693 							prm.nRedFail++;
694 							TIndexOffU width = b[j] - t[j];
695 							// off5p holds the offset from the 5' of the next
696 							// character we were trying to align when we decided to
697 							// introduce a read gap (before that character).  If we
698 							// were walking toward the 5' end, we need to increment
699 							// by 1.
700 							uint32_t off = (uint32_t)off5p + (toward3p ? 0 : 1);
701 							Edit edit(off, (int)("ACGTN"[j]), '-', EDIT_TYPE_READ_GAP);
702 							assert(edit.pos2 != std::numeric_limits<uint32_t>::max());
703 							edit.pos2 = edit_.pos2 + (toward3p ? 1 : -1);
704 							DescentPriority pri(pen_ + pen_rdg_ex, depth, width, rootpri);
705                             assert(topf != 0 || botf != 0);
706                             assert(topb != 0 || botb != 0);
707 							assert_eq(botb - topb, botf - topf);
708 							edge.init(edit, off5p, pri, d
709 #ifndef NDEBUG
710                             , d,
711                             topf, botf, topb, botb
712 #endif
713                             );
714 							out_.update(edge);
715 							nout++;
716 						}
717 					}
718 					if(pen_rfg_ex <= diff && edit_.isRefGap()) {
719 						// Extension of a reference gap
720 						rfex = true;
721 						if(pf[d].flags.rfgExplore()) {
722                             TIndexOffU topf = l2r_ ? topp : top;
723                             TIndexOffU botf = l2r_ ? botp : bot;
724 							ASSERT_ONLY(TIndexOffU topb = l2r_ ? top : topp);
725 							ASSERT_ONLY(TIndexOffU botb = l2r_ ? bot : botp);
726 							assert(topf != 0 || botf != 0);
727 							assert(topb != 0 || botb != 0);
728 							size_t nrefal = cur5pf - cur5pi + gapadd_;
729 							if(!re.contains(fw, l2r_, cur5pi, cur5pf, nrefal, topf, botf, pen_ + pen_rfg_ex)) {
730 								TIndexOffU width = bot - top;
731 								Edit edit((uint32_t)off5p, '-', (int)("ACGTN"[c]), EDIT_TYPE_REF_GAP);
732 								DescentPriority pri(pen_ + pen_rfg_ex, depth, width, rootpri);
733 								assert(topf != 0 || botf != 0);
734 								assert(topb != 0 || botb != 0);
735 								edge.init(edit, off5p, pri, d
736 #ifndef NDEBUG
737 								// It's a little unclear what the depth ought to be.
738 								// Is it the depth we were at when we did the ref
739 								// gap?  I.e. the depth of the flags where rfgExplore()
740 								// returned true?  Or is it the depth where we can
741 								// retrieve the appropriate top/bot?  We make it the
742 								// latter, might wrap around, indicating we should get
743 								// top/bot from the descent's topf_, ... fields.
744 								, (d == posid_) ? std::numeric_limits<size_t>::max() : (d - 1),
745 								topf, botf, topb, botb
746 #endif
747 								);
748 								out_.update(edge);
749 								nout++;
750 								prm.nRedFail++;
751 							} else {
752 								prm.nRedSkip++;
753 							}
754 						}
755 					}
756 				}
757 				if(!allmatch && pen_rdg_op <= diff && !rdex) {
758 					// Opening a new read gap
759 					for(int j = 0; j < 4; j++) {
760 						if(b[j] <= t[j]) {
761 							continue; // No outgoing edge with this nucleotide
762 						}
763 						if(!pf[d].flags.rdgExplore(j)) {
764 							continue; // Already been explored
765 						}
766 						TIndexOffU topf = pf[d].topf[j], botf = pf[d].botf[j];
767 						ASSERT_ONLY(TIndexOffU topb = pf[d].topb[j], botb = pf[d].botb[j]);
768 						assert(topf != 0 || botf != 0);
769 						assert(topb != 0 || botb != 0);
770 						if(re.contains(fw, l2r_, cur5pi_i, cur5pf_i, cur5pf - cur5pi + 1 + gapadd_, topf, botf, pen_ + pen_rdg_op)) {
771 							prm.nRedSkip++;
772 							continue; // Redundant with a path already explored
773 						}
774 						prm.nRedFail++;
775 						TIndexOffU width = b[j] - t[j];
776 						// off5p holds the offset from the 5' of the next
777 						// character we were trying to align when we decided to
778 						// introduce a read gap (before that character).  If we
779 						// were walking toward the 5' end, we need to increment
780 						// by 1.
781 						uint32_t off = (uint32_t)off5p + (toward3p ? 0 : 1);
782 						Edit edit(off, (int)("ACGTN"[j]), '-', EDIT_TYPE_READ_GAP);
783 						assert(edit.pos2 != std::numeric_limits<uint32_t>::max());
784 						DescentPriority pri(pen_ + pen_rdg_op, depth, width, rootpri);
785                         assert(topf != 0 || botf != 0);
786                         assert(topb != 0 || botb != 0);
787 						assert_eq(botb - topb, botf - topf);
788 						edge.init(edit, off5p, pri, d
789 #ifndef NDEBUG
790                         , d, topf, botf, topb, botb
791 #endif
792                         );
793 						out_.update(edge);
794 						nout++;
795 					}
796 				}
797 				if(!allmatch && pen_rfg_op <= diff && !rfex) {
798 					// Opening a new reference gap
799                     if(pf[d].flags.rfgExplore()) {
800                         TIndexOffU topf = l2r_ ? topp : top;
801                         TIndexOffU botf = l2r_ ? botp : bot;
802 						ASSERT_ONLY(TIndexOffU topb = l2r_ ? top : topp);
803 						ASSERT_ONLY(TIndexOffU botb = l2r_ ? bot : botp);
804 						assert(topf != 0 || botf != 0);
805 						assert(topb != 0 || botb != 0);
806 						size_t nrefal = cur5pf - cur5pi + gapadd_;
807 						if(!re.contains(fw, l2r_, cur5pi, cur5pf, nrefal, topf, botf, pen_ + pen_rfg_op)) {
808 							TIndexOffU width = bot - top;
809 							Edit edit((uint32_t)off5p, '-', (int)("ACGTN"[c]), EDIT_TYPE_REF_GAP);
810 							DescentPriority pri(pen_ + pen_rfg_op, depth, width, rootpri);
811 							assert(topf != 0 || botf != 0);
812 							assert(topb != 0 || botb != 0);
813 							edge.init(edit, off5p, pri, d
814 #ifndef NDEBUG
815 							// It's a little unclear what the depth ought to be.
816 							// Is it the depth we were at when we did the ref
817 							// gap?  I.e. the depth of the flags where rfgExplore()
818 							// returned true?  Or is it the depth where we can
819 							// retrieve the appropriate top/bot?  We make it the
820 							// latter, might wrap around, indicating we should get
821 							// top/bot from the descent's topf_, ... fields.
822 							, (d == posid_) ? std::numeric_limits<size_t>::max() : (d - 1),
823 							topf, botf, topb, botb
824 #endif
825 							);
826 							out_.update(edge);
827 							nout++;
828 							prm.nRedFail++;
829 						} else {
830 							prm.nRedSkip++;
831 						}
832                     }
833 				}
834 			}
835 		}
836 		// Update off5p, off3p, depth
837         d++;
838 		depth++;
839         assert_leq(depth, al5pf_ - al5pi_ + 2);
840         if(toward3p) {
841             if(off3p == 0) {
842                 break;
843             }
844             off5p++;
845             off3p--;
846 			cur5pf++;
847         } else {
848             if(off5p == 0) {
849                 break;
850             }
851             off3p++;
852             off5p--;
853 			cur5pi--;
854         }
855 		// Update top and bot
856 		if(off5p >= al5pi_ - extrai && off5p <= al5pf_ + extraf) {
857 			assert_range(0, 3, c);
858 			top = t[c], topp = tp[c];
859 			bot = b[c], botp = bp[c];
860 			assert_eq(bot-top, botp-topp);
861 		}
862 	}
863 	lastRecalc_ = (nout <= 5);
864     out_.best1.updateFlags(pf);
865     out_.best2.updateFlags(pf);
866     out_.best3.updateFlags(pf);
867     out_.best4.updateFlags(pf);
868     out_.best5.updateFlags(pf);
869 	return nout;
870 }
871 
print(std::ostream * os,const char * prefix,const Read & q,size_t trimLf,size_t trimRg,bool fw,const EList<Edit> & edits,size_t ei,size_t en,BTDnaString & rf) const872 void Descent::print(
873 	std::ostream *os,
874 	const char *prefix,
875 	const Read& q,
876 	size_t trimLf,
877 	size_t trimRg,
878 	bool fw,
879 	const EList<Edit>& edits,
880 	size_t ei,
881 	size_t en,
882 	BTDnaString& rf) const
883 {
884 	const BTDnaString& read = fw ? q.patFw : q.patRc;
885 	size_t eidx = ei;
886 	if(os != NULL) { *os << prefix; }
887 	// Print read
888 	for(size_t i = 0; i < read.length(); i++) {
889 		if(i < trimLf || i >= read.length() - trimRg) {
890 			if(os != NULL) { *os << (char)tolower(read.toChar(i)); }
891 			continue;
892 		}
893 		bool del = false, mm = false;
894 		while(eidx < ei + en && edits[eidx].pos == i) {
895 			if(edits[eidx].isReadGap()) {
896 				if(os != NULL) { *os << '-'; }
897 			} else if(edits[eidx].isRefGap()) {
898 				del = true;
899 				assert_eq((int)edits[eidx].qchr, read.toChar(i));
900 				if(os != NULL) { *os << read.toChar(i); }
901 			} else {
902 				mm = true;
903 				assert(edits[eidx].isMismatch());
904 				assert_eq((int)edits[eidx].qchr, read.toChar(i));
905 				if(os != NULL) { *os << (char)edits[eidx].qchr; }
906 			}
907 			eidx++;
908 		}
909 		if(!del && !mm) {
910 			// Print read character
911 			if(os != NULL) { *os << read.toChar(i); }
912 		}
913 	}
914 	if(os != NULL) {
915 		*os << endl;
916 		*os << prefix;
917 	}
918 	eidx = ei;
919 	// Print match bars
920 	for(size_t i = 0; i < read.length(); i++) {
921 		if(i < trimLf || i >= read.length() - trimRg) {
922 			if(os != NULL) { *os << ' '; }
923 			continue;
924 		}
925 		bool del = false, mm = false;
926 		while(eidx < ei + en && edits[eidx].pos == i) {
927 			if(edits[eidx].isReadGap()) {
928 				if(os != NULL) { *os << ' '; }
929 			} else if(edits[eidx].isRefGap()) {
930 				del = true;
931 				if(os != NULL) { *os << ' '; }
932 			} else {
933 				mm = true;
934 				assert(edits[eidx].isMismatch());
935 				if(os != NULL) { *os << ' '; }
936 			}
937 			eidx++;
938 		}
939 		if(!del && !mm && os != NULL) { *os << '|'; }
940 	}
941 	if(os != NULL) {
942 		*os << endl;
943 		*os << prefix;
944 	}
945 	eidx = ei;
946 	// Print reference
947 	for(size_t i = 0; i < read.length(); i++) {
948 		if(i < trimLf || i >= read.length() - trimRg) {
949 			if(os != NULL) { *os << ' '; }
950 			continue;
951 		}
952 		bool del = false, mm = false;
953 		while(eidx < ei + en && edits[eidx].pos == i) {
954 			if(edits[eidx].isReadGap()) {
955 				rf.appendChar((char)edits[eidx].chr);
956 				if(os != NULL) { *os << (char)edits[eidx].chr; }
957 			} else if(edits[eidx].isRefGap()) {
958 				del = true;
959 				if(os != NULL) { *os << '-'; }
960 			} else {
961 				mm = true;
962 				assert(edits[eidx].isMismatch());
963 				rf.appendChar((char)edits[eidx].chr);
964 				if(os != NULL) { *os << (char)edits[eidx].chr; }
965 			}
966 			eidx++;
967 		}
968 		if(!del && !mm) {
969 			rf.append(read[i]);
970 			if(os != NULL) { *os << read.toChar(i); }
971 		}
972 	}
973 	if(os != NULL) { *os << endl; }
974 }
975 
976 /**
977  * Create a new Descent
978  */
bounce(const Read & q,TIndexOffU topf,TIndexOffU botf,TIndexOffU topb,TIndexOffU botb,const Ebwt & ebwtFw,const Ebwt & ebwtBw,const Scoring & sc,TAlScore minsc,TAlScore maxpen,DescentRedundancyChecker & re,EFactory<Descent> & df,EFactory<DescentPos> & pf,const EList<DescentRoot> & rs,const EList<DescentConfig> & cs,EHeap<TDescentPair> & heap,DescentAlignmentSink & alsink,DescentMetrics & met,PerReadMetrics & prm)979 bool Descent::bounce(
980 	const Read& q,                  // query string
981     TIndexOffU topf,                 // SA range top in fw index
982     TIndexOffU botf,                 // SA range bottom in fw index
983     TIndexOffU topb,                 // SA range top in bw index
984     TIndexOffU botb,                 // SA range bottom in bw index
985 	const Ebwt& ebwtFw,             // forward index
986 	const Ebwt& ebwtBw,             // mirror index
987 	const Scoring& sc,              // scoring scheme
988 	TAlScore minsc,                 // minimum score
989 	TAlScore maxpen,                // maximum penalty
990 	DescentRedundancyChecker& re,   // redundancy checker
991 	EFactory<Descent>& df,          // factory with Descent
992 	EFactory<DescentPos>& pf,       // factory with DescentPoss
993     const EList<DescentRoot>& rs,   // roots
994     const EList<DescentConfig>& cs, // configs
995 	EHeap<TDescentPair>& heap,      // heap of descents
996     DescentAlignmentSink& alsink,   // alignment sink
997     DescentMetrics& met,            // metrics
998 	PerReadMetrics& prm)            // per-read metrics
999 {
1000     assert_gt(botf, topf);
1001     assert(al5pi_ == 0 || al5pf_ == q.length()-1);
1002     assert(!(al5pi_ == 0 && al5pf_ == q.length()-1));
1003     size_t dfsz = df.size();
1004     size_t pfsz = pf.size();
1005 	TDescentId id = df.alloc();
1006     Edit e_null;
1007     assert(!e_null.inited());
1008 	// Follow matches
1009 	bool succ = df[id].init(
1010 		q,         // query
1011         rid_,      // root id
1012 		sc,        // scoring scheme
1013 		minsc,     // minimum score
1014 		maxpen,    // maximum penalty
1015 		al5pi_,    // new near-5' extreme
1016 		al5pf_,    // new far-5' extreme
1017 		topf,      // SA range top in FW index
1018 		botf,      // SA range bottom in FW index
1019 		topb,      // SA range top in BW index
1020 		botb,      // SA range bottom in BW index
1021 		!l2r_,     // direction this descent will go in; opposite from parent
1022 		id,        // my ID
1023 		descid_,   // parent ID
1024 		pen_,      // total penalties so far - same as parent
1025 		e_null,    // edit for incoming edge; uninitialized if bounced
1026 		ebwtFw,    // forward index
1027 		ebwtBw,    // mirror index
1028 		re,        // redundancy checker
1029 		df,        // Descent factory
1030 		pf,        // DescentPos factory
1031         rs,        // DescentRoot list
1032         cs,        // DescentConfig list
1033 		heap,      // heap
1034         alsink,    // alignment sink
1035 		met,       // metrics
1036 		prm);      // per-read metrics
1037     if(!succ) {
1038         // Reclaim memory we had used for this descent and its DescentPos info
1039         df.resize(dfsz);
1040         pf.resize(pfsz);
1041     }
1042     return succ;
1043 }
1044 
1045 /**
1046  * Take the best outgoing edge and place it in the heap.  When deciding what
1047  * outgoing edges exist, abide by constraints in DescentConfig.  These
1048  * constraints limit total penalty accumulated so far versus distance from
1049  * search root.  E.g. a constraint might disallow any gaps or mismatches within
1050  * 20 ply of the root, then allow 1 mismatch within 30 ply, 1 mismatch or 1 gap
1051  * within 40 ply, etc.
1052  */
followBestOutgoing(const Read & q,const Ebwt & ebwtFw,const Ebwt & ebwtBw,const Scoring & sc,TAlScore minsc,TAlScore maxpen,DescentRedundancyChecker & re,EFactory<Descent> & df,EFactory<DescentPos> & pf,const EList<DescentRoot> & rs,const EList<DescentConfig> & cs,EHeap<TDescentPair> & heap,DescentAlignmentSink & alsink,DescentMetrics & met,PerReadMetrics & prm)1053 void Descent::followBestOutgoing(
1054 	const Read& q,                  // query string
1055 	const Ebwt& ebwtFw,             // forward index
1056 	const Ebwt& ebwtBw,             // mirror index
1057 	const Scoring& sc,              // scoring scheme
1058 	TAlScore minsc,                 // minimum score
1059 	TAlScore maxpen,                // maximum penalty
1060 	DescentRedundancyChecker& re,   // redundancy checker
1061 	EFactory<Descent>& df,          // factory with Descent
1062 	EFactory<DescentPos>& pf,       // factory with DescentPoss
1063     const EList<DescentRoot>& rs,   // roots
1064     const EList<DescentConfig>& cs, // configs
1065 	EHeap<TDescentPair>& heap,      // heap of descents
1066     DescentAlignmentSink& alsink,   // alignment sink
1067 	DescentMetrics& met,            // metrics
1068 	PerReadMetrics& prm)            // per-read metrics
1069 {
1070 	// We assume this descent has been popped off the heap.  We'll re-add it if
1071 	// it hasn't been exhausted.
1072 	assert(q.repOk());
1073 	assert(!empty());
1074 	assert(!out_.empty());
1075 	while(!out_.empty()) {
1076 		DescentPriority best = out_.bestPri();
1077 		DescentEdge e = out_.rotate();
1078 		TReadOff al5pi_new = al5pi_, al5pf_new = al5pf_;
1079 		bool fw = rs[rid_].fw;
1080 		bool toward3p = (l2r_ == fw);
1081 		TReadOff edoff = e.off5p; // 5' offset of edit
1082 		assert_leq(edoff, al5pf_ + 1);
1083 		assert_geq(edoff + 1, al5pi_);
1084 		if(out_.empty()) {
1085 			if(!lastRecalc_) {
1086 				// This might allocate new Descents
1087 				recalcOutgoing(q, sc, minsc, maxpen, re, pf, rs, cs, prm);
1088 				if(empty()) {
1089 					// Could happen, since some outgoing edges may have become
1090 					// redundant in the meantime.
1091 					break;
1092 				}
1093 			} else {
1094 				assert(empty());
1095 			}
1096 		}
1097 		TReadOff doff; // edit's offset into this descent
1098 		int chr = asc2dna[e.e.chr];
1099 		// hitEnd is set to true iff this edit pushes us to the extreme 5' or 3'
1100 		// end of the alignment
1101 		bool hitEnd = false;
1102 		// done is set to true iff this edit aligns the only remaining character of
1103 		// the read
1104 		bool done = false;
1105 		if(toward3p) {
1106 			// The 3' extreme of the new Descent is further in (away from the 3'
1107 			// end) than the parent's.
1108 			al5pf_new = doff = edoff;
1109 			if(e.e.isReadGap()) {
1110 				// We didn't actually consume the read character at 'edoff', so
1111 				// retract al5pf_new by one position.  This doesn't effect the
1112 				// "depth" (doff) of the SA range we took, though.
1113 				assert_gt(al5pf_new, 0);
1114 				al5pf_new--;
1115 			}
1116 			assert_lt(al5pf_new, q.length());
1117 			hitEnd = (al5pf_new == q.length() - 1);
1118 			done = (hitEnd && al5pi_new == 0);
1119 			assert_geq(doff, off5p_i_);
1120 			doff = doff - off5p_i_;
1121 			assert_leq(doff, len_);
1122 		} else {
1123 			// The 5' extreme of the new Descent is further in (away from the 5'
1124 			// end) than the parent's.
1125 			al5pi_new = doff = edoff;
1126 			if(e.e.isReadGap()) {
1127 				// We didn't actually consume the read character at 'edoff', so
1128 				// move al5pi_new closer to the 3' end by one position.  This
1129 				// doesn't effect the "depth" (doff) of the SA range we took,
1130 				// though.
1131 				al5pi_new++;
1132 			}
1133 			hitEnd = (al5pi_new == 0);
1134 			done = (hitEnd && al5pf_new == q.length() - 1);
1135 			assert_geq(off5p_i_, doff);
1136 			doff = off5p_i_ - doff;
1137 			assert_leq(doff, len_);
1138 		}
1139 		// Check if this is redundant with an already-explored path
1140 		bool l2r = l2r_; // gets overridden if we bounce
1141 		if(!done && hitEnd) {
1142 			// Alignment finsihed extending in one direction
1143 			l2r = !l2r;
1144 		}
1145 		size_t dfsz = df.size();
1146 		size_t pfsz = pf.size();
1147 		TIndexOffU topf, botf, topb, botb;
1148 		size_t d = posid_ + doff;
1149 		if(e.e.isRefGap()) {
1150 			d--; // might underflow
1151 			if(doff == 0) {
1152 				topf = topf_;
1153 				botf = botf_;
1154 				topb = topb_;
1155 				botb = botb_;
1156 				d = std::numeric_limits<size_t>::max();
1157 				assert_eq(botf-topf, botb-topb);
1158 			} else {
1159 				assert_gt(al5pf_new, 0);
1160 				assert_gt(d, 0);
1161 				chr = pf[d].c;
1162 				assert(pf[d].inited());
1163 				assert_range(0, 3, chr);
1164 				topf = pf[d].topf[chr];
1165 				botf = pf[d].botf[chr];
1166 				topb = pf[d].topb[chr];
1167 				botb = pf[d].botb[chr];
1168 				assert_eq(botf-topf, botb-topb);
1169 			}
1170 		} else {
1171 			// A read gap or a mismatch
1172 			assert(pf[d].inited());
1173 			topf = pf[d].topf[chr];
1174 			botf = pf[d].botf[chr];
1175 			topb = pf[d].topb[chr];
1176 			botb = pf[d].botb[chr];
1177 			assert_eq(botf-topf, botb-topb);
1178 		}
1179 		assert_eq(d, e.d);
1180 		assert_eq(topf, e.topf);
1181 		assert_eq(botf, e.botf);
1182 		assert_eq(topb, e.topb);
1183 		assert_eq(botb, e.botb);
1184 		if(done) {
1185 			// Aligned the entire read end-to-end.  Presumably there's no need to
1186 			// create a new Descent object.  We just report the alignment.
1187 			alsink.reportAlignment(
1188 				q,        // query
1189 				ebwtFw,   // forward index
1190 				ebwtBw,   // backward index
1191 				topf,     // top of SA range in forward index
1192 				botf,     // bottom of SA range in forward index
1193 				topb,     // top of SA range in backward index
1194 				botb,     // bottom of SA range in backward index
1195 				descid_,  // Descent at the leaf
1196 				rid_,     // root id
1197 				e.e,      // extra edit, if necessary
1198 				best.pen, // penalty
1199 				df,       // factory with Descent
1200 				pf,       // factory with DescentPoss
1201 				rs,       // roots
1202 				cs);      // configs
1203 			assert(alsink.repOk());
1204 			return;
1205 		}
1206 		assert(al5pi_new != 0 || al5pf_new != q.length() - 1);
1207 		TDescentId id = df.alloc();
1208 		bool succ = df[id].init(
1209 			q,         // query
1210 			rid_,      // root id
1211 			sc,        // scoring scheme
1212 			minsc,     // minimum score
1213 			maxpen,    // maximum penalty
1214 			al5pi_new, // new near-5' extreme
1215 			al5pf_new, // new far-5' extreme
1216 			topf,      // SA range top in FW index
1217 			botf,      // SA range bottom in FW index
1218 			topb,      // SA range top in BW index
1219 			botb,      // SA range bottom in BW index
1220 			l2r,       // direction this descent will go in
1221 			id,        // my ID
1222 			descid_,   // parent ID
1223 			best.pen,  // total penalties so far
1224 			e.e,       // edit for incoming edge; uninitialized if bounced
1225 			ebwtFw,    // forward index
1226 			ebwtBw,    // mirror index
1227 			re,        // redundancy checker
1228 			df,        // Descent factory
1229 			pf,        // DescentPos factory
1230 			rs,        // DescentRoot list
1231 			cs,        // DescentConfig list
1232 			heap,      // heap
1233 			alsink,    // alignment sink
1234 			met,       // metrics
1235 			prm);      // per-read metrics
1236 		if(!succ) {
1237 			// Reclaim memory we had used for this descent and its DescentPos info
1238 			df.resize(dfsz);
1239 			pf.resize(pfsz);
1240 		}
1241 		break;
1242 	}
1243 	if(!empty()) {
1244 		// Re-insert this Descent with its new priority
1245 		heap.insert(make_pair(out_.bestPri(), descid_));
1246 	}
1247 }
1248 
1249 /**
1250  * Given the forward and backward indexes, and given topf/botf/topb/botb, get
1251  * tloc, bloc ready for the next step.
1252  */
nextLocsBi(const Ebwt & ebwtFw,const Ebwt & ebwtBw,SideLocus & tloc,SideLocus & bloc,TIndexOffU topf,TIndexOffU botf,TIndexOffU topb,TIndexOffU botb)1253 void Descent::nextLocsBi(
1254 	const Ebwt& ebwtFw, // forward index
1255 	const Ebwt& ebwtBw, // mirror index
1256 	SideLocus& tloc,    // top locus
1257 	SideLocus& bloc,    // bot locus
1258 	TIndexOffU topf,     // top in BWT
1259 	TIndexOffU botf,     // bot in BWT
1260 	TIndexOffU topb,     // top in BWT'
1261 	TIndexOffU botb)     // bot in BWT'
1262 {
1263 	assert_gt(botf, 0);
1264 	// Which direction are we going in next?
1265 	if(l2r_) {
1266 		// Left to right; use BWT'
1267 		if(botb - topb == 1) {
1268 			// Already down to 1 row; just init top locus
1269 			tloc.initFromRow(topb, ebwtBw.eh(), ebwtBw.ebwt());
1270 			bloc.invalidate();
1271 		} else {
1272 			SideLocus::initFromTopBot(
1273 				topb, botb, ebwtBw.eh(), ebwtBw.ebwt(), tloc, bloc);
1274 			assert(bloc.valid());
1275 		}
1276 	} else {
1277 		// Right to left; use BWT
1278 		if(botf - topf == 1) {
1279 			// Already down to 1 row; just init top locus
1280 			tloc.initFromRow(topf, ebwtFw.eh(), ebwtFw.ebwt());
1281 			bloc.invalidate();
1282 		} else {
1283 			SideLocus::initFromTopBot(
1284 				topf, botf, ebwtFw.eh(), ebwtFw.ebwt(), tloc, bloc);
1285 			assert(bloc.valid());
1286 		}
1287 	}
1288 	// Check if we should update the tracker with this refinement
1289 	assert(botf - topf == 1 ||  bloc.valid());
1290 	assert(botf - topf > 1  || !bloc.valid());
1291 }
1292 
1293 /**
1294  * Advance this descent by following read matches as far as possible.
1295  *
1296  * This routine doesn't have to consider the whole gamut of constraints on
1297  * which outgoing edges can be followed.  If it is a root descent, it does have
1298  * to know how deep the no-edit constraint goes, though, so we can decide
1299  * whether using the ftab would potentially jump over relevant branch points.
1300  * Apart from that, though, we simply proceed as far as it can go by matching
1301  * characters in the query, irrespective of the constraints.
1302  * recalcOutgoing(...) and followBestOutgoing(...) do have to consider these
1303  * constraints, though.
1304  *
1305  * Conceptually, as we make descending steps, we have:
1306  * 1. Before each step, a single range indicating how we departed the previous
1307  *    step
1308  * 2. As part of each step, a quad of ranges indicating what range would result
1309  *    if we proceeded on an a, c, g ot t
1310  *
1311  * Return true iff it is possible to branch from this descent.  If we haven't
1312  * exceeded the no-branch depth.
1313  */
followMatches(const Read & q,const Scoring & sc,const Ebwt & ebwtFw,const Ebwt & ebwtBw,DescentRedundancyChecker & re,EFactory<Descent> & df,EFactory<DescentPos> & pf,const EList<DescentRoot> & rs,const EList<DescentConfig> & cs,EHeap<TDescentPair> & heap,DescentAlignmentSink & alsink,DescentMetrics & met,PerReadMetrics & prm,bool & branches,bool & hitEnd,bool & done,TReadOff & off5p_i,TIndexOffU & topf_bounce,TIndexOffU & botf_bounce,TIndexOffU & topb_bounce,TIndexOffU & botb_bounce)1314 bool Descent::followMatches(
1315 	const Read& q,     // query string
1316     const Scoring& sc,         // scoring scheme
1317 	const Ebwt& ebwtFw,        // forward index
1318 	const Ebwt& ebwtBw,        // mirror index
1319 	DescentRedundancyChecker& re, // redundancy checker
1320 	EFactory<Descent>& df,     // Descent factory
1321 	EFactory<DescentPos>& pf,  // DescentPos factory
1322     const EList<DescentRoot>& rs,   // roots
1323     const EList<DescentConfig>& cs, // configs
1324 	EHeap<TDescentPair>& heap, // heap
1325     DescentAlignmentSink& alsink, // alignment sink
1326 	DescentMetrics& met,       // metrics
1327 	PerReadMetrics& prm,       // per-read metrics
1328     bool& branches,            // out: true -> there are > 0 ways to branch
1329     bool& hitEnd,              // out: true -> hit read end with non-empty range
1330     bool& done,                // out: true -> we made a full alignment
1331     TReadOff& off5p_i,         // out: initial 5' offset
1332     TIndexOffU& topf_bounce,    // out: top of SA range for fw idx for bounce
1333     TIndexOffU& botf_bounce,    // out: bot of SA range for fw idx for bounce
1334     TIndexOffU& topb_bounce,    // out: top of SA range for bw idx for bounce
1335     TIndexOffU& botb_bounce)    // out: bot of SA range for bw idx for bounce
1336 {
1337 	// TODO: make these full-fledged parameters
1338 	size_t nobranchDepth = 20;
1339 	bool stopOnN = true;
1340 	assert(q.repOk());
1341 	assert(repOk(&q));
1342 	assert_eq(ebwtFw.eh().ftabChars(), ebwtBw.eh().ftabChars());
1343 #ifndef NDEBUG
1344 	for(int i = 0; i < 4; i++) {
1345 		assert_eq(ebwtFw.fchr()[i], ebwtBw.fchr()[i]);
1346 	}
1347 #endif
1348 	SideLocus tloc, bloc;
1349 	TIndexOffU topf = topf_, botf = botf_, topb = topb_, botb = botb_;
1350     bool fw = rs[rid_].fw;
1351 	bool toward3p;
1352 	size_t off5p;
1353 	assert_lt(al5pi_, q.length());
1354 	assert_lt(al5pf_, q.length());
1355 	while(true) {
1356 		toward3p = (l2r_ == fw);
1357 		assert_geq(al5pf_, al5pi_);
1358 		assert(al5pi_ != 0 || al5pf_ != q.length() - 1);
1359 		if(toward3p) {
1360 			if(al5pf_ == q.length()-1) {
1361 				l2r_ = !l2r_;
1362 				continue;
1363 			}
1364 			if(al5pi_ == al5pf_ && root()) {
1365 				off5p = off5p_i = al5pi_;
1366 			} else {
1367 				off5p = off5p_i = (al5pf_ + 1);
1368 			}
1369 		} else {
1370 			if(al5pi_ == 0) {
1371 				l2r_ = !l2r_;
1372 				continue;
1373 			}
1374 			assert_gt(al5pi_, 0);
1375 			if(al5pi_ == al5pf_ && root()) {
1376 				off5p = off5p_i = al5pi_;
1377 			} else {
1378 				off5p = off5p_i = (al5pi_ - 1);
1379 			}
1380 		}
1381 		break;
1382 	}
1383 	size_t off3p = q.length() - off5p - 1;
1384 	assert_lt(off5p, q.length());
1385 	assert_lt(off3p, q.length());
1386 	bool firstPos = true;
1387 	assert_eq(0, len_);
1388 
1389 	// Number of times pf.alloc() is called.  So we can sanity check it.
1390 	size_t nalloc = 0;
1391     // Set to true as soon as we encounter a branch point along this descent.
1392     branches = false;
1393     // hitEnd is set to true iff this edit pushes us to the extreme 5' or 3'
1394     // end of the alignment
1395     hitEnd = false;
1396     // done is set to true iff this edit aligns the only remaining character of
1397     // the read
1398     done = false;
1399 	if(root()) {
1400         assert_eq(al5pi_, al5pf_);
1401 		// Check whether/how far we can jump using ftab
1402 		int ftabLen = ebwtFw.eh().ftabChars();
1403 		bool ftabFits = true;
1404 		if(toward3p && ftabLen + off5p > q.length()) {
1405 			ftabFits = false;
1406 		} else if(!toward3p && off5p < (size_t)ftabLen) {
1407 			ftabFits = false;
1408 		}
1409 		bool useFtab = ftabLen > 1 && (size_t)ftabLen <= nobranchDepth && ftabFits;
1410 		bool ftabFailed = false;
1411 		if(useFtab) {
1412 			prm.nFtabs++;
1413 			// Forward index: right-to-left
1414 			size_t off_r2l = fw ? off5p : q.length() - off5p - 1;
1415 			if(l2r_) {
1416 				//
1417 			} else {
1418 				assert_geq((int)off_r2l, ftabLen - 1);
1419 				off_r2l -= (ftabLen - 1);
1420 			}
1421 			bool ret = ebwtFw.ftabLoHi(fw ? q.patFw : q.patRc, off_r2l,
1422 			                           false, // reverse
1423 			                           topf, botf);
1424 			if(!ret) {
1425 				// Encountered an N or something else that made it impossible
1426 				// to use the ftab
1427 				ftabFailed = true;
1428 			} else {
1429 				if(botf - topf == 0) {
1430 					return false;
1431 				}
1432 				int c_r2l = fw ? q.patFw[off_r2l] : q.patRc[off_r2l];
1433 				// Backward index: left-to-right
1434 				size_t off_l2r = fw ? off5p : q.length() - off5p - 1;
1435 				if(l2r_) {
1436 					//
1437 				} else {
1438 					assert_geq((int)off_l2r, ftabLen - 1);
1439 					off_l2r -= (ftabLen - 1);
1440 				}
1441 				ASSERT_ONLY(bool ret2 = )
1442 				ebwtBw.ftabLoHi(fw ? q.patFw : q.patRc, off_l2r,
1443 								false, // don't reverse
1444 								topb, botb);
1445 				assert(ret == ret2);
1446 				int c_l2r = fw ? q.patFw[off_l2r + ftabLen - 1] :
1447 				                 q.patRc[off_l2r + ftabLen - 1];
1448 				assert_eq(botf - topf, botb - topb);
1449 				if(toward3p) {
1450 					assert_geq((int)off3p, ftabLen - 1);
1451 					off5p += ftabLen; off3p -= ftabLen;
1452 				} else {
1453 					assert_geq((int)off5p, ftabLen - 1);
1454 					off5p -= ftabLen; off3p += ftabLen;
1455 				}
1456 				len_ += ftabLen;
1457 				if(toward3p) {
1458 					// By convention, al5pf_ and al5pi_ start out equal, so we only
1459 					// advance al5pf_ by ftabLen - 1 (not ftabLen)
1460 					al5pf_ += (ftabLen - 1); // -1 accounts for inclusive al5pf_
1461 					if(al5pf_ == q.length() - 1) {
1462 						hitEnd = true;
1463 						done = (al5pi_ == 0);
1464 					}
1465 				} else {
1466 					// By convention, al5pf_ and al5pi_ start out equal, so we only
1467 					// advance al5pi_ by ftabLen - 1 (not ftabLen)
1468 					al5pi_ -= (ftabLen - 1);
1469 					if(al5pi_ == 0) {
1470 						hitEnd = true;
1471 						done = (al5pf_ == q.length()-1);
1472 					}
1473 				}
1474 				// Allocate DescentPos data structures and leave them empty.  We
1475 				// jumped over them by doing our lookup in the ftab, so we have no
1476 				// info about outgoing edges from them, besides the matching
1477 				// outgoing edge from the last pos which is in topf/botf and
1478 				// topb/botb.
1479 				size_t id = 0;
1480 				if(firstPos) {
1481 					posid_ = pf.alloc();
1482 					pf[posid_].reset();
1483 					firstPos = false;
1484 					for(int i = 1; i < ftabLen; i++) {
1485 						id = pf.alloc();
1486 						pf[id].reset();
1487 					}
1488 				} else {
1489 					for(int i = 0; i < ftabLen; i++) {
1490 						id = pf.alloc();
1491 						pf[id].reset();
1492 					}
1493 				}
1494 				assert_eq(botf-topf, botb-topb);
1495 				pf[id].c = l2r_ ? c_l2r : c_r2l;
1496 				pf[id].topf[l2r_ ? c_l2r : c_r2l] = topf;
1497 				pf[id].botf[l2r_ ? c_l2r : c_r2l] = botf;
1498 				pf[id].topb[l2r_ ? c_l2r : c_r2l] = topb;
1499 				pf[id].botb[l2r_ ? c_l2r : c_r2l] = botb;
1500 				assert(pf[id].inited());
1501 				nalloc += ftabLen;
1502 			}
1503 		}
1504 		if(!useFtab || ftabFailed) {
1505 			// Can't use ftab, use fchr instead
1506 			int rdc = q.getc(off5p, fw);
1507 			// If rdc is N, that's pretty bad!  That means we placed a root
1508 			// right on an N.  The only thing we can reasonably do is to pick a
1509 			// nucleotide at random and proceed.
1510 			if(rdc > 3) {
1511 				return false;
1512 			}
1513 			assert_range(0, 3, rdc);
1514 			topf = topb = ebwtFw.fchr()[rdc];
1515 			botf = botb = ebwtFw.fchr()[rdc+1];
1516 			if(botf - topf == 0) {
1517 				return false;
1518 			}
1519 			if(toward3p) {
1520 				off5p++; off3p--;
1521 			} else {
1522 				off5p--; off3p++;
1523 			}
1524 			len_++;
1525             if(toward3p) {
1526                 if(al5pf_ == q.length()-1) {
1527                     hitEnd = true;
1528                     done = (al5pi_ == 0);
1529                 }
1530             } else {
1531                 if(al5pi_ == 0) {
1532                     hitEnd = true;
1533                     done = (al5pf_ == q.length()-1);
1534                 }
1535             }
1536 			// Allocate DescentPos data structure.  We could fill it with the
1537 			// four ranges from fchr if we wanted to, but that will never be
1538 			// relevant.
1539 			size_t id = 0;
1540 			if(firstPos) {
1541 				posid_ = id = pf.alloc();
1542                 firstPos = false;
1543 			} else {
1544 				id = pf.alloc();
1545 			}
1546 			assert_eq(botf-topf, botb-topb);
1547 			pf[id].c = rdc;
1548 			pf[id].topf[rdc] = topf;
1549 			pf[id].botf[rdc] = botf;
1550 			pf[id].topb[rdc] = topb;
1551 			pf[id].botb[rdc] = botb;
1552 			assert(pf[id].inited());
1553 			nalloc++;
1554 		}
1555 		assert_gt(botf, topf);
1556 		assert_eq(botf - topf, botb - topb);
1557 		// Check if this is redundant with an already-explored path
1558 		if(!re.check(fw, l2r_, al5pi_, al5pf_, al5pf_ - al5pi_ + 1 + gapadd_,
1559 		             topf, botf, pen_))
1560 		{
1561 			prm.nRedSkip++;
1562 			return false;
1563 		}
1564 		prm.nRedFail++; // not pruned by redundancy list
1565 		prm.nRedIns++;  // inserted into redundancy list
1566 	}
1567     if(done) {
1568         Edit eempty;
1569         alsink.reportAlignment(
1570             q,        // query
1571 			ebwtFw,   // forward index
1572 			ebwtBw,   // backward index
1573 			topf,     // top of SA range in forward index
1574 			botf,     // bottom of SA range in forward index
1575 			topb,     // top of SA range in backward index
1576 			botb,     // bottom of SA range in backward index
1577             descid_,  // Descent at the leaf
1578 			rid_,     // root id
1579             eempty,   // extra edit, if necessary
1580             pen_,     // penalty
1581             df,       // factory with Descent
1582             pf,       // factory with DescentPoss
1583             rs,       // roots
1584             cs);      // configs
1585 		assert(alsink.repOk());
1586         return true;
1587     } else if(hitEnd) {
1588 		assert(botf > 0 || topf > 0);
1589         assert_gt(botf, topf);
1590         topf_bounce = topf;
1591         botf_bounce = botf;
1592         topb_bounce = topb;
1593         botb_bounce = botb;
1594         return true; // Bounced
1595     }
1596     // We just advanced either ftabLen characters, or 1 character,
1597     // depending on whether we used ftab or fchr.
1598     nextLocsBi(ebwtFw, ebwtBw, tloc, bloc, topf, botf, topb, botb);
1599     assert(tloc.valid());
1600 	assert(botf - topf == 1 ||  bloc.valid());
1601 	assert(botf - topf > 1  || !bloc.valid());
1602 	TIndexOffU t[4], b[4];   // dest BW ranges
1603 	TIndexOffU tp[4], bp[4]; // dest BW ranges for "prime" index
1604 	ASSERT_ONLY(TIndexOffU lasttot = botf - topf);
1605 	bool fail = false;
1606 	while(!fail && !hitEnd) {
1607         assert(!done);
1608 		int rdc = q.getc(off5p, fw);
1609 		int rdq = q.getq(off5p);
1610 		assert_range(0, 4, rdc);
1611 		assert_gt(botf, topf);
1612 		assert(botf - topf == 1 ||  bloc.valid());
1613 		assert(botf - topf > 1  || !bloc.valid());
1614 		assert(tloc.valid());
1615         TIndexOffU width = botf - topf;
1616 		bool ltr = l2r_;
1617 		const Ebwt& ebwt = ltr ? ebwtBw : ebwtFw;
1618 		t[0] = t[1] = t[2] = t[3] = b[0] = b[1] = b[2] = b[3] = 0;
1619 		int only = -1; // if we only get 1 non-empty range, this is the char
1620 		size_t nopts = 1;
1621 		if(bloc.valid()) {
1622 			// Set up initial values for the primes
1623 			if(ltr) {
1624 				tp[0] = tp[1] = tp[2] = tp[3] = topf;
1625 				bp[0] = bp[1] = bp[2] = bp[3] = botf;
1626 			} else {
1627 				tp[0] = tp[1] = tp[2] = tp[3] = topb;
1628 				bp[0] = bp[1] = bp[2] = bp[3] = botb;
1629 			}
1630 			// Range delimited by tloc/bloc has size >1.  If size == 1,
1631 			// we use a simpler query (see if(!bloc.valid()) blocks below)
1632 			met.bwops++;
1633 			met.bwops_bi++;
1634 			prm.nSdFmops++;
1635 			if(prm.doFmString) {
1636 				prm.fmString.add(false, pen_, 1);
1637 			}
1638 			ebwt.mapBiLFEx(tloc, bloc, t, b, tp, bp);
1639 			// t, b, tp and bp now filled
1640 			ASSERT_ONLY(TIndexOffU tot = (b[0]-t[0])+(b[1]-t[1])+(b[2]-t[2])+(b[3]-t[3]));
1641 			ASSERT_ONLY(TIndexOffU totp = (bp[0]-tp[0])+(bp[1]-tp[1])+(bp[2]-tp[2])+(bp[3]-tp[3]));
1642 			assert_eq(tot, totp);
1643 			assert_leq(tot, lasttot);
1644 			ASSERT_ONLY(lasttot = tot);
1645 			fail = (rdc > 3 || b[rdc] <= t[rdc]);
1646 			size_t nopts = 0;
1647 			if(b[0] > t[0]) { nopts++; only = 0; }
1648 			if(b[1] > t[1]) { nopts++; only = 1; }
1649 			if(b[2] > t[2]) { nopts++; only = 2; }
1650 			if(b[3] > t[3]) { nopts++; only = 3; }
1651             if(!fail && b[rdc] - t[rdc] < width) {
1652                 branches = true;
1653             }
1654 		} else {
1655 			tp[0] = tp[1] = tp[2] = tp[3] = bp[0] = bp[1] = bp[2] = bp[3] = 0;
1656 			// Range delimited by tloc/bloc has size 1
1657 			TIndexOffU ntop = ltr ? topb : topf;
1658 			met.bwops++;
1659 			met.bwops_1++;
1660 			prm.nSdFmops++;
1661 			if(prm.doFmString) {
1662 				prm.fmString.add(false, pen_, 1);
1663 			}
1664 			int cc = ebwt.mapLF1(ntop, tloc);
1665 			assert_range(-1, 3, cc);
1666             fail = (cc != rdc);
1667             if(fail) {
1668                 branches = true;
1669             }
1670 			if(cc >= 0) {
1671 				only = cc;
1672 				t[cc] = ntop; b[cc] = ntop+1;
1673 				tp[cc] = ltr ? topf : topb;
1674 				bp[cc] = ltr ? botf : botb;
1675 			}
1676 		}
1677 		// Now figure out what to do with our N.
1678 		int origRdc = rdc;
1679 		if(rdc == 4) {
1680 			fail = true;
1681 		} else {
1682 			topf = ltr ? tp[rdc] : t[rdc];
1683 			botf = ltr ? bp[rdc] : b[rdc];
1684 			topb = ltr ? t[rdc] : tp[rdc];
1685 			botb = ltr ? b[rdc] : bp[rdc];
1686 			assert_eq(botf - topf, botb - topb);
1687 		}
1688 		// The trouble with !stopOnN is that we don't have a way to store the N
1689 		// edits.  There could be several per Descent.
1690 		if(rdc == 4 && !stopOnN && nopts == 1) {
1691 			fail = false;
1692 			rdc = only;
1693 			int pen = sc.n(rdq);
1694 			assert_gt(pen, 0);
1695 			pen_ += pen;
1696 		}
1697 		assert_range(0, 4, origRdc);
1698 		assert_range(0, 4, rdc);
1699         // If 'fail' is true, we failed to align this read character.  We still
1700         // install the SA ranges into the DescentPos and increment len_ in this
1701         // case.
1702 
1703 		// Convert t, tp, b, bp info tf, bf, tb, bb
1704 		TIndexOffU *tf = ltr ? tp : t;
1705 		TIndexOffU *bf = ltr ? bp : b;
1706 		TIndexOffU *tb = ltr ? t : tp;
1707 		TIndexOffU *bb = ltr ? b : bp;
1708 		// Allocate DescentPos data structure.
1709 		if(firstPos) {
1710 			posid_ = pf.alloc();
1711             firstPos = false;
1712 		} else {
1713 			pf.alloc();
1714 		}
1715 		nalloc++;
1716 		pf[posid_ + len_].reset();
1717         pf[posid_ + len_].c = origRdc;
1718 		for(size_t i = 0; i < 4; i++) {
1719 			pf[posid_ + len_].topf[i] = tf[i];
1720 			pf[posid_ + len_].botf[i] = bf[i];
1721 			pf[posid_ + len_].topb[i] = tb[i];
1722 			pf[posid_ + len_].botb[i] = bb[i];
1723 			assert_eq(pf[posid_ + len_].botf[i] - pf[posid_ + len_].topf[i],
1724 			          pf[posid_ + len_].botb[i] - pf[posid_ + len_].topb[i]);
1725 		}
1726 		if(!fail) {
1727 			// Check if this is redundant with an already-explored path
1728 			size_t al5pf = al5pf_, al5pi = al5pi_;
1729 			if(toward3p) {
1730 				al5pf++;
1731 			} else {
1732 				al5pi--;
1733 			}
1734 			fail = !re.check(fw, l2r_, al5pi, al5pf,
1735 			                 al5pf - al5pi + 1 + gapadd_, topf, botf, pen_);
1736 			if(fail) {
1737 				prm.nRedSkip++;
1738 			} else {
1739 				prm.nRedFail++; // not pruned by redundancy list
1740 				prm.nRedIns++;  // inserted into redundancy list
1741 			}
1742 		}
1743 		if(!fail) {
1744 			len_++;
1745 			if(toward3p) {
1746 				al5pf_++;
1747 				off5p++;
1748 				off3p--;
1749 				if(al5pf_ == q.length() - 1) {
1750 					hitEnd = true;
1751 					done = (al5pi_ == 0);
1752 				}
1753 			} else {
1754 				assert_gt(al5pi_, 0);
1755 				al5pi_--;
1756 				off5p--;
1757 				off3p++;
1758 				if(al5pi_ == 0) {
1759 					hitEnd = true;
1760 					done = (al5pf_ == q.length() - 1);
1761 				}
1762 			}
1763 		}
1764         if(!fail && !hitEnd) {
1765             nextLocsBi(ebwtFw, ebwtBw, tloc, bloc, tf[rdc], bf[rdc], tb[rdc], bb[rdc]);
1766         }
1767 	}
1768 	assert_geq(al5pf_, al5pi_);
1769 	assert(!root() || al5pf_ - al5pi_ + 1 == nalloc || al5pf_ - al5pi_ + 2 == nalloc);
1770 	assert_geq(pf.size(), nalloc);
1771     if(done) {
1772         Edit eempty;
1773         alsink.reportAlignment(
1774             q,        // query
1775 			ebwtFw,   // forward index
1776 			ebwtBw,   // backward index
1777 			topf,     // top of SA range in forward index
1778 			botf,     // bottom of SA range in forward index
1779 			topb,     // top of SA range in backward index
1780 			botb,     // bottom of SA range in backward index
1781             descid_,  // Descent at the leaf
1782 			rid_,     // root id
1783             eempty,   // extra edit, if necessary
1784             pen_,     // penalty
1785             df,       // factory with Descent
1786             pf,       // factory with DescentPoss
1787             rs,       // roots
1788             cs);      // configs
1789 		assert(alsink.repOk());
1790         return true;
1791     } else if(hitEnd) {
1792         assert(botf > 0 || topf > 0);
1793         assert_gt(botf, topf);
1794         topf_bounce = topf;
1795         botf_bounce = botf;
1796         topb_bounce = topb;
1797         botb_bounce = botb;
1798         return true; // Bounced
1799     }
1800     assert(repOk(&q));
1801 	assert(!hitEnd || topf_bounce > 0 || botf_bounce > 0);
1802 	return true;
1803 }
1804 
1805 #ifdef ALIGNER_SEED2_MAIN
1806 
1807 #include <string>
1808 #include "sstring.h"
1809 #include "aligner_driver.h"
1810 
1811 using namespace std;
1812 
1813 bool gReportOverhangs = true;
1814 
1815 /**
1816  * A way of feeding simply tests to the seed alignment infrastructure.
1817  */
main(int argc,char ** argv)1818 int main(int argc, char **argv) {
1819 
1820     EList<string> strs;
1821     //                            GCTATATAGCGCGCTCGCATCATTTTGTGT
1822     strs.push_back(string("CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA"
1823                           "NNNNNNNNNN"
1824                           "CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA"));
1825     //                            GCTATATAGCGCGCTTGCATCATTTTGTGT
1826     //                                           ^
1827     bool packed = false;
1828 	pair<Ebwt*, Ebwt*> ebwts = Ebwt::fromStrings<SString<char> >(
1829 		strs,
1830 		packed,
1831 		0,
1832 		REF_READ_REVERSE,
1833 		Ebwt::default_bigEndian,
1834 		Ebwt::default_lineRate,
1835 		Ebwt::default_offRate,
1836 		Ebwt::default_ftabChars,
1837 		".aligner_seed2.cpp.tmp",
1838 		Ebwt::default_useBlockwise,
1839 		Ebwt::default_bmax,
1840 		Ebwt::default_bmaxMultSqrt,
1841 		Ebwt::default_bmaxDivN,
1842 		Ebwt::default_dcv,
1843 		Ebwt::default_seed,
1844 		false,  // verbose
1845 		false,  // autoMem
1846 		false); // sanity
1847 
1848     ebwts.first->loadIntoMemory (0, -1, true, true, true, true, false);
1849     ebwts.second->loadIntoMemory(0,  1, true, true, true, true, false);
1850 
1851 	int testnum = 0;
1852 
1853 	// Query is longer than ftab and matches exactly twice
1854     for(int rc = 0; rc < 2; rc++) {
1855 		for(int i = 0; i < 2; i++) {
1856 			cerr << "Test " << (++testnum) << endl;
1857 			cerr << "  Query with length greater than ftab" << endl;
1858 			DescentMetrics mets;
1859 			PerReadMetrics prm;
1860 			DescentDriver dr;
1861 
1862 			// Set up the read
1863 			BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
1864 			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
1865 			if(rc) {
1866 				seq.reverseComp();
1867 				qual.reverse();
1868 			}
1869 			dr.initRead(
1870 				Read("test", seq.toZBuf(), qual.toZBuf()),
1871 				false, // nofw
1872 				false, // norc
1873 				-30,   // minsc
1874 				30);   // maxpen
1875 
1876 			// Set up the DescentConfig
1877 			DescentConfig conf;
1878 			conf.cons.init(Ebwt::default_ftabChars, 1.0);
1879 			conf.expol = DESC_EX_NONE;
1880 
1881 			// Set up the search roots
1882 			dr.addRoot(
1883 				conf,   // DescentConfig
1884 				(i == 0) ? 0 : (seq.length() - 1), // 5' offset into read of root
1885 				(i == 0) ? true : false,           // left-to-right?
1886 				rc == 0,   // forward?
1887 				1,         // landing
1888 				0.0f);   // root priority
1889 
1890 			// Do the search
1891 			Scoring sc = Scoring::base1();
1892 			dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
1893 
1894 			// Confirm that an exact-matching alignment was found
1895 			assert_eq(1, dr.sink().nrange());
1896 			assert_eq(2, dr.sink().nelt());
1897 		}
1898 	}
1899 
1900 	// Query has length euqal to ftab and matches exactly twice
1901     for(int i = 0; i < 2; i++) {
1902 		cerr << "Test " << (++testnum) << endl;
1903 		cerr << "  Query with length equal to ftab" << endl;
1904         DescentMetrics mets;
1905 		PerReadMetrics prm;
1906         DescentDriver dr;
1907 
1908         // Set up the read
1909         BTDnaString seq ("GCTATATAGC", true);
1910         BTString    qual("ABCDEFGHIa");
1911 		dr.initRead(
1912 			Read("test", seq.toZBuf(), qual.toZBuf()),
1913 			false, // nofw
1914 			false, // norc
1915 			-30,   // minsc
1916 			30);   // maxpen
1917 
1918         // Set up the DescentConfig
1919         DescentConfig conf;
1920         conf.cons.init(Ebwt::default_ftabChars, 1.0);
1921         conf.expol = DESC_EX_NONE;
1922 
1923         // Set up the search roots
1924         dr.addRoot(
1925             conf,   // DescentConfig
1926             (i == 0) ? 0 : (seq.length() - 1), // 5' offset into read of root
1927             (i == 0) ? true : false,           // left-to-right?
1928             true,   // forward?
1929 			1,      // landing
1930             0.0f);  // root priority
1931 
1932         // Do the search
1933         Scoring sc = Scoring::base1();
1934         dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
1935 
1936 		// Confirm that an exact-matching alignment was found
1937 		assert_eq(1, dr.sink().nrange());
1938 		assert_eq(2, dr.sink().nelt());
1939     }
1940 
1941 	// Query has length less than ftab length and matches exactly twice
1942     for(int i = 0; i < 2; i++) {
1943 		cerr << "Test " << (++testnum) << endl;
1944 		cerr << "  Query with length less than ftab" << endl;
1945         DescentMetrics mets;
1946 		PerReadMetrics prm;
1947         DescentDriver dr;
1948 
1949         // Set up the read
1950         BTDnaString seq ("GCTATATAG", true);
1951         BTString    qual("ABCDEFGHI");
1952 		dr.initRead(
1953 			Read("test", seq.toZBuf(), qual.toZBuf()),
1954 			false, // nofw
1955 			false, // norc
1956 			-30,   // minsc
1957 			30);   // maxpen
1958 
1959         // Set up the DescentConfig
1960         DescentConfig conf;
1961         conf.cons.init(Ebwt::default_ftabChars, 1.0);
1962         conf.expol = DESC_EX_NONE;
1963 
1964         // Set up the search roots
1965         dr.addRoot(
1966             conf,   // DescentConfig
1967             (i == 0) ? 0 : (seq.length() - 1), // 5' offset into read of root
1968             (i == 0) ? true : false,           // left-to-right?
1969             true,   // forward?
1970 			1,      // landing
1971             0.0f);   // root priority
1972 
1973         // Do the search
1974         Scoring sc = Scoring::base1();
1975         dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
1976 
1977 		// Confirm that an exact-matching alignment was found
1978 		assert_eq(1, dr.sink().nrange());
1979 		assert_eq(2, dr.sink().nelt());
1980     }
1981 
1982 	// Search root is in the middle of the read, requiring a bounce
1983     for(int i = 0; i < 2; i++) {
1984 		cerr << "Test " << (++testnum) << endl;
1985 		cerr << "  Search root in middle of read" << endl;
1986         DescentMetrics mets;
1987 		PerReadMetrics prm;
1988         DescentDriver dr;
1989 
1990         // Set up the read
1991 		//                012345678901234567890123456789
1992         BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
1993         BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
1994 		TIndexOffU top, bot;
1995 		top = bot = 0;
1996 		bool ret = ebwts.first->contains("GCGCTCGCATCATTTTGTGT", &top, &bot);
1997 		cerr << ret << ", " << top << ", " << bot << endl;
1998 		dr.initRead(
1999 			Read("test", seq.toZBuf(), qual.toZBuf()),
2000 			false, // nofw
2001 			false, // norc
2002 			-30,   // minsc
2003 			30);   // maxpen
2004 
2005         // Set up the DescentConfig
2006         DescentConfig conf;
2007         conf.cons.init(Ebwt::default_ftabChars, 1.0);
2008         conf.expol = DESC_EX_NONE;
2009 
2010         // Set up the search roots
2011         dr.addRoot(
2012             conf,   // DescentConfig
2013             (i == 0) ? 10 : (seq.length() - 1 - 10), // 5' offset into read of root
2014             (i == 0) ? true : false,                 // left-to-right?
2015             true,   // forward?
2016 			1,      // landing
2017             0.0f);   // root priority
2018 
2019         // Do the search
2020         Scoring sc = Scoring::base1();
2021         dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2022 
2023 		// Confirm that an exact-matching alignment was found
2024 		assert_eq(1, dr.sink().nrange());
2025 		assert_eq(2, dr.sink().nelt());
2026     }
2027 
2028 	delete ebwts.first;
2029 	delete ebwts.second;
2030 
2031 	strs.clear();
2032     strs.push_back(string("CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA"
2033                           "NNNNNNNNNN"
2034                           "CATGTCAGCTATATAGCG"));
2035 	ebwts = Ebwt::fromStrings<SString<char> >(
2036 		strs,
2037 		packed,
2038 		0,
2039 		REF_READ_REVERSE,
2040 		Ebwt::default_bigEndian,
2041 		Ebwt::default_lineRate,
2042 		Ebwt::default_offRate,
2043 		Ebwt::default_ftabChars,
2044 		".aligner_seed2.cpp.tmp",
2045 		Ebwt::default_useBlockwise,
2046 		Ebwt::default_bmax,
2047 		Ebwt::default_bmaxMultSqrt,
2048 		Ebwt::default_bmaxDivN,
2049 		Ebwt::default_dcv,
2050 		Ebwt::default_seed,
2051 		false,  // verbose
2052 		false,  // autoMem
2053 		false); // sanity
2054 
2055     ebwts.first->loadIntoMemory (0, -1, true, true, true, true, false);
2056     ebwts.second->loadIntoMemory(0,  1, true, true, true, true, false);
2057 
2058 	// Test to see if the root selector works as we expect
2059 	{
2060 		BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
2061 		BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
2062 		//                012345678901234567890123456789
2063 		//                         abcdefghiA: 644
2064 		//                                    CDEFGHIabc : 454 + 100 = 554
2065 		DescentDriver dr;
2066 		PrioritizedRootSelector sel(
2067 			2.0,
2068 			SimpleFunc(SIMPLE_FUNC_CONST, 10.0, 10.0, 10.0, 10.0), // 6 roots
2069 			10);
2070 		// Set up the read
2071 		dr.initRead(
2072 			Read("test", seq.toZBuf(), qual.toZBuf()),
2073 			false, // nofw
2074 			false, // norc
2075 			-30,   // minsc
2076 			30,    // maxpen
2077 			NULL,  // opposite mate
2078 			&sel); // root selector
2079 		dr.printRoots(std::cerr);
2080 		assert_eq(12, dr.roots().size());
2081 		assert_eq(652, dr.roots()[0].pri);
2082 		assert_eq(652, dr.roots()[1].pri);
2083 	}
2084 
2085 	// Test to see if the root selector works as we expect; this time the
2086 	// string is longer (64 chars) and there are a couple Ns
2087 	{
2088 		BTDnaString seq ("NCTATATAGCGCGCTCGCATCNTTTTGTGTGCTATATAGCGCGCTCGCATCATTTTGTGTTTAT", true);
2089 		BTString    qual("ABCDEFGHIJKLMNOPabcdefghijklmnopABCDEFGHIJKLMNOPabcdefghijklmnop");
2090 		//                0123456789012345678901234567890123456789012345678901234567890123
2091 		//                                 bcdefghijklmnop: 1080 - 150     bcdefghijklmnop: 1080 + 150 = 1230
2092 		//                                                 = 930
2093 		DescentDriver dr;
2094 		PrioritizedRootSelector sel(
2095 			2.0,
2096 			SimpleFunc(SIMPLE_FUNC_CONST, 10.0, 10.0, 10.0, 10.0), // 6 * 4 = 24 roots
2097 			15);  // landing size
2098 		// Set up the read
2099 		dr.initRead(
2100 			Read("test", seq.toZBuf(), qual.toZBuf()),
2101 			false, // nofw
2102 			false, // norc
2103 			-30,   // minsc
2104 			30,    // maxpen
2105 			NULL,  // opposite mate
2106 			&sel); // root selector
2107 		dr.printRoots(std::cerr);
2108 		assert_eq(24, dr.roots().size());
2109 		assert_eq(1230, dr.roots()[0].pri);
2110 		assert_eq(1230, dr.roots()[1].pri);
2111 	}
2112 
2113 	// Query is longer than ftab and matches exactly once.  One search root for
2114 	// forward read.
2115 	{
2116 		size_t last_topf = std::numeric_limits<size_t>::max();
2117 		size_t last_botf = std::numeric_limits<size_t>::max();
2118 		for(int i = 0; i < 2; i++) {
2119 			BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
2120 			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
2121 			for(size_t j = 0; j < seq.length(); j++) {
2122 				cerr << "Test " << (++testnum) << endl;
2123 				cerr << "  Query with length greater than ftab and matches exactly once" << endl;
2124 				DescentMetrics mets;
2125 				PerReadMetrics prm;
2126 				DescentDriver dr;
2127 
2128 				// Set up the read
2129 				dr.initRead(
2130 					Read("test", seq.toZBuf(), qual.toZBuf()),
2131 					false, // nofw
2132 					false, // norc
2133 					-30,   // minsc
2134 					30);   // maxpen
2135 
2136 				// Set up the DescentConfig
2137 				DescentConfig conf;
2138 				conf.cons.init(Ebwt::default_ftabChars, 1.0);
2139 				conf.expol = DESC_EX_NONE;
2140 
2141 				// Set up the search roots
2142 				dr.addRoot(
2143 					conf,   // DescentConfig
2144 					j,      // 5' offset into read of root
2145 					i == 0, // left-to-right?
2146 					true,   // forward?
2147 					1,      // landing
2148 					0.0f);   // root priority
2149 
2150 				// Do the search
2151 				Scoring sc = Scoring::base1();
2152 				dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2153 
2154 				// Confirm that an exact-matching alignment was found
2155 				assert_eq(1, dr.sink().nrange());
2156 				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
2157 				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
2158 				assert_eq(1, dr.sink().nelt());
2159 			}
2160 		}
2161 	}
2162 
2163 	// Query is longer than ftab and its reverse complement matches exactly
2164 	// once.  Search roots on forward and reverse-comp reads.
2165 	{
2166 		size_t last_topf = std::numeric_limits<size_t>::max();
2167 		size_t last_botf = std::numeric_limits<size_t>::max();
2168 		for(int i = 0; i < 2; i++) {
2169 			BTDnaString seq ("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
2170 			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
2171 			for(size_t j = 0; j < seq.length(); j++) {
2172 				cerr << "Test " << (++testnum) << endl;
2173 				cerr << "  Query with length greater than ftab and reverse complement matches exactly once" << endl;
2174 				DescentMetrics mets;
2175 				PerReadMetrics prm;
2176 				DescentDriver dr;
2177 
2178 				// Set up the read
2179 				dr.initRead(
2180 					Read("test", seq.toZBuf(), qual.toZBuf()),
2181 					false, // nofw
2182 					false, // norc
2183 					-30,   // minsc
2184 					30);   // maxpen
2185 
2186 				// Set up the DescentConfig
2187 				DescentConfig conf;
2188 				conf.cons.init(Ebwt::default_ftabChars, 1.0);
2189 				conf.expol = DESC_EX_NONE;
2190 
2191 				// Set up the search roots
2192 				dr.addRoot(
2193 					conf,   // DescentConfig
2194 					j,      // 5' offset into read of root
2195 					i == 0, // left-to-right?
2196 					true,   // forward?
2197 					1,      // landing
2198 					0.0f);   // root priority
2199 				dr.addRoot(
2200 					conf,   // DescentConfig
2201 					j,      // 5' offset into read of root
2202 					i == 0, // left-to-right?
2203 					false,  // forward?
2204 					1,      // landing
2205 					1.0f);   // root priority
2206 
2207 				// Do the search
2208 				Scoring sc = Scoring::base1();
2209 				dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2210 
2211 				// Confirm that an exact-matching alignment was found
2212 				assert_eq(1, dr.sink().nrange());
2213 				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
2214 				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
2215 				assert_eq(1, dr.sink().nelt());
2216 			}
2217 		}
2218 	}
2219 
2220 	// Query is longer than ftab and matches exactly once with one mismatch
2221 	{
2222 		size_t last_topf = std::numeric_limits<size_t>::max();
2223 		size_t last_botf = std::numeric_limits<size_t>::max();
2224 		for(int i = 0; i < 2; i++) {
2225 			// Set up the read
2226 			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
2227 			//                ||||||||||||||||||||||||||||||
2228 			BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
2229 			//                012345678901234567890123456789
2230 			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
2231 			for(size_t k = 0; k < orig.length(); k++) {
2232 				BTDnaString seq = orig;
2233 				seq.set(seq[k] ^ 3, k);
2234 				for(size_t j = 0; j < seq.length(); j++) {
2235 					// Assume left-to-right
2236 					size_t beg = j;
2237 					size_t end = j + Ebwt::default_ftabChars;
2238 					// Mismatch penalty is 3, so we have to skip starting
2239 					// points that are within 2 from the mismatch
2240 					if((i > 0 && j > 0) || j == seq.length()-1) {
2241 						// Right-to-left
2242 						if(beg < Ebwt::default_ftabChars) {
2243 							beg = 0;
2244 						} else {
2245 							beg -= Ebwt::default_ftabChars;
2246 						}
2247 						end -= Ebwt::default_ftabChars;
2248 					}
2249 					size_t kk = k;
2250 					//if(rc) {
2251 					//	kk = seq.length() - k - 1;
2252 					//}
2253 					if(beg <= kk && end > kk) {
2254 						continue;
2255 					}
2256 					if((j > kk) ? (j - kk <= 2) : (kk - j <= 2)) {
2257 						continue;
2258 					}
2259 					cerr << "Test " << (++testnum) << endl;
2260 					cerr << "  Query with length greater than ftab and matches exactly once with 1mm" << endl;
2261 					DescentMetrics mets;
2262 					PerReadMetrics prm;
2263 					DescentDriver dr;
2264 
2265 					dr.initRead(
2266 						Read("test", seq.toZBuf(), qual.toZBuf()),
2267 						false, // nofw
2268 						false, // norc
2269 						-30,   // minsc
2270 						30);   // maxpen
2271 
2272 					// Set up the DescentConfig
2273 					DescentConfig conf;
2274 					// Changed
2275 					conf.cons.init(0, 1.0);
2276 					conf.expol = DESC_EX_NONE;
2277 
2278 					// Set up the search roots
2279 					dr.addRoot(
2280 						conf,    // DescentConfig
2281 						j,       // 5' offset into read of root
2282 						i == 0,  // left-to-right?
2283 						true,    // forward?
2284 						1,      // landing
2285 						0.0f);    // root priority
2286 
2287 					// Do the search
2288 					Scoring sc = Scoring::base1();
2289 					dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2290 
2291 					// Confirm that an exact-matching alignment was found
2292 					assert_eq(1, dr.sink().nrange());
2293 					assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
2294 					assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
2295 					cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
2296 					assert_eq(1, dr.sink().nelt());
2297 					last_topf = dr.sink()[0].topf;
2298 					last_botf = dr.sink()[0].botf;
2299 				}
2300 			}
2301 		}
2302     }
2303 
2304 	// Query is longer than ftab and matches exactly once with one N mismatch
2305 	{
2306 		size_t last_topf = std::numeric_limits<size_t>::max();
2307 		size_t last_botf = std::numeric_limits<size_t>::max();
2308 		for(int i = 0; i < 2; i++) {
2309 			// Set up the read
2310 			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
2311 			//                ||||||||||||||||||||||||||||||
2312 			BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
2313 			//                012345678901234567890123456789
2314 			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
2315 			for(size_t k = 0; k < orig.length(); k++) {
2316 				BTDnaString seq = orig;
2317 				seq.set(4, k);
2318 				for(size_t j = 0; j < seq.length(); j++) {
2319 					// Assume left-to-right
2320 					size_t beg = j;
2321 					size_t end = j + Ebwt::default_ftabChars;
2322 					// Mismatch penalty is 3, so we have to skip starting
2323 					// points that are within 2 from the mismatch
2324 					if((i > 0 && j > 0) || j == seq.length()-1) {
2325 						// Right-to-left
2326 						if(beg < Ebwt::default_ftabChars) {
2327 							beg = 0;
2328 						} else {
2329 							beg -= Ebwt::default_ftabChars;
2330 						}
2331 						end -= Ebwt::default_ftabChars;
2332 					}
2333 					if(beg <= k && end > k) {
2334 						continue;
2335 					}
2336 					if((j > k) ? (j - k <= 2) : (k - j <= 2)) {
2337 						continue;
2338 					}
2339 					cerr << "Test " << (++testnum) << endl;
2340 					cerr << "  Query with length greater than ftab and matches exactly once with 1mm" << endl;
2341 					DescentMetrics mets;
2342 					PerReadMetrics prm;
2343 					DescentDriver dr;
2344 
2345 					dr.initRead(
2346 						Read("test", seq.toZBuf(), qual.toZBuf()),
2347 						false, // nofw
2348 						false, // norc
2349 						-30,   // minsc
2350 						30);   // maxpen
2351 
2352 					// Set up the DescentConfig
2353 					DescentConfig conf;
2354 					// Changed
2355 					conf.cons.init(0, 1.0);
2356 					conf.expol = DESC_EX_NONE;
2357 
2358 					// Set up the search roots
2359 					dr.addRoot(
2360 						conf,   // DescentConfig
2361 						j,      // 5' offset into read of root
2362 						i == 0, // left-to-right?
2363 						true,   // forward?
2364 						1,      // landing
2365 						0.0f);   // root priority
2366 
2367 					// Do the search
2368 					Scoring sc = Scoring::base1();
2369 					dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2370 
2371 					// Confirm that an exact-matching alignment was found
2372 					assert_eq(1, dr.sink().nrange());
2373 					assert_eq(sc.n(40), dr.sink()[0].pen);
2374 					assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
2375 					assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
2376 					cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
2377 					assert_eq(1, dr.sink().nelt());
2378 					last_topf = dr.sink()[0].topf;
2379 					last_botf = dr.sink()[0].botf;
2380 				}
2381 			}
2382 		}
2383     }
2384 
2385 	// Throw a bunch of queries with a bunch of Ns in and try to force an assert
2386 	{
2387 		RandomSource rnd(79);
2388 		for(int i = 0; i < 2; i++) {
2389 			// Set up the read
2390 			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
2391 			//                ||||||||||||||||||||||||||||||
2392 			BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
2393 			//                012345678901234567890123456789
2394 			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
2395 			if(i == 1) {
2396 				orig.reverseComp();
2397 				qual.reverse();
2398 			}
2399 			for(size_t trials = 0; trials < 100; trials++) {
2400 				BTDnaString seq = orig;
2401 				size_t ns = 10;
2402 				for(size_t k = 0; k < ns; k++) {
2403 					size_t pos = rnd.nextU32() % seq.length();
2404 					seq.set(4, pos);
2405 				}
2406 
2407 				cerr << "Test " << (++testnum) << endl;
2408 				cerr << "  Query with a bunch of Ns" << endl;
2409 
2410 				DescentMetrics mets;
2411 				PerReadMetrics prm;
2412 				DescentDriver dr;
2413 
2414 				dr.initRead(
2415 					Read("test", seq.toZBuf(), qual.toZBuf()),
2416 					false, // nofw
2417 					false, // norc
2418 					-30,   // minsc
2419 					30);   // maxpen
2420 
2421 				// Set up the DescentConfig
2422 				DescentConfig conf;
2423 				// Changed
2424 				conf.cons.init(Ebwt::default_ftabChars, 1.0);
2425 				conf.expol = DESC_EX_NONE;
2426 
2427 				// Set up the search roots
2428 				for(size_t k = 0; k < ns; k++) {
2429 					size_t j = rnd.nextU32() % seq.length();
2430 					bool ltr = (rnd.nextU2() == 0) ? true : false;
2431 					bool fw = (rnd.nextU2() == 0) ? true : false;
2432 					dr.addRoot(
2433 						conf,   // DescentConfig
2434 						j,      // 5' offset into read of root
2435 						ltr,    // left-to-right?
2436 						fw,     // forward?
2437 						1,      // landing
2438 						0.0f);   // root priority
2439 				}
2440 
2441 				// Do the search
2442 				Scoring sc = Scoring::base1();
2443 				dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2444 			}
2445 		}
2446     }
2447 
2448 	// Query is longer than ftab and matches exactly once with one mismatch
2449 	{
2450 		RandomSource rnd(77);
2451 		size_t last_topf = std::numeric_limits<size_t>::max();
2452 		size_t last_botf = std::numeric_limits<size_t>::max();
2453 		for(int i = 0; i < 2; i++) {
2454 			// Set up the read
2455 			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
2456 			//                ||||||||||||||||||||||||||||||
2457 			BTDnaString orig("GCTATATAGCGCGCTCGCATCATTTTGTGT", true);
2458 			//                012345678901234567890123456789
2459 			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIabc");
2460 			//       revcomp: ACACAAAATGATGCGAGCGCGCTATATAGC
2461 			//       revqual: cbaIHGFEDCBAihgfedcbaIHGFEDCBA
2462 			bool fwi = (i == 0);
2463 			if(!fwi) {
2464 				orig.reverseComp();
2465 			}
2466 			for(size_t k = 0; k < orig.length(); k++) {
2467 				BTDnaString seq = orig;
2468 				seq.set(seq[k] ^ 3, k);
2469 				cerr << "Test " << (++testnum) << endl;
2470 				cerr << "  Query with length greater than ftab and matches exactly once with 1mm.  Many search roots." << endl;
2471 				DescentMetrics mets;
2472 				PerReadMetrics prm;
2473 				DescentDriver dr;
2474 
2475 				dr.initRead(
2476 					Read("test", seq.toZBuf(), qual.toZBuf()),
2477 					false, // nofw
2478 					false, // norc
2479 					-30,   // minsc
2480 					30);   // maxpen
2481 
2482 				// Set up the DescentConfig
2483 				DescentConfig conf;
2484 				// Changed
2485 				conf.cons.init(0, 1.0);
2486 				conf.expol = DESC_EX_NONE;
2487 
2488 				// Set up several random search roots
2489 				bool onegood = false;
2490 				for(size_t y = 0; y < 10; y++) {
2491 					size_t j = rnd.nextU32() % seq.length();
2492 					bool ltr = (rnd.nextU2() == 0) ? true : false;
2493 					bool fw = (rnd.nextU2() == 0) ? true : false;
2494 					dr.addRoot(
2495 						conf,     // DescentConfig
2496 						(TReadOff)j,        // 5' offset into read of root
2497 						ltr,      // left-to-right?
2498 						fw,       // forward?
2499 						1,        // landing
2500 						(float)((float)y * 1.0f)); // root priority
2501 					// Assume left-to-right
2502 					size_t beg = j;
2503 					size_t end = j + Ebwt::default_ftabChars;
2504 					// Mismatch penalty is 3, so we have to skip starting
2505 					// points that are within 2 from the mismatch
2506 					if(!ltr) {
2507 						// Right-to-left
2508 						if(beg < Ebwt::default_ftabChars) {
2509 							beg = 0;
2510 						} else {
2511 							beg -= Ebwt::default_ftabChars;
2512 						}
2513 						end -= Ebwt::default_ftabChars;
2514 					}
2515 					bool good = true;
2516 					if(fw != fwi) {
2517 						good = false;
2518 					}
2519 					if(beg <= k && end > k) {
2520 						good = false;
2521 					}
2522 					if((j > k) ? (j - k <= 2) : (k - j <= 2)) {
2523 						good = false;
2524 					}
2525 					if(good) {
2526 						onegood = true;
2527 					}
2528 				}
2529 				if(!onegood) {
2530 					continue;
2531 				}
2532 
2533 				// Do the search
2534 				Scoring sc = Scoring::base1();
2535 				dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2536 
2537 				// Confirm that an exact-matching alignment was found
2538 				assert_eq(1, dr.sink().nrange());
2539 				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
2540 				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
2541 				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
2542 				assert_eq(1, dr.sink().nelt());
2543 				last_topf = dr.sink()[0].topf;
2544 				last_botf = dr.sink()[0].botf;
2545 			}
2546 		}
2547     }
2548 
2549 	// Query is longer than ftab and matches exactly once with one read gap
2550 	{
2551 		size_t last_topf = std::numeric_limits<size_t>::max();
2552 		size_t last_botf = std::numeric_limits<size_t>::max();
2553 		for(int i = 0; i < 2; i++) {
2554 		for(int k = 0; k < 2; k++) {
2555 			// Set up the read
2556 			//                GCTATATAGCGCGCCTGCATCATTTTGTGT
2557 			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
2558 			//                |||||||||||||||///////////////
2559 			BTDnaString seq ("GCTATATAGCGCGCTGCATCATTTTGTGT", true);
2560 			//                01234567890123456789012345678
2561 			//                87654321098765432109876543210
2562 			BTString    qual("ABCDEFGHIabcdefghiABCDEFGHIab");
2563 			if(k == 1) {
2564 				seq.reverseComp();
2565 				qual.reverse();
2566 			}
2567 			assert_eq(seq.length(), qual.length());
2568 			// js iterate over offsets from 5' end for the search root
2569 			for(size_t j = 0; j < seq.length(); j++) {
2570 				// Assume left-to-right
2571 				size_t beg = j;
2572 				if(k == 1) {
2573 					beg = seq.length() - beg - 1;
2574 				}
2575 				size_t end = beg + Ebwt::default_ftabChars;
2576 				// Mismatch penalty is 3, so we have to skip starting
2577 				// points that are within 2 from the mismatch
2578 				if((i > 0 && j > 0) || j == seq.length()-1) {
2579 					// Right-to-left
2580 					if(beg < Ebwt::default_ftabChars) {
2581 						beg = 0;
2582 					} else {
2583 						beg -= Ebwt::default_ftabChars;
2584 					}
2585 					end -= Ebwt::default_ftabChars;
2586 				}
2587 				assert_geq(end, beg);
2588 				if(beg <= 15 && end >= 15) {
2589 					continue;
2590 				}
2591 				cerr << "Test " << (++testnum) << endl;
2592 				cerr << "  Query matches once with a read gap of length 1" << endl;
2593 				DescentMetrics mets;
2594 				PerReadMetrics prm;
2595 				DescentDriver dr;
2596 
2597 				Read q("test", seq.toZBuf(), qual.toZBuf());
2598 				assert(q.repOk());
2599 				dr.initRead(
2600 					q,     // read
2601 					false, // nofw
2602 					false, // norc
2603 					-30,   // minsc
2604 					30);   // maxpen
2605 
2606 				// Set up the DescentConfig
2607 				DescentConfig conf;
2608 				// Changed
2609 				conf.cons.init(0, 0.5);
2610 				conf.expol = DESC_EX_NONE;
2611 
2612 				// Set up the search roots
2613 				dr.addRoot(
2614 					conf,   // DescentConfig
2615 					j,      // 5' offset into read of root
2616 					i == 0, // left-to-right?
2617 					k == 0, // forward?
2618 					1,      // landing
2619 					0.0f);  // root priority
2620 
2621 				// Do the search
2622 				Scoring sc = Scoring::base1();
2623 				dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2624 
2625 				// Confirm that an exact-matching alignment was found
2626 				assert_eq(1, dr.sink().nrange());
2627 				assert_eq(sc.readGapOpen() + 0 * sc.readGapExtend(), dr.sink()[0].pen);
2628 				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
2629 				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
2630 				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
2631 				assert_eq(1, dr.sink().nelt());
2632 				last_topf = dr.sink()[0].topf;
2633 				last_botf = dr.sink()[0].botf;
2634 			}
2635 		}}
2636     }
2637 
2638 	// Query is longer than ftab and matches exactly once with one read gap of
2639 	// length 3
2640 	{
2641 		size_t last_topf = std::numeric_limits<size_t>::max();
2642 		size_t last_botf = std::numeric_limits<size_t>::max();
2643 		for(int i = 0; i < 2; i++) {
2644 		for(int k = 0; k < 2; k++) {
2645 			// Set up the read
2646 			//                GCTATATAGCGCGCGCTCATCATTTTGTGT
2647 			//    Ref: CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAACCA
2648 			//                ||||||||||||||   |||||||||||||
2649 			BTDnaString seq ("GCTATATAGCGCGC" "CATCATTTTGTGT", true);
2650 			//                01234567890123   4567890123456
2651 			//                65432109876543   2109876543210
2652 			BTString    qual("ABCDEFGHIabcde" "fghiABCDEFGHI");
2653 			if(k == 1) {
2654 				seq.reverseComp();
2655 				qual.reverse();
2656 			}
2657 			for(size_t j = 0; j < seq.length(); j++) {
2658 				// Assume left-to-right
2659 				size_t beg = j;
2660 				if(k == 1) {
2661 					beg = seq.length() - beg - 1;
2662 				}
2663 				size_t end = beg + Ebwt::default_ftabChars;
2664 				// Mismatch penalty is 3, so we have to skip starting
2665 				// points that are within 2 from the mismatch
2666 				if((i > 0 && j > 0) || j == seq.length()-1) {
2667 					// Right-to-left
2668 					if(beg < Ebwt::default_ftabChars) {
2669 						beg = 0;
2670 					} else {
2671 						beg -= Ebwt::default_ftabChars;
2672 					}
2673 					end -= Ebwt::default_ftabChars;
2674 				}
2675 				if(beg <= 14 && end >= 14) {
2676 					continue;
2677 				}
2678 				cerr << "Test " << (++testnum) << endl;
2679 				cerr << "  Query matches once with a read gap of length 3" << endl;
2680 				DescentMetrics mets;
2681 				PerReadMetrics prm;
2682 				DescentDriver dr;
2683 
2684 				dr.initRead(
2685 					Read("test", seq.toZBuf(), qual.toZBuf()),
2686 					false, // nofw
2687 					false, // norc
2688 					-30,   // minsc
2689 					30);   // maxpen
2690 
2691 				// Set up the DescentConfig
2692 				DescentConfig conf;
2693 				// Changed
2694 				conf.cons.init(0, 0.2);
2695 				conf.expol = DESC_EX_NONE;
2696 
2697 				// Set up the search roots
2698 				dr.addRoot(
2699 					conf,   // DescentConfig
2700 					j,      // 5' offset into read of root
2701 					i == 0, // left-to-right?
2702 					k == 0, // forward?
2703 					1,      // landing
2704 					0.0f);  // root priority
2705 
2706 				// Do the search
2707 				Scoring sc = Scoring::base1();
2708 				// Need to adjust the mismatch penalty up to avoid alignments
2709 				// with lots of mismatches.
2710 				sc.setMmPen(COST_MODEL_CONSTANT, 6, 6);
2711 				dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2712 
2713 				// Confirm that an exact-matching alignment was found
2714 				assert_eq(1, dr.sink().nrange());
2715 				assert_eq(sc.readGapOpen() + 2 * sc.readGapExtend(), dr.sink()[0].pen);
2716 				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
2717 				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
2718 				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
2719 				assert_eq(1, dr.sink().nelt());
2720 				last_topf = dr.sink()[0].topf;
2721 				last_botf = dr.sink()[0].botf;
2722 			}
2723 		}}
2724     }
2725 
2726 	// Query is longer than ftab and matches exactly once with one reference gap
2727 	{
2728 		size_t last_topf = std::numeric_limits<size_t>::max();
2729 		size_t last_botf = std::numeric_limits<size_t>::max();
2730 		for(int i = 0; i < 2; i++) {
2731 			// Set up the read
2732 			//    Ref: CATGTCAGCTATATAGCGCGC" "TCGCATCATTTTGTGTGTAAACCA
2733 			//                ||||||||||||||   ||||||||||||||||
2734 			BTDnaString seq ("GCTATATAGCGCGCA""TCGCATCATTTTGTGT", true);
2735 			//                012345678901234  5678901234567890
2736 			BTString    qual("ABCDEFGHIabcdef""ghiABCDEFGHIabcd");
2737 			for(size_t j = 0; j < seq.length(); j++) {
2738 				// Assume left-to-right
2739 				size_t beg = j;
2740 				size_t end = j + Ebwt::default_ftabChars;
2741 				// Mismatch penalty is 3, so we have to skip starting
2742 				// points that are within 2 from the mismatch
2743 				if((i > 0 && j > 0) || j == seq.length()-1) {
2744 					// Right-to-left
2745 					if(beg < Ebwt::default_ftabChars) {
2746 						beg = 0;
2747 					} else {
2748 						beg -= Ebwt::default_ftabChars;
2749 					}
2750 					end -= Ebwt::default_ftabChars;
2751 				}
2752 				if(beg <= 14 && end >= 14) {
2753 					continue;
2754 				}
2755 				cerr << "Test " << (++testnum) << endl;
2756 				cerr << "  Query matches once with a reference gap of length 1" << endl;
2757 				DescentMetrics mets;
2758 				PerReadMetrics prm;
2759 				DescentDriver dr;
2760 
2761 				dr.initRead(
2762 					Read("test", seq.toZBuf(), qual.toZBuf()),
2763 					false, // nofw
2764 					false, // norc
2765 					-30,   // minsc
2766 					30);   // maxpen
2767 
2768 				// Set up the DescentConfig
2769 				DescentConfig conf;
2770 				// Changed
2771 				conf.cons.init(1, 0.5);
2772 				conf.expol = DESC_EX_NONE;
2773 
2774 				// Set up the search roots
2775 				dr.addRoot(
2776 					conf,   // DescentConfig
2777 					j,      // 5' offset into read of root
2778 					i == 0, // left-to-right?
2779 					true,   // forward?
2780 					1,      // landing
2781 					0.0f);  // root priority
2782 
2783 				// Do the search
2784 				Scoring sc = Scoring::base1();
2785 				// Need to adjust the mismatch penalty up to avoid alignments
2786 				// with lots of mismatches.
2787 				sc.setMmPen(COST_MODEL_CONSTANT, 6, 6);
2788 				dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2789 
2790 				// Confirm that an exact-matching alignment was found
2791 				assert_eq(1, dr.sink().nrange());
2792 				assert_eq(sc.refGapOpen() + 0 * sc.refGapExtend(), dr.sink()[0].pen);
2793 				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
2794 				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
2795 				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
2796 				assert_eq(1, dr.sink().nelt());
2797 				last_topf = dr.sink()[0].topf;
2798 				last_botf = dr.sink()[0].botf;
2799 			}
2800 		}
2801     }
2802 
2803 	// Query is longer than ftab and matches exactly once with one reference gap
2804 	{
2805 		size_t last_topf = std::numeric_limits<size_t>::max();
2806 		size_t last_botf = std::numeric_limits<size_t>::max();
2807 		for(int i = 0; i < 2; i++) {
2808 			// Set up the read
2809 			//    Ref: CATGTCAGCTATATAGCGCGC"   "TCGCATCATTTTGTGTGTAAACCA
2810 			//                ||||||||||||||     ||||||||||||||||
2811 			BTDnaString seq ("GCTATATAGCGCGCATG""TCGCATCATTTTGTGT", true);
2812 			//                01234567890123456  7890123456789012
2813 			BTString    qual("ABCDEFGHIabcdefgh""iABCDEFGHIabcdef");
2814 			for(size_t j = 0; j < seq.length(); j++) {
2815 				// Assume left-to-right
2816 				size_t beg = j;
2817 				size_t end = j + Ebwt::default_ftabChars;
2818 				// Mismatch penalty is 3, so we have to skip starting
2819 				// points that are within 2 from the mismatch
2820 				if((i > 0 && j > 0) || j == seq.length()-1) {
2821 					// Right-to-left
2822 					if(beg < Ebwt::default_ftabChars) {
2823 						beg = 0;
2824 					} else {
2825 						beg -= Ebwt::default_ftabChars;
2826 					}
2827 					end -= Ebwt::default_ftabChars;
2828 				}
2829 				if(beg <= 14 && end >= 14) {
2830 					continue;
2831 				}
2832 				if(beg <= 15 && end >= 15) {
2833 					continue;
2834 				}
2835 				if(beg <= 16 && end >= 16) {
2836 					continue;
2837 				}
2838 				cerr << "Test " << (++testnum) << endl;
2839 				cerr << "  Query matches once with a reference gap of length 1" << endl;
2840 				DescentMetrics mets;
2841 				PerReadMetrics prm;
2842 				DescentDriver dr;
2843 
2844 				dr.initRead(
2845 					Read("test", seq.toZBuf(), qual.toZBuf()),
2846 					false, // nofw
2847 					false, // norc
2848 					-30,   // minsc
2849 					30);   // maxpen
2850 
2851 				// Set up the DescentConfig
2852 				DescentConfig conf;
2853 				// Changed
2854 				conf.cons.init(1, 0.25);
2855 				conf.expol = DESC_EX_NONE;
2856 
2857 				// Set up the search roots
2858 				dr.addRoot(
2859 					conf,   // DescentConfig
2860 					j,      // 5' offset into read of root
2861 					i == 0, // left-to-right?
2862 					true,   // forward?
2863 					1,      // landing
2864 					0.0f);  // root priority
2865 
2866 				// Do the search
2867 				Scoring sc = Scoring::base1();
2868 				// Need to adjust the mismatch penalty up to avoid alignments
2869 				// with lots of mismatches.
2870 				sc.setMmPen(COST_MODEL_CONSTANT, 6, 6);
2871 				dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2872 
2873 				// Confirm that an exact-matching alignment was found
2874 				assert_eq(1, dr.sink().nrange());
2875 				assert_eq(sc.refGapOpen() + 2 * sc.refGapExtend(), dr.sink()[0].pen);
2876 				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
2877 				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
2878 				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
2879 				assert_eq(1, dr.sink().nelt());
2880 				last_topf = dr.sink()[0].topf;
2881 				last_botf = dr.sink()[0].botf;
2882 			}
2883 		}
2884     }
2885 
2886 	// Query is longer than ftab and matches exactly once with one read gap,
2887 	// one ref gap, and one mismatch
2888 	{
2889 		size_t last_topf = std::numeric_limits<size_t>::max();
2890 		size_t last_botf = std::numeric_limits<size_t>::max();
2891 		for(int i = 0; i < 2; i++) {
2892 			// Set up the read
2893 			//           Ref: CATGTCAGCT   ATATAGCGCGCT  CGCATCATTTTGTGTGTAAACCA
2894 			//                ||||||||||   ||||||||||||   |||||| |||||||||||||
2895 			BTDnaString seq ("CATGTCAGCT""GATATAGCGCGCT" "GCATCAATTTGTGTGTAAAC", true);
2896 			//                0123456789  0123456789012   34567890123456789012
2897 			BTString    qual("ABCDEFGHIa""bcdefghiACDEF" "GHIabcdefghijkABCDEF");
2898 			for(size_t j = 0; j < seq.length(); j++) {
2899 				// Assume left-to-right
2900 				size_t beg = j;
2901 				size_t end = j + Ebwt::default_ftabChars;
2902 				// Mismatch penalty is 3, so we have to skip starting
2903 				// points that are within 2 from the mismatch
2904 				if((i > 0 && j > 0) || j == seq.length()-1) {
2905 					// Right-to-left
2906 					if(beg < Ebwt::default_ftabChars) {
2907 						beg = 0;
2908 					} else {
2909 						beg -= Ebwt::default_ftabChars;
2910 					}
2911 					end -= Ebwt::default_ftabChars;
2912 				}
2913 				if(beg <= 10 && end >= 10) {
2914 					continue;
2915 				}
2916 				if(beg <= 22 && end >= 22) {
2917 					continue;
2918 				}
2919 				if(beg <= 30 && end >= 30) {
2920 					continue;
2921 				}
2922 				cerr << "Test " << (++testnum) << endl;
2923 				cerr << "  Query matches once with a read gap of length 1" << endl;
2924 				DescentMetrics mets;
2925 				PerReadMetrics prm;
2926 				DescentDriver dr;
2927 
2928 				dr.initRead(
2929 					Read("test", seq.toZBuf(), qual.toZBuf()),
2930 					false, // nofw
2931 					false, // norc
2932 					-50,   // minsc
2933 					50);   // maxpen
2934 
2935 				// Set up the DescentConfig
2936 				DescentConfig conf;
2937 				// Changed
2938 				conf.cons.init(1, 0.5);
2939 				conf.expol = DESC_EX_NONE;
2940 
2941 				// Set up the search roots
2942 				dr.addRoot(
2943 					conf,   // DescentConfig
2944 					j,      // 5' offset into read of root
2945 					i == 0, // left-to-right?
2946 					true,   // forward?
2947 					1,      // landing
2948 					0.0f);  // root priority
2949 
2950 				// Do the search
2951 				Scoring sc = Scoring::base1();
2952 				dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
2953 
2954 				// Confirm that an exact-matching alignment was found
2955 				assert_eq(1, dr.sink().nrange());
2956 				assert_eq(sc.readGapOpen() + sc.refGapOpen() + sc.mm((int)'d' - 33), dr.sink()[0].pen);
2957 				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
2958 				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
2959 				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
2960 				assert_eq(1, dr.sink().nelt());
2961 				last_topf = dr.sink()[0].topf;
2962 				last_botf = dr.sink()[0].botf;
2963 			}
2964 		}
2965     }
2966 
2967 	delete ebwts.first;
2968 	delete ebwts.second;
2969 
2970 	//  Ref CATGTCAGCT-ATATAGCGCGCTCGCATCATTTTGTGTGTAAAC
2971 	//      |||||||||| |||||||||||| |||||| |||||||||||||
2972 	//  Rd  CATGTCAGCTGATATAGCGCGCT-GCATCAATTTGTGTGTAAAC
2973 	strs.clear();
2974     strs.push_back(string("CATGTCAGCTATATAGCGCGCTCGCATCATTTTGTGTGTAAAC"
2975                           "NNNNNNNNNN"
2976                           "CATGTCAGCTGATATAGCGCGCTCGCATCATTTTGTGTGTAAAC" // same but without first ref gap
2977                           "N"
2978                           "CATGTCAGCTATATAGCGCGCTGCATCATTTTGTGTGTAAAC" // same but without first read gap
2979                           "N"
2980                           "CATGTCAGCTATATAGCGCGCTCGCATCAATTTGTGTGTAAAC" // same but without first mismatch
2981                           "N"
2982                           "CATGTCAGCTGATATAGCGCGCTGCATCAATTTGTGTGTAAAC" // Exact match for read
2983 						  ));
2984 	ebwts = Ebwt::fromStrings<SString<char> >(
2985 		strs,
2986 		packed,
2987 		0,
2988 		REF_READ_REVERSE,
2989 		Ebwt::default_bigEndian,
2990 		Ebwt::default_lineRate,
2991 		Ebwt::default_offRate,
2992 		Ebwt::default_ftabChars,
2993 		".aligner_seed2.cpp.tmp",
2994 		Ebwt::default_useBlockwise,
2995 		Ebwt::default_bmax,
2996 		Ebwt::default_bmaxMultSqrt,
2997 		Ebwt::default_bmaxDivN,
2998 		Ebwt::default_dcv,
2999 		Ebwt::default_seed,
3000 		false,  // verbose
3001 		false,  // autoMem
3002 		false); // sanity
3003 
3004     ebwts.first->loadIntoMemory (0, -1, true, true, true, true, false);
3005     ebwts.second->loadIntoMemory(0,  1, true, true, true, true, false);
3006 
3007 	// Query is longer than ftab and matches exactly once with one read gap,
3008 	// one ref gap, and one mismatch
3009 	{
3010 		size_t last_topf = std::numeric_limits<size_t>::max();
3011 		size_t last_botf = std::numeric_limits<size_t>::max();
3012 		for(int i = 0; i < 2; i++) {
3013 			// Set up the read
3014 			//           Ref: CATGTCAGCT   ATATAGCGCGCT  CGCATCATTTTGTGTGTAAACCA
3015 			//                ||||||||||   ||||||||||||   |||||| |||||||||||||
3016 			BTDnaString seq ("CATGTCAGCT""GATATAGCGCGCT" "GCATCAATTTGTGTGTAAAC", true);
3017 			//                0123456789  0123456789012   34567890123456789012
3018 			BTString    qual("ABCDEFGHIa""bcdefghiACDEF" "GHIabcdefghijkABCDEF");
3019 			for(size_t j = 0; j < seq.length(); j++) {
3020 				// Assume left-to-right
3021 				size_t beg = j;
3022 				size_t end = j + Ebwt::default_ftabChars;
3023 				// Mismatch penalty is 3, so we have to skip starting
3024 				// points that are within 2 from the mismatch
3025 				if((i > 0 && j > 0) || j == seq.length()-1) {
3026 					// Right-to-left
3027 					if(beg < Ebwt::default_ftabChars) {
3028 						beg = 0;
3029 					} else {
3030 						beg -= Ebwt::default_ftabChars;
3031 					}
3032 					end -= Ebwt::default_ftabChars;
3033 				}
3034 				if(beg <= 10 && end >= 10) {
3035 					continue;
3036 				}
3037 				if(beg <= 22 && end >= 22) {
3038 					continue;
3039 				}
3040 				if(beg <= 30 && end >= 30) {
3041 					continue;
3042 				}
3043 				cerr << "Test " << (++testnum) << endl;
3044 				cerr << "  Query matches once with a read gap of length 1" << endl;
3045 				DescentMetrics mets;
3046 				PerReadMetrics prm;
3047 				DescentDriver dr;
3048 
3049 				dr.initRead(
3050 					Read("test", seq.toZBuf(), qual.toZBuf()),
3051 					false, // nofw
3052 					false, // norc
3053 					-50,   // minsc
3054 					50);   // maxpen
3055 
3056 				// Set up the DescentConfig
3057 				DescentConfig conf;
3058 				// Changed
3059 				conf.cons.init(1, 0.5);
3060 				conf.expol = DESC_EX_NONE;
3061 
3062 				// Set up the search roots
3063 				dr.addRoot(
3064 					conf,   // DescentConfig
3065 					j,      // 5' offset into read of root
3066 					i == 0, // left-to-right?
3067 					true,   // forward?
3068 					1,      // landing
3069 					0.0f);  // root priority
3070 
3071 				// Do the search
3072 				Scoring sc = Scoring::base1();
3073 				dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
3074 
3075 				// Confirm that an exact-matching alignment was found
3076 				assert_eq(5, dr.sink().nrange());
3077 				assert_eq(0, dr.sink()[0].pen);
3078 				assert_eq(min(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[1].pen);
3079 				assert_eq(max(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[2].pen);
3080 				assert_eq(sc.readGapOpen() + sc.refGapOpen(), dr.sink()[3].pen);
3081 				assert_eq(sc.readGapOpen() + sc.refGapOpen() + sc.mm((int)'d' - 33), dr.sink()[4].pen);
3082 				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
3083 				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
3084 				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
3085 				assert_eq(5, dr.sink().nelt());
3086 				last_topf = dr.sink()[0].topf;
3087 				last_botf = dr.sink()[0].botf;
3088 			}
3089 		}
3090     }
3091 
3092 	// Query is longer than ftab and matches exactly once with one read gap,
3093 	// one ref gap, one mismatch, and one N
3094 	{
3095 		size_t last_topf = std::numeric_limits<size_t>::max();
3096 		size_t last_botf = std::numeric_limits<size_t>::max();
3097 		for(int i = 0; i < 2; i++) {
3098 			// Set up the read
3099 			//           Ref: CATGTCAGCT   ATATAGCGCGCT  CGCATCATTTTGTGTGTAAACCA
3100 			//                ||||||||||   ||||||||||||   |||||| |||||| ||||||
3101 			BTDnaString seq ("CATGTCAGCT""GATATAGCGCGCT" "GCATCAATTTGTGNGTAAAC", true);
3102 			//                0123456789  0123456789012   34567890123456789012
3103 			BTString    qual("ABCDEFGHIa""bcdefghiACDEF" "GHIabcdefghijkABCDEF");
3104 			for(size_t j = 0; j < seq.length(); j++) {
3105 				// Assume left-to-right
3106 				size_t beg = j;
3107 				size_t end = j + Ebwt::default_ftabChars;
3108 				// Mismatch penalty is 3, so we have to skip starting
3109 				// points that are within 2 from the mismatch
3110 				if((i > 0 && j > 0) || j == seq.length()-1) {
3111 					// Right-to-left
3112 					if(beg < Ebwt::default_ftabChars) {
3113 						beg = 0;
3114 					} else {
3115 						beg -= Ebwt::default_ftabChars;
3116 					}
3117 					end -= Ebwt::default_ftabChars;
3118 				}
3119 				if(beg <= 10 && end >= 10) {
3120 					continue;
3121 				}
3122 				if(beg <= 22 && end >= 22) {
3123 					continue;
3124 				}
3125 				if(beg <= 30 && end >= 30) {
3126 					continue;
3127 				}
3128 				if(beg <= 36 && end >= 36) {
3129 					continue;
3130 				}
3131 				cerr << "Test " << (++testnum) << endl;
3132 				cerr << "  Query matches with various patterns of gaps, mismatches and Ns" << endl;
3133 				DescentMetrics mets;
3134 				PerReadMetrics prm;
3135 				DescentDriver dr;
3136 
3137 				dr.initRead(
3138 					Read("test", seq.toZBuf(), qual.toZBuf()),
3139 					false, // nofw
3140 					false, // norc
3141 					-50,   // minsc
3142 					50);   // maxpen
3143 
3144 				// Set up the DescentConfig
3145 				DescentConfig conf;
3146 				// Changed
3147 				conf.cons.init(1, 0.5);
3148 				conf.expol = DESC_EX_NONE;
3149 
3150 				// Set up the search roots
3151 				dr.addRoot(
3152 					conf,   // DescentConfig
3153 					j,      // 5' offset into read of root
3154 					i == 0, // left-to-right?
3155 					true,   // forward?
3156 					1,      // landing
3157 					0.0f);  // root priority
3158 
3159 				// Do the search
3160 				Scoring sc = Scoring::base1();
3161 				sc.setNPen(COST_MODEL_CONSTANT, 1);
3162 				dr.go(sc, *ebwts.first, *ebwts.second, mets, prm);
3163 
3164 				// Confirm that an exact-matching alignment was found
3165 				assert_eq(5, dr.sink().nrange());
3166 				assert_eq(sc.n(40), dr.sink()[0].pen);
3167 				assert_eq(sc.n(40) + min(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[1].pen);
3168 				assert_eq(sc.n(40) + max(sc.readGapOpen(), sc.refGapOpen()) + sc.mm((int)'d' - 33), dr.sink()[2].pen);
3169 				assert_eq(sc.n(40) + sc.readGapOpen() + sc.refGapOpen(), dr.sink()[3].pen);
3170 				assert_eq(sc.n(40) + sc.readGapOpen() + sc.refGapOpen() + sc.mm((int)'d' - 33), dr.sink()[4].pen);
3171 				assert(last_topf == std::numeric_limits<size_t>::max() || last_topf == dr.sink()[0].topf);
3172 				assert(last_botf == std::numeric_limits<size_t>::max() || last_botf == dr.sink()[0].botf);
3173 				cerr << dr.sink()[0].topf << ", " << dr.sink()[0].botf << endl;
3174 				assert_eq(5, dr.sink().nelt());
3175 				last_topf = dr.sink()[0].topf;
3176 				last_botf = dr.sink()[0].botf;
3177 			}
3178 		}
3179     }
3180 
3181     delete ebwts.first;
3182     delete ebwts.second;
3183 
3184 	cerr << "DONE" << endl;
3185 }
3186 #endif
3187 
3188