1 /****
2 DIAMOND protein aligner
3 Copyright (C) 2016-2020 Max Planck Society for the Advancement of Science e.V.
4                         Benjamin Buchfink
5 
6 Code developed by Benjamin Buchfink <benjamin.buchfink@tue.mpg.de>
7 
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 ****/
21 
22 // #define _ITERATOR_DEBUG_LEVEL 0
23 
24 #include <map>
25 #include <list>
26 #include <set>
27 #include "../basic/sequence.h"
28 #include "../basic/match.h"
29 #include "../stats/score_matrix.h"
30 //#include "../align/extend_ungapped.h"
31 #include "../output/output_format.h"
32 #include "../dp/hsp_traits.h"
33 #include "chaining.h"
34 #include "../util/util.h"
35 
36 using std::endl;
37 using std::cout;
38 using std::map;
39 using std::list;
40 using std::set;
41 using std::max;
42 using std::min;
43 
disjoint(list<Hsp_traits>::const_iterator begin,list<Hsp_traits>::const_iterator end,const Hsp_traits & t,int cutoff)44 bool disjoint(list<Hsp_traits>::const_iterator begin, list<Hsp_traits>::const_iterator end, const Hsp_traits &t, int cutoff)
45 {
46 	for (; begin != end; ++begin) {
47 		const double ot = t.subject_range.overlap_factor(begin->subject_range),
48 			oq = t.query_range.overlap_factor(begin->query_range);
49 		if ((1.0 - min(ot, oq)) * t.score / begin->score >= config.chaining_stacked_hsp_ratio)
50 			continue;
51 		if ((1.0 - max(ot, oq)) * t.score < cutoff)
52 			return false;
53 		//if (begin->partial_score(t) < cutoff || !begin->collinear(t))
54 		//if (!begin->disjoint(t) || !begin->collinear(t))
55 		//if (!begin->rel_disjoint(t))
56 		//	return false;
57 	}
58 	return true;
59 }
60 
disjoint(list<Hsp_traits>::const_iterator begin,list<Hsp_traits>::const_iterator end,const Diagonal_segment & d,int cutoff)61 bool disjoint(list<Hsp_traits>::const_iterator begin, list<Hsp_traits>::const_iterator end, const Diagonal_segment &d, int cutoff)
62 {
63 	for (; begin != end; ++begin) {
64 		const double ot = d.subject_range().overlap_factor(begin->subject_range),
65 			oq = d.query_range().overlap_factor(begin->query_range);
66 		if ((1.0 - min(ot, oq)) * d.score / begin->score >= config.chaining_stacked_hsp_ratio)
67 			continue;
68 		if ((1.0 - max(ot, oq)) * d.score < cutoff)
69 			return false;
70 		//if (begin->partial_score(d) < cutoff || !begin->collinear(d))
71 		//if (!begin->disjoint(d) || !begin->collinear(d))
72 		//if (!begin->rel_disjoint(d))
73 		//return false;
74 	}
75 	return true;
76 }
77 
clear_edges()78 void Diag_graph::clear_edges()
79 {
80 	edges.clear();
81 	for (vector<Diagonal_node>::iterator i = nodes.begin(); i < nodes.end(); ++i)
82 		i->deactivate();
83 }
84 
load(vector<Diagonal_segment>::const_iterator begin,vector<Diagonal_segment>::const_iterator end)85 void Diag_graph::load(vector<Diagonal_segment>::const_iterator begin, vector<Diagonal_segment>::const_iterator end)
86 {
87 	int d = std::numeric_limits<int>::min(), max_j_end = d;
88 	for (vector<Diagonal_segment>::const_iterator i = begin; i < end; ++i) {
89 		const int d2 = i->diag();
90 		if (d2 != d) {
91 			d = d2;
92 			nodes.push_back(*i);
93 			max_j_end = nodes.back().subject_end();
94 		}
95 		else if (max_j_end < i->j) {
96 			nodes.push_back(*i);
97 			max_j_end = std::max(max_j_end, nodes.back().subject_end());
98 		}
99 	}
100 }
101 
print(Sequence query,Sequence subject) const102 void Diag_graph::print(Sequence query, Sequence subject) const
103 {
104 	for (int k = 0; k < (int)nodes.size(); ++k) {
105 		const Diagonal_segment &d = nodes[k];
106 		cout << "Diag n=" << k << " i=" << d.i << " j=" << d.j << " d=" << d.diag() << " score=" << d.score << " len=" << d.len << endl;
107 		cout << Sequence(query, d.i, d.query_last()) << endl;
108 		cout << Sequence(subject, d.j, d.subject_last()) << endl;
109 	}
110 }
111 
top_node() const112 size_t Diag_graph::top_node() const
113 {
114 	int top_score = 0, score;
115 	size_t top_node = end;
116 	for (size_t k = 0; k < nodes.size(); ++k)
117 		if ((score = nodes[k].prefix_score) > top_score) {
118 			top_node = k;
119 			top_score = score;
120 		}
121 	return top_node;
122 }
123 
sort()124 void Diag_graph::sort()
125 {
126 	std::sort(nodes.begin(), nodes.end(), Diagonal_segment::cmp_subject);
127 }
128 
prune()129 void Diag_graph::prune() {
130 	vector<Diagonal_node> finished;
131 	list<Diagonal_node> window;
132 	for (const Diagonal_node &d : nodes) {
133 		size_t n = 0;
134 		for (list<Diagonal_node>::iterator i = window.begin(); i != window.end();) {
135 			if (i->subject_end() > d.j) {
136 				if (i->score >= d.score && i->j <= d.j && i->subject_end() >= d.subject_end())
137 					++n;
138 				++i;
139 			}
140 			else {
141 				finished.push_back(*i);
142 				i = window.erase(i);
143 			}
144 		}
145 		if (n <= config.chaining_range_cover)
146 			window.push_back(d);
147 	}
148 	for (const Diagonal_node &d : window)
149 		finished.push_back(d);
150 	nodes = std::move(finished);
151 }
152 
153 struct Link
154 {
LinkLink155 	Link():
156 		subject_pos1(-1)
157 	{}
LinkLink158 	Link(unsigned target, int query_pos, int subject_pos, int score1, int score2) :
159 		subject_pos1(subject_pos),
160 		query_pos1(query_pos),
161 		score1(score1),
162 		score2(score2)
163 	{}
164 	int subject_pos1, query_pos1, subject_pos2, query_pos2, score1, score2;
transposeLink165 	Link& transpose()
166 	{
167 		std::swap(subject_pos1, query_pos1);
168 		std::swap(subject_pos2, query_pos2);
169 		return *this;
170 	}
resetLink171 	void reset()
172 	{
173 		subject_pos1 = -1;
174 		score1 = 0;
175 		score2 = 0;
176 	}
177 };
178 
score_range(Sequence query,Sequence subject,int i,int j,int j_end)179 int score_range(Sequence query, Sequence subject, int i, int j, int j_end)
180 {
181 	int score = 0;
182 	while (j < j_end) {
183 		score += score_matrix(query[i], subject[j]);
184 		++i;
185 		++j;
186 	}
187 	return score;
188 }
189 
get_hgap_link(const Diagonal_segment & d1,const Diagonal_segment & d2,Sequence query,Sequence subject,Link & l,int padding)190 int get_hgap_link(const Diagonal_segment &d1, const Diagonal_segment &d2, Sequence query, Sequence subject, Link &l, int padding)
191 {
192 	const int d = d1.diag() - d2.diag(),
193 		j2_end = std::min(std::max((int)d2.j, d1.subject_last() + d + 1 + padding), d2.subject_last());
194 	int j1;
195 	bool space;
196 	if (d1.subject_last() < d2.j - d - 1) {
197 		j1 = d1.subject_last();
198 		space = true;
199 	}
200 	else {
201 		j1 = std::max(d2.j - d - 1 - padding, d1.j);
202 		space = false;
203 	}
204 	int j2 = j1 + d + 1,
205 		i1 = d1.i + (j1 - d1.j),
206 		i2 = i1 + 1;
207 	//cout << "j2=" << j2 << " d2.subject_last=" << d2.subject_last() << endl;
208 	if (j2 > d2.subject_last()) {
209 		l.reset();
210 		return std::numeric_limits<int>::min();
211 	}
212 	int score1 = 0,
213 		//score2 = score_range(query, subject, i2, j2, d2.j + d2.len);
214 		score2 = score_range(query, subject, i2, j2, d2.j) + d2.score - score_range(query, subject, d2.i, d2.j, j2);
215 	int max_score = std::numeric_limits<int>::min();
216 	while (true) {
217 		//cout << "i1=" << i1 << " j1=" << j1 << " i2=" << i2 << " j2=" << j2 << " score1=" << score1 << " score2=" << score2 << " total=" << score1 + score2 << endl;
218 		if (score1 + score2 > max_score) {
219 			max_score = score1 + score2;
220 			l.query_pos1 = i1;
221 			l.subject_pos1 = j1;
222 			l.query_pos2 = i2;
223 			l.subject_pos2 = j2;
224 			l.score1 = score1;
225 			l.score2 = score2;
226 		}
227 		score2 -= score_matrix(query[i2], subject[j2]);
228 		++i1; ++i2; ++j1; ++j2;
229 		if (j2 > j2_end)
230 			break;
231 		score1 += score_matrix(query[i1], subject[j1]);
232 	}
233 	const int j1_end = j2_end - d;
234 	if (space)
235 		l.score1 = d1.score + l.score1;
236 	else
237 		l.score1 = d1.score - score_range(query, subject, d1.diag() + j1_end, j1_end, d1.subject_end()) + score_range(query, subject, d1.query_end(), d1.subject_end(), j1_end) - score1 + l.score1;
238 	return max_score;
239 }
240 
get_vgap_link(const Diagonal_segment & d1,const Diagonal_segment & d2,Sequence query,Sequence subject,Link & l,int padding)241 int get_vgap_link(const Diagonal_segment &d1, const Diagonal_segment &d2, Sequence query, Sequence subject, Link &l, int padding)
242 {
243 	int s = get_hgap_link(d1.transpose(), d2.transpose(), subject, query, l, padding);
244 	l.transpose();
245 	return s;
246 }
247 
get_link(const Diagonal_segment & d1,const Diagonal_segment & d2,Sequence query,Sequence subject,Link & l,int padding)248 int get_link(const Diagonal_segment &d1, const Diagonal_segment &d2, Sequence query, Sequence subject, Link &l, int padding)
249 {
250 	if (d1.diag() < d2.diag())
251 		return get_vgap_link(d1, d2, query, subject, l, padding);
252 	else
253 		return get_hgap_link(d1, d2, query, subject, l, padding);
254 }
255 
256 struct Greedy_aligner2
257 {
258 
259 	enum { link_padding = 10, reverse_link_min_overhang = 10 };
260 
get_approximate_linkGreedy_aligner2261 	int get_approximate_link(int d_idx, int e_idx, double space_penalty, int max_i)
262 	{
263 		Diagonal_node &d = diags[d_idx];
264 		Diagonal_node &e = diags[e_idx];
265 		const int shift = d.diag() - e.diag();
266 		int gap_score = shift != 0 ? -score_matrix.gap_open() - abs(shift)*score_matrix.gap_extend() : 0;
267 		const int space = shift > 0 ? d.j - e.subject_last() : d.i - e.query_last();
268 		int prefix_score = 0, link_score = 0, link_j, diff1 = 0, path_max, path_min, prefix_score_begin;
269 		if (space <= 0) {
270 			vector<Diag_graph::Edge>::const_iterator edge = diags.get_edge(d_idx, d.j);
271 			if (edge != diags.edges.end() && edge->prefix_score > e.prefix_score + gap_score + d.score)
272 				return 0;
273 			/*if (d.prefix_score > e.prefix_score + gap_score + d.score)
274 				return 0;*/
275 			Link link;
276 			if (get_link(e, d, query, subject, link, link_padding) > 0) {
277 				diff1 = e.score - link.score1;
278 				const int prefix_e = diags.prefix_score(e_idx, link.subject_pos1, path_max, path_min);
279 				prefix_score = prefix_e - diff1 + gap_score + link.score2;
280 				vector<Diag_graph::Edge>::const_iterator edge = diags.get_edge(d_idx, link.subject_pos2);
281 				if (edge != diags.edges.end() && edge->prefix_score > prefix_score)
282 					return 0;
283 				prefix_score_begin = prefix_score - link.score2;
284 				path_min = std::min(path_min, prefix_score - link.score2);
285 				if (prefix_e == path_max) {
286 					path_max -= diff1;
287 				}
288 				link_score = link.score1 + link.score2 + gap_score;
289 				link_j = link.subject_pos2;
290 				/*if (log)
291 					cout << "Link score1=" << link.score1 << " score2=" << link.score2 << " j1=" << link.subject_pos1 << " j2=" << link.subject_pos2 << endl;*/
292 			}
293 		}
294 		else {
295 			prefix_score = e.prefix_score + gap_score - int(space_penalty*std::max(space - 1, 0)) + d.score;
296 			vector<Diag_graph::Edge>::const_iterator edge = diags.get_edge(d_idx, d.j);
297 			if (edge != diags.edges.end() && edge->prefix_score > prefix_score)
298 				return 0;
299 			prefix_score_begin = prefix_score - d.score;
300 			path_max = e.path_max;
301 			path_min = e.path_min;
302 			path_min = std::min(path_min, prefix_score - d.score);
303 			link_score = e.score + d.score + gap_score;
304 			link_j = d.j;
305 		}
306 
307 		if (prefix_score > d.score) {
308 			path_max = std::max(path_max, prefix_score);
309 			diags.add_edge(Diag_graph::Edge(prefix_score, path_max, link_j, d_idx, e_idx, prefix_score == path_max ? prefix_score : path_min, prefix_score_begin));
310 			if (log)
311 				cout << "Link n=" << e_idx << " d=" << e.diag() << " i_end=" << e.query_end() << " max_i=" << max_i << " shift=" << shift << " space=" << space << " prefix_score=" << prefix_score << " link_score=" << link_score << " path_min="<<path_min<<endl;
312 		}
313 		return prefix_score;
314 	}
315 
316 	template<typename _it>
forward_passGreedy_aligner2317 	void forward_pass(_it begin, _it end, bool init, double space_penalty)
318 	{
319 		window.clear();
320 
321 		for (_it it = begin; it != end; ++it) {
322 
323 			unsigned node = (unsigned)(*it);
324 			if(init) diags.init(node);
325 			Diagonal_node& d = diags[node];
326 			const int dd = d.diag();
327 			if (log) cout << "Node " << node << " Score=" << d.score << endl;
328 			map<int, unsigned>::iterator i = window.find(dd), j;
329 			if (i == window.end())
330 				i = window.insert(std::make_pair(dd, node)).first;
331 
332 			j = i;
333 			int max_j = 0;
334 			if (i == window.begin())
335 				goto weiter;
336 			do {
337 				--j;
338 				Diagonal_node &e = diags[j->second];
339 				const int de = j->first, shift = dd - de;
340 				//if (d.j - e.subject_end() > max_dist) {
341 				if (e.prefix_score - int(space_penalty*(std::max(d.j - e.subject_end(), 0))) <= 0) {
342 					map<int, unsigned>::iterator k = j;
343 					if (j == window.begin()) {
344 						window.erase(j);
345 						break;
346 					}
347 					else {
348 						++k;
349 						window.erase(j);
350 						j = k;
351 						continue;
352 					}
353 				}
354 				if (e.subject_end() < max_j)
355 					continue;
356 				get_approximate_link(node, j->second, space_penalty, max_j);
357 				max_j = std::max(max_j, std::min(d.j, e.subject_end()));
358 				if (e.subject_end() - (d.subject_end() - std::min(e.diag() - d.diag(), 0)) >= reverse_link_min_overhang) {
359 					if (log)
360 						cout << "Computing reverse link node=" << j->second << endl;
361 					get_approximate_link(j->second, node, space_penalty, max_j);
362 				}
363 			} while (j != window.begin());
364 
365 			weiter:
366 			j = i;
367 			if (j->second == node)
368 				++j;
369 			int max_i = 0;
370 			while (j != window.end()) {
371 				Diagonal_node &e = diags[j->second];
372 				const int de = j->first, shift = dd - de;
373 				//if (d.j - e.subject_end() > max_dist && j != i) {
374 				if (e.prefix_score - int(space_penalty*(std::max(d.j - e.subject_end(),0))) <= 0 && j != i) {
375 					map<int, unsigned>::iterator k = j;
376 					++k;
377 					window.erase(j);
378 					j = k;
379 					continue;
380 				}
381 				if (e.query_end() < max_i) {
382 					++j;
383 					continue;
384 				}
385 				//if (get_approximate_link(node, j->second, space_penalty, max_i) > e.prefix_score)
386 				get_approximate_link(node, j->second, space_penalty, max_i);
387 				if (e.i < d.i)
388 					max_i = std::max(max_i, std::min(e.query_end(), d.i));
389 				if (e.subject_end() - (d.subject_end() - std::min(e.diag() - d.diag(), 0)) >= reverse_link_min_overhang) {
390 					if (log)
391 						cout << "Computing reverse link node=" << j->second << endl;
392 					get_approximate_link(j->second, node, space_penalty, max_i);
393 				}
394 				++j;
395 			}
396 			i->second = node;
397 
398 			if (log)
399 				cout << "Prefix_score=" << d.prefix_score << " path_max=" << d.path_max << " path_min=" << d.path_min << endl << endl;
400 		}
401 	}
402 
backtraceGreedy_aligner2403 	bool backtrace(size_t node, int j_end, Hsp *out, Hsp_traits &t, int score_max, int score_min, int max_shift, unsigned &next) const
404 	{
405 		const Diagonal_node &d = diags[node];
406 		vector<Diag_graph::Edge>::const_iterator f = diags.get_edge(node, j_end);
407 		bool at_end = f >= diags.edges.end();
408 		const int prefix_score = at_end ? d.score : f->prefix_score;
409 		if (prefix_score > score_max)
410 			return false;
411 
412 		int j;
413 		score_min = std::min(score_min, at_end ? 0 : f->prefix_score_begin);
414 
415 		//if (f != diags.edges.end() && (!stop_at_min || f->path_min == diags[f->node_out].path_min)) {
416 		if (!at_end) {
417 			const Diagonal_node &e = diags[f->node_out];
418 			const int shift = d.diag() - e.diag();
419 			j = f->j;
420 
421 			if (abs(shift) <= max_shift) {
422 				const bool bt = backtrace(f->node_out, shift > 0 ? j : j + shift, out, t, score_max, score_min, max_shift, next);
423 				if (!bt) {
424 					if (f->prefix_score_begin > score_min)
425 						return false;
426 					else
427 						at_end = true;
428 				}
429 			}
430 			else {
431 				next = f->node_out;
432 				at_end = true;
433 			}
434 		}
435 
436 		if (at_end) {
437 			if (out) {
438 				out->query_range.begin_ = d.i;
439 				out->subject_range.begin_ = d.j;
440 				out->score = score_max - score_min;
441 			}
442 			t.query_range.begin_ = d.i;
443 			t.subject_range.begin_ = d.j;
444 			t.score = score_max - score_min;
445 			j = d.j;
446 		}
447 		else {
448 			const Diagonal_node &e = diags[f->node_out];
449 			const int shift = d.diag() - e.diag();
450 			if (out) {
451 				if (shift > 0) {
452 					out->transcript.push_back(op_insertion, (unsigned)shift);
453 					out->length += shift;
454 				}
455 				else if (shift < 0) {
456 					for (int j2 = j + shift; j2 < j; ++j2) {
457 						out->transcript.push_back(op_deletion, subject[j2]);
458 						++out->length;
459 					}
460 				}
461 			}
462 		}
463 
464 		const int dd = d.diag();
465 		t.d_max = std::max(t.d_max, dd);
466 		t.d_min = std::min(t.d_min, dd);
467 
468 		if (out) {
469 			const int d2 = d.diag();
470 			if (log) cout << "Backtrace node=" << node << " i=" << d2 + j << "-" << d2 + j_end << " j=" << j << "-" << j_end << endl;
471 			for (; j < j_end; ++j) {
472 				const Letter s = subject[j], q = query[d2 + j];
473 				if (s == q) {
474 					out->transcript.push_back(op_match);
475 					++out->identities;
476 				}
477 				else
478 					out->transcript.push_back(op_substitution, s);
479 				++out->length;
480 			}
481 		}
482 		return true;
483 	}
484 
backtraceGreedy_aligner2485 	void backtrace(size_t top_node, Hsp *out, Hsp_traits &t, int max_shift, unsigned &next, int max_j) const
486 	{
487 		Hsp_traits traits(frame);
488 		if (top_node != Diag_graph::end) {
489 			const Diagonal_node &d = diags[top_node];
490 			if (out) {
491 				out->transcript.clear();
492 				out->query_range.end_ = d.query_end();
493 				out->subject_range.end_ = d.subject_end();
494 			}
495 			traits.subject_range.end_ = d.subject_end();
496 			traits.query_range.end_ = d.query_end();
497 			int score_min = d.prefix_score;
498 			backtrace(top_node, std::min(d.subject_end(), max_j), out, traits, d.prefix_score, score_min, max_shift, next);
499 		}
500 		else {
501 			traits.score = 0;
502 			if (out)
503 				out->score = 0;
504 		}
505 		if (out)
506 			out->transcript.push_terminator();
507 		t = traits;
508 	}
509 
backtraceGreedy_aligner2510 	int backtrace(size_t top_node, list<Hsp> &hsps, list<Hsp_traits> &ts, list<Hsp_traits>::iterator &t_begin, int cutoff, int max_shift) const
511 	{
512 		unsigned next;
513 		int max_score = 0, max_j = (int)subject.length();
514 		do {
515 			Hsp *hsp = log ? new Hsp(true) : 0;
516 			Hsp_traits t(frame);
517 			next = std::numeric_limits<unsigned>::max();
518 			backtrace(top_node, hsp, t, max_shift, next, max_j);
519 			if (t.score > 0)
520 				max_j = t.subject_range.begin_;
521 			if (t.score >= cutoff && disjoint(t_begin, ts.end(), t, cutoff)) {
522 				if (t_begin == ts.end()) {
523 					ts.push_back(t);
524 					t_begin = ts.end();
525 					t_begin--;
526 				}
527 				else
528 					ts.push_back(t);
529 				if (hsp)
530 					hsps.push_back(*hsp);
531 				max_score = std::max(max_score, t.score);
532 			}
533 			delete hsp;
534 			top_node = next;
535 		} while (next != std::numeric_limits<unsigned>::max());
536 		return max_score;
537 	}
538 
backtraceGreedy_aligner2539 	int backtrace(list<Hsp> &hsps, list<Hsp_traits> &ts, int cutoff, int max_shift) const
540 	{
541 		vector<Diagonal_node*> top_nodes;
542 		for (size_t i = 0; i < diags.nodes.size(); ++i) {
543 			Diagonal_node &d = diags.nodes[i];
544 			//cout << "node=" << i << " prefix_score=" << d.prefix_score << " path_max=" << d.path_max << " rel_score=" << d.rel_score() << " cutoff=" << cutoff << endl;
545 			//if (d.prefix_score >= cutoff && (d.prefix_score == d.path_max || d.prefix_score - d.path_min >= cutoff))
546 			if(d.rel_score() >= cutoff)
547 				top_nodes.push_back(&d);
548 		}
549 		std::sort(top_nodes.begin(), top_nodes.end(), Diagonal_node::cmp_rel_score);
550 		int max_score = 0;
551 		list<Hsp_traits>::iterator t_begin = ts.end();
552 
553 		for (vector<Diagonal_node*>::const_iterator i = top_nodes.begin(); i < top_nodes.end(); ++i) {
554 			const size_t node = *i - diags.nodes.data();
555 			if (log)
556 				cout << "Backtrace candidate node=" << node << endl;
557 			if (disjoint(t_begin, ts.end(), **i, cutoff)) {
558 				if (log)
559 					cout << "Backtrace node=" << node << " prefix_score=" << (*i)->prefix_score << " rel_score=" << (*i)->rel_score() << endl;
560 				max_score = std::max(max_score, backtrace(node, hsps, ts, t_begin, cutoff, max_shift));
561 				if (log)
562 					cout << endl;
563 			}
564 		}
565 		return max_score;
566 	}
567 
runGreedy_aligner2568 	int run(list<Hsp> &hsps, list<Hsp_traits> &ts, double space_penalty, int cutoff, int max_shift)
569 	{
570 		if (config.chaining_maxnodes > 0) {
571 			std::sort(diags.nodes.begin(), diags.nodes.end(), Diagonal_segment::cmp_score);
572 			if (diags.nodes.size() > config.chaining_maxnodes)
573 				diags.nodes.erase(diags.nodes.begin() + config.chaining_maxnodes, diags.nodes.end());
574 		}
575 		if (config.chaining_len_cap > 0.0 && diags.nodes.size() > config.chaining_min_nodes) {
576 			std::sort(diags.nodes.begin(), diags.nodes.end(), Diagonal_segment::cmp_score);
577 			const double cap = query.length() * config.chaining_len_cap;
578 			double total_len = 0.0;
579 			auto it = diags.nodes.begin();
580 			while (it < diags.nodes.end() && total_len < cap) {
581 				total_len += it->len;
582 				++it;
583 			}
584 			diags.nodes.erase(std::max(diags.nodes.begin() + config.chaining_min_nodes, it), diags.nodes.end());
585 		}
586 		diags.sort();
587 		diags.prune();
588 		if (log) {
589 			diags.print(query, subject);
590 			cout << endl << endl;
591 		}
592 
593 		forward_pass(Index_iterator(0llu), Index_iterator(diags.nodes.size()), true, space_penalty);
594 		int max_score = backtrace(hsps, ts, cutoff, max_shift);
595 
596 		if (log) {
597 			hsps.sort(Hsp::cmp_query_pos);
598 			for (list<Hsp>::iterator i = hsps.begin(); i != hsps.end(); ++i)
599 				print_hsp(*i, TranslatedSequence(query));
600 			cout << endl << "Smith-Waterman:" << endl;
601 			smith_waterman(query, subject, diags);
602 			cout << endl << endl;
603 		}
604 		return max_score;
605 	}
606 
runGreedy_aligner2607 	int run(list<Hsp> &hsps, list<Hsp_traits> &ts, vector<Diagonal_segment>::const_iterator begin, vector<Diagonal_segment>::const_iterator end, int band)
608 	{
609 		if (log)
610 			cout << "***** Seed hit run " << begin->diag() << '\t' << (end - 1)->diag() << '\t' << (end - 1)->diag() - begin->diag() << endl;
611 		diags.init();
612 		diags.load(begin, end);
613 		return run(hsps, ts, 0.1, 19, band);
614 	}
615 
Greedy_aligner2Greedy_aligner2616 	Greedy_aligner2(const Sequence &query, const Sequence &subject, bool log, unsigned frame) :
617 		query(query),
618 		subject(subject),
619 		//query_bc(query_bc),
620 		log(log),
621 		frame(frame)
622 	{
623 	}
624 
625 	const Sequence query, subject;
626 	//const Bias_correction &query_bc;
627 	const bool log;
628 	const unsigned frame;
629 	static thread_local Diag_graph diags;
630 	static thread_local map<int, unsigned> window;
631 
632 };
633 
634 thread_local Diag_graph Greedy_aligner2::diags;
635 thread_local map<int, unsigned> Greedy_aligner2::window;
636 
greedy_align(Sequence query,Sequence subject,vector<Diagonal_segment>::const_iterator begin,vector<Diagonal_segment>::const_iterator end,bool log,unsigned frame)637 std::pair<int, list<Hsp_traits>> greedy_align(Sequence query, Sequence subject, vector<Diagonal_segment>::const_iterator begin, vector<Diagonal_segment>::const_iterator end, bool log, unsigned frame)
638 {
639 	const int band = config.chaining_maxgap;
640 	if (end - begin == 1)
641 		return { begin->score, { { begin->diag(), begin->diag(), begin->score, (int)frame, begin->query_range(), begin->subject_range() } } };
642 	Greedy_aligner2 ga(query, subject, log, frame);
643 	list<Hsp> hsps;
644 	list<Hsp_traits> ts;
645 	int score = ga.run(hsps, ts, begin, end, band);
646 	return std::make_pair(score, std::move(ts));
647 }