1 /****
2 DIAMOND protein aligner
3 Copyright (C) 2016-2020 Max Planck Society for the Advancement of Science e.V.
4 Benjamin Buchfink
5
6 Code developed by Benjamin Buchfink <benjamin.buchfink@tue.mpg.de>
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 ****/
21
22 // #define _ITERATOR_DEBUG_LEVEL 0
23
24 #include <map>
25 #include <list>
26 #include <set>
27 #include "../basic/sequence.h"
28 #include "../basic/match.h"
29 #include "../stats/score_matrix.h"
30 //#include "../align/extend_ungapped.h"
31 #include "../output/output_format.h"
32 #include "../dp/hsp_traits.h"
33 #include "chaining.h"
34 #include "../util/util.h"
35
36 using std::endl;
37 using std::cout;
38 using std::map;
39 using std::list;
40 using std::set;
41 using std::max;
42 using std::min;
43
disjoint(list<Hsp_traits>::const_iterator begin,list<Hsp_traits>::const_iterator end,const Hsp_traits & t,int cutoff)44 bool disjoint(list<Hsp_traits>::const_iterator begin, list<Hsp_traits>::const_iterator end, const Hsp_traits &t, int cutoff)
45 {
46 for (; begin != end; ++begin) {
47 const double ot = t.subject_range.overlap_factor(begin->subject_range),
48 oq = t.query_range.overlap_factor(begin->query_range);
49 if ((1.0 - min(ot, oq)) * t.score / begin->score >= config.chaining_stacked_hsp_ratio)
50 continue;
51 if ((1.0 - max(ot, oq)) * t.score < cutoff)
52 return false;
53 //if (begin->partial_score(t) < cutoff || !begin->collinear(t))
54 //if (!begin->disjoint(t) || !begin->collinear(t))
55 //if (!begin->rel_disjoint(t))
56 // return false;
57 }
58 return true;
59 }
60
disjoint(list<Hsp_traits>::const_iterator begin,list<Hsp_traits>::const_iterator end,const Diagonal_segment & d,int cutoff)61 bool disjoint(list<Hsp_traits>::const_iterator begin, list<Hsp_traits>::const_iterator end, const Diagonal_segment &d, int cutoff)
62 {
63 for (; begin != end; ++begin) {
64 const double ot = d.subject_range().overlap_factor(begin->subject_range),
65 oq = d.query_range().overlap_factor(begin->query_range);
66 if ((1.0 - min(ot, oq)) * d.score / begin->score >= config.chaining_stacked_hsp_ratio)
67 continue;
68 if ((1.0 - max(ot, oq)) * d.score < cutoff)
69 return false;
70 //if (begin->partial_score(d) < cutoff || !begin->collinear(d))
71 //if (!begin->disjoint(d) || !begin->collinear(d))
72 //if (!begin->rel_disjoint(d))
73 //return false;
74 }
75 return true;
76 }
77
clear_edges()78 void Diag_graph::clear_edges()
79 {
80 edges.clear();
81 for (vector<Diagonal_node>::iterator i = nodes.begin(); i < nodes.end(); ++i)
82 i->deactivate();
83 }
84
load(vector<Diagonal_segment>::const_iterator begin,vector<Diagonal_segment>::const_iterator end)85 void Diag_graph::load(vector<Diagonal_segment>::const_iterator begin, vector<Diagonal_segment>::const_iterator end)
86 {
87 int d = std::numeric_limits<int>::min(), max_j_end = d;
88 for (vector<Diagonal_segment>::const_iterator i = begin; i < end; ++i) {
89 const int d2 = i->diag();
90 if (d2 != d) {
91 d = d2;
92 nodes.push_back(*i);
93 max_j_end = nodes.back().subject_end();
94 }
95 else if (max_j_end < i->j) {
96 nodes.push_back(*i);
97 max_j_end = std::max(max_j_end, nodes.back().subject_end());
98 }
99 }
100 }
101
print(Sequence query,Sequence subject) const102 void Diag_graph::print(Sequence query, Sequence subject) const
103 {
104 for (int k = 0; k < (int)nodes.size(); ++k) {
105 const Diagonal_segment &d = nodes[k];
106 cout << "Diag n=" << k << " i=" << d.i << " j=" << d.j << " d=" << d.diag() << " score=" << d.score << " len=" << d.len << endl;
107 cout << Sequence(query, d.i, d.query_last()) << endl;
108 cout << Sequence(subject, d.j, d.subject_last()) << endl;
109 }
110 }
111
top_node() const112 size_t Diag_graph::top_node() const
113 {
114 int top_score = 0, score;
115 size_t top_node = end;
116 for (size_t k = 0; k < nodes.size(); ++k)
117 if ((score = nodes[k].prefix_score) > top_score) {
118 top_node = k;
119 top_score = score;
120 }
121 return top_node;
122 }
123
sort()124 void Diag_graph::sort()
125 {
126 std::sort(nodes.begin(), nodes.end(), Diagonal_segment::cmp_subject);
127 }
128
prune()129 void Diag_graph::prune() {
130 vector<Diagonal_node> finished;
131 list<Diagonal_node> window;
132 for (const Diagonal_node &d : nodes) {
133 size_t n = 0;
134 for (list<Diagonal_node>::iterator i = window.begin(); i != window.end();) {
135 if (i->subject_end() > d.j) {
136 if (i->score >= d.score && i->j <= d.j && i->subject_end() >= d.subject_end())
137 ++n;
138 ++i;
139 }
140 else {
141 finished.push_back(*i);
142 i = window.erase(i);
143 }
144 }
145 if (n <= config.chaining_range_cover)
146 window.push_back(d);
147 }
148 for (const Diagonal_node &d : window)
149 finished.push_back(d);
150 nodes = std::move(finished);
151 }
152
153 struct Link
154 {
LinkLink155 Link():
156 subject_pos1(-1)
157 {}
LinkLink158 Link(unsigned target, int query_pos, int subject_pos, int score1, int score2) :
159 subject_pos1(subject_pos),
160 query_pos1(query_pos),
161 score1(score1),
162 score2(score2)
163 {}
164 int subject_pos1, query_pos1, subject_pos2, query_pos2, score1, score2;
transposeLink165 Link& transpose()
166 {
167 std::swap(subject_pos1, query_pos1);
168 std::swap(subject_pos2, query_pos2);
169 return *this;
170 }
resetLink171 void reset()
172 {
173 subject_pos1 = -1;
174 score1 = 0;
175 score2 = 0;
176 }
177 };
178
score_range(Sequence query,Sequence subject,int i,int j,int j_end)179 int score_range(Sequence query, Sequence subject, int i, int j, int j_end)
180 {
181 int score = 0;
182 while (j < j_end) {
183 score += score_matrix(query[i], subject[j]);
184 ++i;
185 ++j;
186 }
187 return score;
188 }
189
get_hgap_link(const Diagonal_segment & d1,const Diagonal_segment & d2,Sequence query,Sequence subject,Link & l,int padding)190 int get_hgap_link(const Diagonal_segment &d1, const Diagonal_segment &d2, Sequence query, Sequence subject, Link &l, int padding)
191 {
192 const int d = d1.diag() - d2.diag(),
193 j2_end = std::min(std::max((int)d2.j, d1.subject_last() + d + 1 + padding), d2.subject_last());
194 int j1;
195 bool space;
196 if (d1.subject_last() < d2.j - d - 1) {
197 j1 = d1.subject_last();
198 space = true;
199 }
200 else {
201 j1 = std::max(d2.j - d - 1 - padding, d1.j);
202 space = false;
203 }
204 int j2 = j1 + d + 1,
205 i1 = d1.i + (j1 - d1.j),
206 i2 = i1 + 1;
207 //cout << "j2=" << j2 << " d2.subject_last=" << d2.subject_last() << endl;
208 if (j2 > d2.subject_last()) {
209 l.reset();
210 return std::numeric_limits<int>::min();
211 }
212 int score1 = 0,
213 //score2 = score_range(query, subject, i2, j2, d2.j + d2.len);
214 score2 = score_range(query, subject, i2, j2, d2.j) + d2.score - score_range(query, subject, d2.i, d2.j, j2);
215 int max_score = std::numeric_limits<int>::min();
216 while (true) {
217 //cout << "i1=" << i1 << " j1=" << j1 << " i2=" << i2 << " j2=" << j2 << " score1=" << score1 << " score2=" << score2 << " total=" << score1 + score2 << endl;
218 if (score1 + score2 > max_score) {
219 max_score = score1 + score2;
220 l.query_pos1 = i1;
221 l.subject_pos1 = j1;
222 l.query_pos2 = i2;
223 l.subject_pos2 = j2;
224 l.score1 = score1;
225 l.score2 = score2;
226 }
227 score2 -= score_matrix(query[i2], subject[j2]);
228 ++i1; ++i2; ++j1; ++j2;
229 if (j2 > j2_end)
230 break;
231 score1 += score_matrix(query[i1], subject[j1]);
232 }
233 const int j1_end = j2_end - d;
234 if (space)
235 l.score1 = d1.score + l.score1;
236 else
237 l.score1 = d1.score - score_range(query, subject, d1.diag() + j1_end, j1_end, d1.subject_end()) + score_range(query, subject, d1.query_end(), d1.subject_end(), j1_end) - score1 + l.score1;
238 return max_score;
239 }
240
get_vgap_link(const Diagonal_segment & d1,const Diagonal_segment & d2,Sequence query,Sequence subject,Link & l,int padding)241 int get_vgap_link(const Diagonal_segment &d1, const Diagonal_segment &d2, Sequence query, Sequence subject, Link &l, int padding)
242 {
243 int s = get_hgap_link(d1.transpose(), d2.transpose(), subject, query, l, padding);
244 l.transpose();
245 return s;
246 }
247
get_link(const Diagonal_segment & d1,const Diagonal_segment & d2,Sequence query,Sequence subject,Link & l,int padding)248 int get_link(const Diagonal_segment &d1, const Diagonal_segment &d2, Sequence query, Sequence subject, Link &l, int padding)
249 {
250 if (d1.diag() < d2.diag())
251 return get_vgap_link(d1, d2, query, subject, l, padding);
252 else
253 return get_hgap_link(d1, d2, query, subject, l, padding);
254 }
255
256 struct Greedy_aligner2
257 {
258
259 enum { link_padding = 10, reverse_link_min_overhang = 10 };
260
get_approximate_linkGreedy_aligner2261 int get_approximate_link(int d_idx, int e_idx, double space_penalty, int max_i)
262 {
263 Diagonal_node &d = diags[d_idx];
264 Diagonal_node &e = diags[e_idx];
265 const int shift = d.diag() - e.diag();
266 int gap_score = shift != 0 ? -score_matrix.gap_open() - abs(shift)*score_matrix.gap_extend() : 0;
267 const int space = shift > 0 ? d.j - e.subject_last() : d.i - e.query_last();
268 int prefix_score = 0, link_score = 0, link_j, diff1 = 0, path_max, path_min, prefix_score_begin;
269 if (space <= 0) {
270 vector<Diag_graph::Edge>::const_iterator edge = diags.get_edge(d_idx, d.j);
271 if (edge != diags.edges.end() && edge->prefix_score > e.prefix_score + gap_score + d.score)
272 return 0;
273 /*if (d.prefix_score > e.prefix_score + gap_score + d.score)
274 return 0;*/
275 Link link;
276 if (get_link(e, d, query, subject, link, link_padding) > 0) {
277 diff1 = e.score - link.score1;
278 const int prefix_e = diags.prefix_score(e_idx, link.subject_pos1, path_max, path_min);
279 prefix_score = prefix_e - diff1 + gap_score + link.score2;
280 vector<Diag_graph::Edge>::const_iterator edge = diags.get_edge(d_idx, link.subject_pos2);
281 if (edge != diags.edges.end() && edge->prefix_score > prefix_score)
282 return 0;
283 prefix_score_begin = prefix_score - link.score2;
284 path_min = std::min(path_min, prefix_score - link.score2);
285 if (prefix_e == path_max) {
286 path_max -= diff1;
287 }
288 link_score = link.score1 + link.score2 + gap_score;
289 link_j = link.subject_pos2;
290 /*if (log)
291 cout << "Link score1=" << link.score1 << " score2=" << link.score2 << " j1=" << link.subject_pos1 << " j2=" << link.subject_pos2 << endl;*/
292 }
293 }
294 else {
295 prefix_score = e.prefix_score + gap_score - int(space_penalty*std::max(space - 1, 0)) + d.score;
296 vector<Diag_graph::Edge>::const_iterator edge = diags.get_edge(d_idx, d.j);
297 if (edge != diags.edges.end() && edge->prefix_score > prefix_score)
298 return 0;
299 prefix_score_begin = prefix_score - d.score;
300 path_max = e.path_max;
301 path_min = e.path_min;
302 path_min = std::min(path_min, prefix_score - d.score);
303 link_score = e.score + d.score + gap_score;
304 link_j = d.j;
305 }
306
307 if (prefix_score > d.score) {
308 path_max = std::max(path_max, prefix_score);
309 diags.add_edge(Diag_graph::Edge(prefix_score, path_max, link_j, d_idx, e_idx, prefix_score == path_max ? prefix_score : path_min, prefix_score_begin));
310 if (log)
311 cout << "Link n=" << e_idx << " d=" << e.diag() << " i_end=" << e.query_end() << " max_i=" << max_i << " shift=" << shift << " space=" << space << " prefix_score=" << prefix_score << " link_score=" << link_score << " path_min="<<path_min<<endl;
312 }
313 return prefix_score;
314 }
315
316 template<typename _it>
forward_passGreedy_aligner2317 void forward_pass(_it begin, _it end, bool init, double space_penalty)
318 {
319 window.clear();
320
321 for (_it it = begin; it != end; ++it) {
322
323 unsigned node = (unsigned)(*it);
324 if(init) diags.init(node);
325 Diagonal_node& d = diags[node];
326 const int dd = d.diag();
327 if (log) cout << "Node " << node << " Score=" << d.score << endl;
328 map<int, unsigned>::iterator i = window.find(dd), j;
329 if (i == window.end())
330 i = window.insert(std::make_pair(dd, node)).first;
331
332 j = i;
333 int max_j = 0;
334 if (i == window.begin())
335 goto weiter;
336 do {
337 --j;
338 Diagonal_node &e = diags[j->second];
339 const int de = j->first, shift = dd - de;
340 //if (d.j - e.subject_end() > max_dist) {
341 if (e.prefix_score - int(space_penalty*(std::max(d.j - e.subject_end(), 0))) <= 0) {
342 map<int, unsigned>::iterator k = j;
343 if (j == window.begin()) {
344 window.erase(j);
345 break;
346 }
347 else {
348 ++k;
349 window.erase(j);
350 j = k;
351 continue;
352 }
353 }
354 if (e.subject_end() < max_j)
355 continue;
356 get_approximate_link(node, j->second, space_penalty, max_j);
357 max_j = std::max(max_j, std::min(d.j, e.subject_end()));
358 if (e.subject_end() - (d.subject_end() - std::min(e.diag() - d.diag(), 0)) >= reverse_link_min_overhang) {
359 if (log)
360 cout << "Computing reverse link node=" << j->second << endl;
361 get_approximate_link(j->second, node, space_penalty, max_j);
362 }
363 } while (j != window.begin());
364
365 weiter:
366 j = i;
367 if (j->second == node)
368 ++j;
369 int max_i = 0;
370 while (j != window.end()) {
371 Diagonal_node &e = diags[j->second];
372 const int de = j->first, shift = dd - de;
373 //if (d.j - e.subject_end() > max_dist && j != i) {
374 if (e.prefix_score - int(space_penalty*(std::max(d.j - e.subject_end(),0))) <= 0 && j != i) {
375 map<int, unsigned>::iterator k = j;
376 ++k;
377 window.erase(j);
378 j = k;
379 continue;
380 }
381 if (e.query_end() < max_i) {
382 ++j;
383 continue;
384 }
385 //if (get_approximate_link(node, j->second, space_penalty, max_i) > e.prefix_score)
386 get_approximate_link(node, j->second, space_penalty, max_i);
387 if (e.i < d.i)
388 max_i = std::max(max_i, std::min(e.query_end(), d.i));
389 if (e.subject_end() - (d.subject_end() - std::min(e.diag() - d.diag(), 0)) >= reverse_link_min_overhang) {
390 if (log)
391 cout << "Computing reverse link node=" << j->second << endl;
392 get_approximate_link(j->second, node, space_penalty, max_i);
393 }
394 ++j;
395 }
396 i->second = node;
397
398 if (log)
399 cout << "Prefix_score=" << d.prefix_score << " path_max=" << d.path_max << " path_min=" << d.path_min << endl << endl;
400 }
401 }
402
backtraceGreedy_aligner2403 bool backtrace(size_t node, int j_end, Hsp *out, Hsp_traits &t, int score_max, int score_min, int max_shift, unsigned &next) const
404 {
405 const Diagonal_node &d = diags[node];
406 vector<Diag_graph::Edge>::const_iterator f = diags.get_edge(node, j_end);
407 bool at_end = f >= diags.edges.end();
408 const int prefix_score = at_end ? d.score : f->prefix_score;
409 if (prefix_score > score_max)
410 return false;
411
412 int j;
413 score_min = std::min(score_min, at_end ? 0 : f->prefix_score_begin);
414
415 //if (f != diags.edges.end() && (!stop_at_min || f->path_min == diags[f->node_out].path_min)) {
416 if (!at_end) {
417 const Diagonal_node &e = diags[f->node_out];
418 const int shift = d.diag() - e.diag();
419 j = f->j;
420
421 if (abs(shift) <= max_shift) {
422 const bool bt = backtrace(f->node_out, shift > 0 ? j : j + shift, out, t, score_max, score_min, max_shift, next);
423 if (!bt) {
424 if (f->prefix_score_begin > score_min)
425 return false;
426 else
427 at_end = true;
428 }
429 }
430 else {
431 next = f->node_out;
432 at_end = true;
433 }
434 }
435
436 if (at_end) {
437 if (out) {
438 out->query_range.begin_ = d.i;
439 out->subject_range.begin_ = d.j;
440 out->score = score_max - score_min;
441 }
442 t.query_range.begin_ = d.i;
443 t.subject_range.begin_ = d.j;
444 t.score = score_max - score_min;
445 j = d.j;
446 }
447 else {
448 const Diagonal_node &e = diags[f->node_out];
449 const int shift = d.diag() - e.diag();
450 if (out) {
451 if (shift > 0) {
452 out->transcript.push_back(op_insertion, (unsigned)shift);
453 out->length += shift;
454 }
455 else if (shift < 0) {
456 for (int j2 = j + shift; j2 < j; ++j2) {
457 out->transcript.push_back(op_deletion, subject[j2]);
458 ++out->length;
459 }
460 }
461 }
462 }
463
464 const int dd = d.diag();
465 t.d_max = std::max(t.d_max, dd);
466 t.d_min = std::min(t.d_min, dd);
467
468 if (out) {
469 const int d2 = d.diag();
470 if (log) cout << "Backtrace node=" << node << " i=" << d2 + j << "-" << d2 + j_end << " j=" << j << "-" << j_end << endl;
471 for (; j < j_end; ++j) {
472 const Letter s = subject[j], q = query[d2 + j];
473 if (s == q) {
474 out->transcript.push_back(op_match);
475 ++out->identities;
476 }
477 else
478 out->transcript.push_back(op_substitution, s);
479 ++out->length;
480 }
481 }
482 return true;
483 }
484
backtraceGreedy_aligner2485 void backtrace(size_t top_node, Hsp *out, Hsp_traits &t, int max_shift, unsigned &next, int max_j) const
486 {
487 Hsp_traits traits(frame);
488 if (top_node != Diag_graph::end) {
489 const Diagonal_node &d = diags[top_node];
490 if (out) {
491 out->transcript.clear();
492 out->query_range.end_ = d.query_end();
493 out->subject_range.end_ = d.subject_end();
494 }
495 traits.subject_range.end_ = d.subject_end();
496 traits.query_range.end_ = d.query_end();
497 int score_min = d.prefix_score;
498 backtrace(top_node, std::min(d.subject_end(), max_j), out, traits, d.prefix_score, score_min, max_shift, next);
499 }
500 else {
501 traits.score = 0;
502 if (out)
503 out->score = 0;
504 }
505 if (out)
506 out->transcript.push_terminator();
507 t = traits;
508 }
509
backtraceGreedy_aligner2510 int backtrace(size_t top_node, list<Hsp> &hsps, list<Hsp_traits> &ts, list<Hsp_traits>::iterator &t_begin, int cutoff, int max_shift) const
511 {
512 unsigned next;
513 int max_score = 0, max_j = (int)subject.length();
514 do {
515 Hsp *hsp = log ? new Hsp(true) : 0;
516 Hsp_traits t(frame);
517 next = std::numeric_limits<unsigned>::max();
518 backtrace(top_node, hsp, t, max_shift, next, max_j);
519 if (t.score > 0)
520 max_j = t.subject_range.begin_;
521 if (t.score >= cutoff && disjoint(t_begin, ts.end(), t, cutoff)) {
522 if (t_begin == ts.end()) {
523 ts.push_back(t);
524 t_begin = ts.end();
525 t_begin--;
526 }
527 else
528 ts.push_back(t);
529 if (hsp)
530 hsps.push_back(*hsp);
531 max_score = std::max(max_score, t.score);
532 }
533 delete hsp;
534 top_node = next;
535 } while (next != std::numeric_limits<unsigned>::max());
536 return max_score;
537 }
538
backtraceGreedy_aligner2539 int backtrace(list<Hsp> &hsps, list<Hsp_traits> &ts, int cutoff, int max_shift) const
540 {
541 vector<Diagonal_node*> top_nodes;
542 for (size_t i = 0; i < diags.nodes.size(); ++i) {
543 Diagonal_node &d = diags.nodes[i];
544 //cout << "node=" << i << " prefix_score=" << d.prefix_score << " path_max=" << d.path_max << " rel_score=" << d.rel_score() << " cutoff=" << cutoff << endl;
545 //if (d.prefix_score >= cutoff && (d.prefix_score == d.path_max || d.prefix_score - d.path_min >= cutoff))
546 if(d.rel_score() >= cutoff)
547 top_nodes.push_back(&d);
548 }
549 std::sort(top_nodes.begin(), top_nodes.end(), Diagonal_node::cmp_rel_score);
550 int max_score = 0;
551 list<Hsp_traits>::iterator t_begin = ts.end();
552
553 for (vector<Diagonal_node*>::const_iterator i = top_nodes.begin(); i < top_nodes.end(); ++i) {
554 const size_t node = *i - diags.nodes.data();
555 if (log)
556 cout << "Backtrace candidate node=" << node << endl;
557 if (disjoint(t_begin, ts.end(), **i, cutoff)) {
558 if (log)
559 cout << "Backtrace node=" << node << " prefix_score=" << (*i)->prefix_score << " rel_score=" << (*i)->rel_score() << endl;
560 max_score = std::max(max_score, backtrace(node, hsps, ts, t_begin, cutoff, max_shift));
561 if (log)
562 cout << endl;
563 }
564 }
565 return max_score;
566 }
567
runGreedy_aligner2568 int run(list<Hsp> &hsps, list<Hsp_traits> &ts, double space_penalty, int cutoff, int max_shift)
569 {
570 if (config.chaining_maxnodes > 0) {
571 std::sort(diags.nodes.begin(), diags.nodes.end(), Diagonal_segment::cmp_score);
572 if (diags.nodes.size() > config.chaining_maxnodes)
573 diags.nodes.erase(diags.nodes.begin() + config.chaining_maxnodes, diags.nodes.end());
574 }
575 if (config.chaining_len_cap > 0.0 && diags.nodes.size() > config.chaining_min_nodes) {
576 std::sort(diags.nodes.begin(), diags.nodes.end(), Diagonal_segment::cmp_score);
577 const double cap = query.length() * config.chaining_len_cap;
578 double total_len = 0.0;
579 auto it = diags.nodes.begin();
580 while (it < diags.nodes.end() && total_len < cap) {
581 total_len += it->len;
582 ++it;
583 }
584 diags.nodes.erase(std::max(diags.nodes.begin() + config.chaining_min_nodes, it), diags.nodes.end());
585 }
586 diags.sort();
587 diags.prune();
588 if (log) {
589 diags.print(query, subject);
590 cout << endl << endl;
591 }
592
593 forward_pass(Index_iterator(0llu), Index_iterator(diags.nodes.size()), true, space_penalty);
594 int max_score = backtrace(hsps, ts, cutoff, max_shift);
595
596 if (log) {
597 hsps.sort(Hsp::cmp_query_pos);
598 for (list<Hsp>::iterator i = hsps.begin(); i != hsps.end(); ++i)
599 print_hsp(*i, TranslatedSequence(query));
600 cout << endl << "Smith-Waterman:" << endl;
601 smith_waterman(query, subject, diags);
602 cout << endl << endl;
603 }
604 return max_score;
605 }
606
runGreedy_aligner2607 int run(list<Hsp> &hsps, list<Hsp_traits> &ts, vector<Diagonal_segment>::const_iterator begin, vector<Diagonal_segment>::const_iterator end, int band)
608 {
609 if (log)
610 cout << "***** Seed hit run " << begin->diag() << '\t' << (end - 1)->diag() << '\t' << (end - 1)->diag() - begin->diag() << endl;
611 diags.init();
612 diags.load(begin, end);
613 return run(hsps, ts, 0.1, 19, band);
614 }
615
Greedy_aligner2Greedy_aligner2616 Greedy_aligner2(const Sequence &query, const Sequence &subject, bool log, unsigned frame) :
617 query(query),
618 subject(subject),
619 //query_bc(query_bc),
620 log(log),
621 frame(frame)
622 {
623 }
624
625 const Sequence query, subject;
626 //const Bias_correction &query_bc;
627 const bool log;
628 const unsigned frame;
629 static thread_local Diag_graph diags;
630 static thread_local map<int, unsigned> window;
631
632 };
633
634 thread_local Diag_graph Greedy_aligner2::diags;
635 thread_local map<int, unsigned> Greedy_aligner2::window;
636
greedy_align(Sequence query,Sequence subject,vector<Diagonal_segment>::const_iterator begin,vector<Diagonal_segment>::const_iterator end,bool log,unsigned frame)637 std::pair<int, list<Hsp_traits>> greedy_align(Sequence query, Sequence subject, vector<Diagonal_segment>::const_iterator begin, vector<Diagonal_segment>::const_iterator end, bool log, unsigned frame)
638 {
639 const int band = config.chaining_maxgap;
640 if (end - begin == 1)
641 return { begin->score, { { begin->diag(), begin->diag(), begin->score, (int)frame, begin->query_range(), begin->subject_range() } } };
642 Greedy_aligner2 ga(query, subject, log, frame);
643 list<Hsp> hsps;
644 list<Hsp_traits> ts;
645 int score = ga.run(hsps, ts, begin, end, band);
646 return std::make_pair(score, std::move(ts));
647 }