1 /* $Id: aln_builders.cpp 601341 2020-02-05 20:27:33Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Kamen Todorov
27 *
28 * File Description:
29 * Alignment builders
30 *
31 * ===========================================================================
32 */
33
34
35 #include <ncbi_pch.hpp>
36
37 #include <objtools/alnmgr/aln_builders.hpp>
38 #include <objtools/alnmgr/aln_rng_coll_oper.hpp>
39 #include <objtools/alnmgr/aln_rng_coll_list_oper.hpp>
40 #include <objtools/alnmgr/aln_serial.hpp>
41 #include <corelib/ncbitime.hpp>
42
43 BEGIN_NCBI_SCOPE
44 USING_SCOPE(objects);
45
46 //#define _TRACE_MergeAlnRngColl
47
48 void
MergePairwiseAlns(CPairwiseAln & existing,const CPairwiseAln & addition,const CAlnUserOptions::TMergeFlags & flags)49 MergePairwiseAlns(CPairwiseAln& existing,
50 const CPairwiseAln& addition,
51 const CAlnUserOptions::TMergeFlags& flags)
52 {
53 CPairwiseAln difference(existing.GetFirstId(),
54 existing.GetSecondId(),
55 existing.GetPolicyFlags());
56 SubtractAlnRngCollections(addition, // minuend
57 existing, // subtrahend
58 difference);
59 #ifdef _TRACE_MergeAlnRngColl
60 cerr << endl;
61 cerr << "existing:" << endl << existing << endl;
62 cerr << "addition:" << endl << addition << endl;
63 cerr << "difference = addition - existing:" << endl << difference << endl;
64 #endif
65 ITERATE(CPairwiseAln, rng_it, difference) {
66 existing.insert(*rng_it);
67 }
68 if ((flags & CAlnUserOptions::fIgnoreInsertions) == 0) {
69 int gap_flags = CPairwiseAln::TAlnRngColl::fAllowAbutting |
70 CPairwiseAln::TAlnRngColl::fAllowMixedDir |
71 CPairwiseAln::TAlnRngColl::fAllowOverlap;
72 CPairwiseAln::TAlnRngColl gaps_coll(addition.GetInsertions(),
73 gap_flags);
74 CPairwiseAln::TAlnRngColl gaps_truncated(gap_flags);
75 SubtractAlnRngCollections(gaps_coll, existing, gaps_truncated);
76 existing.AddInsertions(gaps_truncated);
77 }
78 #ifdef _TRACE_MergeAlnRngColl
79 cerr << "result = existing + difference:" << endl << existing << endl;
80 #endif
81 }
82
83
84 class CMergedPairwiseAln : public CObject
85 {
86 public:
87 typedef CAnchoredAln::TPairwiseAlnVector TPairwiseAlnVector;
88
CMergedPairwiseAln(const CAlnUserOptions::TMergeFlags & merge_flags)89 CMergedPairwiseAln(const CAlnUserOptions::TMergeFlags& merge_flags)
90 : m_MergeFlags(merge_flags),
91 m_NumberOfDirectAlns(0)
92 {
93 };
94
insert(const CRef<CPairwiseAln> & pairwise)95 void insert(const CRef<CPairwiseAln>& pairwise) {
96 CRef<CPairwiseAln> addition(pairwise);
97
98 if (m_MergeFlags & CAlnUserOptions::fTruncateOverlaps) {
99 x_TruncateOverlaps(addition);
100 }
101 x_AddPairwise(*addition);
102 }
103
GetPairwiseAlns() const104 const TPairwiseAlnVector& GetPairwiseAlns() const {
105 return m_PairwiseAlns;
106 };
107
GetMergedFlags() const108 const CAlnUserOptions::TMergeFlags& GetMergedFlags() const {
109 return m_MergeFlags;
110 }
111
SortInsertions(void)112 void SortInsertions(void)
113 {
114 NON_CONST_ITERATE(TPairwiseAlnVector, it, m_PairwiseAlns) {
115 (*it)->SortInsertions();
116 }
117 }
118
119 private:
x_TruncateOverlaps(CRef<CPairwiseAln> & addition)120 void x_TruncateOverlaps(CRef<CPairwiseAln>& addition)
121 {
122 /// Truncate, if requested
123 ITERATE(TPairwiseAlnVector, aln_it, m_PairwiseAlns) {
124 const CPairwiseAln& existing = **aln_it;
125 CRef<CPairwiseAln> truncated
126 (new CPairwiseAln(addition->GetFirstId(),
127 addition->GetSecondId(),
128 addition->GetPolicyFlags()));
129 SubtractAlnRngCollections(*addition, // minuend
130 existing, // subtrahend
131 *truncated);// difference
132
133 #ifdef _TRACE_MergeAlnRngColl
134 cerr << endl;
135 cerr << "existing:" << endl << existing << endl;
136 cerr << "addition:" << endl << *addition << endl;
137 cerr << "truncated = addition - existing:" << endl << *truncated << endl;
138 #endif
139
140 if ((m_MergeFlags & CAlnUserOptions::fIgnoreInsertions) == 0) {
141 int gap_flags = CPairwiseAln::TAlnRngColl::fAllowAbutting |
142 CPairwiseAln::TAlnRngColl::fAllowMixedDir |
143 CPairwiseAln::TAlnRngColl::fAllowOverlap;
144 CPairwiseAln::TAlnRngColl gaps_coll(addition->GetInsertions(),
145 gap_flags);
146 CPairwiseAln::TAlnRngColl gaps_truncated(gap_flags);
147 SubtractAlnRngCollections(gaps_coll, existing, gaps_truncated);
148 addition = truncated;
149 addition->AddInsertions(gaps_truncated);
150 }
151 else {
152 addition = truncated;
153 }
154 }
155 }
156
157
x_ValidNeighboursOnFirstDim(const CPairwiseAln::TAlnRng & left,const CPairwiseAln::TAlnRng & right)158 bool x_ValidNeighboursOnFirstDim(const CPairwiseAln::TAlnRng& left,
159 const CPairwiseAln::TAlnRng& right) {
160 if (left.GetFirstToOpen() > right.GetFirstFrom()) {
161 // Overlap on first dimension
162 return false;
163 }
164 return true;
165 }
166
x_ValidNeighboursOnSecondDim(const CPairwiseAln::TAlnRng & left,const CPairwiseAln::TAlnRng & right)167 bool x_ValidNeighboursOnSecondDim(const CPairwiseAln::TAlnRng& left,
168 const CPairwiseAln::TAlnRng& right) {
169 if (left.GetSecondToOpen() > right.GetSecondFrom()) {
170 if (m_MergeFlags & CAlnUserOptions::fAllowTranslocation) {
171 if (left.GetSecondFrom() < right.GetSecondToOpen()) {
172 // Overlap on second dimension
173 return false;
174 }
175 // Allowed translocation
176 } else {
177 // Either overlap or
178 // unallowed translocation (unsorted on second dimension)
179 return false;
180 }
181 }
182 return true;
183 }
184
185
x_CanInsertRng(CPairwiseAln & a,const CPairwiseAln::TAlnRng & r)186 bool x_CanInsertRng(CPairwiseAln& a, const CPairwiseAln::TAlnRng& r) {
187 CPairwiseAln::const_iterator it = a.find_insertion_point(r);
188
189 if (it != a.begin()) { // Check left
190 const CPairwiseAln::TAlnRng& left = *(it - 1);
191 if ( !x_ValidNeighboursOnFirstDim(left, r) ||
192 !x_ValidNeighboursOnSecondDim(r.IsDirect() ? left : r,
193 r.IsDirect() ? r : left) ) {
194 return false;
195 }
196 }
197 if (it != a.end()) { // Check right
198 const CPairwiseAln::TAlnRng& right = *it;
199 if ( !x_ValidNeighboursOnFirstDim(r, right) ||
200 !x_ValidNeighboursOnSecondDim(r.IsDirect() ? r : right,
201 r.IsDirect() ? right : r) ) {
202 return false;
203 }
204 }
205 return true;
206 }
207
208
x_AddPairwise(const CPairwiseAln & addition)209 void x_AddPairwise(const CPairwiseAln& addition) {
210 TPairwiseAlnVector::iterator aln_it, aln_end;
211 const CPairwiseAln::TInsertions& gaps = addition.GetInsertions();
212 CPairwiseAln::TInsertions::const_iterator gap_it = gaps.begin();
213 ITERATE(CPairwiseAln, rng_it, addition) {
214
215 // What alignments can we possibly insert it to?
216 if (m_MergeFlags & CAlnUserOptions::fAllowMixedStrand) {
217 aln_it = m_PairwiseAlns.begin();
218 aln_end = m_PairwiseAlns.end();
219 } else {
220 if (rng_it->IsDirect()) {
221 aln_it = m_PairwiseAlns.begin();
222 aln_end = aln_it + m_NumberOfDirectAlns;
223 } else {
224 aln_it = m_PairwiseAlns.begin() + m_NumberOfDirectAlns;
225 aln_end = m_PairwiseAlns.end();
226 }
227 }
228
229 // Which alignment do we insert it to?
230 while (aln_it != aln_end) {
231 #ifdef _TRACE_MergeAlnRngColl
232 cerr << endl;
233 cerr << *rng_it << endl;
234 cerr << **aln_it << endl;
235 cerr << "m_MergeFlags: " << m_MergeFlags << endl;
236 #endif
237 _ASSERT(m_MergeFlags & CAlnUserOptions::fAllowMixedStrand ||
238 (rng_it->IsDirect() && (*aln_it)->IsSet(CPairwiseAln::fDirect)) ||
239 (rng_it->IsReversed() && (*aln_it)->IsSet(CPairwiseAln::fReversed)));
240 if (x_CanInsertRng(**aln_it, *rng_it)) {
241 break;
242 }
243 ++aln_it;
244 }
245 if (aln_it == aln_end) {
246 CRef<CPairwiseAln> new_aln
247 (new CPairwiseAln(addition.GetFirstId(),
248 addition.GetSecondId(),
249 addition.GetPolicyFlags()));
250 /* adjust policy flags here? */
251
252 aln_it = m_PairwiseAlns.insert(aln_it, new_aln);
253
254 if (rng_it->IsDirect() &&
255 !(m_MergeFlags & CAlnUserOptions::fAllowMixedStrand)) {
256 ++m_NumberOfDirectAlns;
257 }
258 }
259 (*aln_it)->insert(*rng_it);
260 #ifdef _TRACE_MergeAlnRngColl
261 cerr << *rng_it;
262 cerr << **aln_it;
263 #endif
264 _ASSERT(m_MergeFlags & CAlnUserOptions::fAllowMixedStrand ||
265 (rng_it->IsDirect() && (*aln_it)->IsSet(CPairwiseAln::fDirect)) ||
266 (rng_it->IsReversed() && (*aln_it)->IsSet(CPairwiseAln::fReversed)));
267
268 // Add gaps
269 CPairwiseAln::const_iterator next_rng_it = rng_it;
270 ++next_rng_it;
271 CPairwiseAln::TPos next_rng_pos = -1;
272 if (next_rng_it != addition.end()) {
273 next_rng_pos = next_rng_it->GetFirstFrom();
274 }
275 if ((m_MergeFlags & CAlnUserOptions::fIgnoreInsertions) == 0) {
276 // Add all gaps up to the next non-gap range
277 while (gap_it != gaps.end() &&
278 (gap_it->GetFirstFrom() <= next_rng_pos || next_rng_pos < 0)) {
279 (*aln_it)->AddInsertion(*gap_it);
280 gap_it++;
281 }
282 }
283 }
284 }
285
286 const CAlnUserOptions::TMergeFlags m_MergeFlags;
287 TPairwiseAlnVector m_PairwiseAlns;
288 size_t m_NumberOfDirectAlns;
289 };
290
291
292
operator <<(ostream & out,const CMergedPairwiseAln & merged)293 ostream& operator<<(ostream& out, const CMergedPairwiseAln& merged)
294 {
295 out << "MergedPairwiseAln contains: " << endl;
296 out << " TMergeFlags: " << merged.GetMergedFlags() << endl;
297 ITERATE(CMergedPairwiseAln::TPairwiseAlnVector, aln_it, merged.GetPairwiseAlns()) {
298 out << **aln_it;
299 };
300 return out;
301 }
302
303
304 typedef vector<CRef<CMergedPairwiseAln> > TMergedVec;
305
BuildAln(const TMergedVec & merged_vec,CAnchoredAln & out_aln)306 void BuildAln(const TMergedVec& merged_vec,
307 CAnchoredAln& out_aln)
308 {
309 typedef CAnchoredAln::TDim TDim;
310
311 // Determine the size
312 size_t total_number_of_rows = 0;
313 ITERATE(TMergedVec, merged_i, merged_vec) {
314 total_number_of_rows += (*merged_i)->GetPairwiseAlns().size();
315 }
316
317 // Resize the container
318 out_aln.SetPairwiseAlns().resize(total_number_of_rows);
319
320 // Copy pairwises
321 TDim row = 0;
322 ITERATE(TMergedVec, merged_i, merged_vec) {
323 ITERATE(CAnchoredAln::TPairwiseAlnVector, pairwise_i, (*merged_i)->GetPairwiseAlns()) {
324 out_aln.SetPairwiseAlns()[row] = *pairwise_i;
325 ++row;
326 }
327 }
328 }
329
330
331 void
SortAnchoredAlnVecByScore(TAnchoredAlnVec & anchored_aln_vec)332 SortAnchoredAlnVecByScore(TAnchoredAlnVec& anchored_aln_vec)
333 {
334 sort(anchored_aln_vec.begin(),
335 anchored_aln_vec.end(),
336 PScoreGreater<CAnchoredAln>());
337 }
338
339
340 void
s_TranslateAnchorToAlnCoords(CPairwiseAln & out_anchor_pw,const CPairwiseAln & anchor_pw)341 s_TranslateAnchorToAlnCoords(CPairwiseAln& out_anchor_pw, // output must be empty
342 const CPairwiseAln& anchor_pw)
343 {
344 if ( anchor_pw.empty() ) return;
345 CPairwiseAln::TPos aln_pos = 0; // Start at 0
346 CPairwiseAln::TPos aln_len = 0;
347 ITERATE (CPairwiseAln::TAlnRngColl, it, anchor_pw) {
348 aln_len += it->GetLength();
349 }
350
351 bool direct = anchor_pw.begin()->IsFirstDirect();
352
353 // There should be no gaps on anchor
354 _ASSERT(anchor_pw.GetInsertions().empty());
355 ITERATE (CPairwiseAln::TAlnRngColl, it, anchor_pw) {
356 CPairwiseAln::TAlnRng ar = *it;
357 ar.SetFirstFrom(direct ? aln_pos : aln_len - aln_pos - ar.GetLength());
358 if ( !direct ) {
359 ar.SetDirect(!ar.IsDirect());
360 ar.SetFirstDirect(true);
361 }
362 out_anchor_pw.insert(ar);
363 aln_pos += ar.GetLength();
364 }
365 }
366
367
368 void
s_TranslatePairwiseToAlnCoords(CPairwiseAln & out_pw,const CPairwiseAln & pw,const CPairwiseAln & tr,bool direct)369 s_TranslatePairwiseToAlnCoords(CPairwiseAln& out_pw, // output pairwise (needs to be empty)
370 const CPairwiseAln& pw, // input pairwise to translate from
371 const CPairwiseAln& tr, // translating (aln segments) pairwise
372 bool direct) // new anchor has the same direction as the original one?
373 {
374 // Shift between the old anchor and the alignment.
375 const CPairwiseAln::TInsertions& gaps = pw.GetInsertions();
376 CPairwiseAln::TInsertions::const_iterator gap_it = gaps.begin();
377 ITERATE (CPairwiseAln, it, pw) {
378 CPairwiseAln::TAlnRng ar = *it;
379 CPairwiseAln::TPos pos =
380 tr.GetFirstPosBySecondPos(direct ? ar.GetFirstFrom() : ar.GetFirstTo());
381 ar.SetFirstFrom(pos);
382 if ( !direct ) {
383 ar.SetDirect(!ar.IsDirect());
384 ar.SetFirstDirect(true);
385 }
386 out_pw.insert(ar);
387 if (gap_it != gaps.end()) {
388 CPairwiseAln::const_iterator next_it = it;
389 ++next_it;
390 if (next_it != pw.end()) {
391 while (gap_it != gaps.end() &&
392 gap_it->GetFirstFrom() <= next_it->GetFirstFrom()) {
393 CPairwiseAln::TAlnRng gap_rg = *gap_it;
394 // Need to specify direction since the source point is out of
395 // anchor ranges and will produce -1.
396 CPairwiseAln::TPos new_gap_pos =
397 tr.GetFirstPosBySecondPos(gap_rg.GetFirstFrom(),
398 direct ? CPairwiseAln::eForward : CPairwiseAln::eBackwards);
399 if ( !direct ) {
400 ++new_gap_pos;
401 gap_rg.SetDirect(!gap_rg.IsDirect());
402 gap_rg.SetFirstDirect(true);
403 }
404 gap_rg.SetFirstFrom(new_gap_pos);
405 out_pw.AddInsertion(gap_rg);
406 gap_it++;
407 }
408 }
409 }
410 }
411 while (gap_it != gaps.end()) {
412 CPairwiseAln::TAlnRng gap_rg = *gap_it;
413 CPairwiseAln::TPos new_gap_pos =
414 tr.GetFirstPosBySecondPos(gap_rg.GetFirstFrom(),
415 CPairwiseAln::eForward);
416 // If there are no ranges ahead, try to find the last one before the gap.
417 if (new_gap_pos == -1) {
418 new_gap_pos = tr.GetFirstPosBySecondPos(
419 gap_rg.GetFirstFrom(),
420 CPairwiseAln::eBackwards) + 1;
421 }
422 else if ( !direct ) {
423 new_gap_pos++;
424 }
425 gap_rg.SetFirstFrom(new_gap_pos);
426 if ( !direct ) {
427 gap_rg.SetDirect(!gap_rg.IsDirect());
428 gap_rg.SetFirstDirect(true);
429 }
430 out_pw.AddInsertion(gap_rg);
431 gap_it++;
432 }
433 }
434
435
436 void
s_TranslateToAlnCoords(CAnchoredAln & anchored_aln,const TAlnSeqIdIRef & pseudo_seqid)437 s_TranslateToAlnCoords(CAnchoredAln& anchored_aln,
438 const TAlnSeqIdIRef& pseudo_seqid)
439 {
440 CAnchoredAln::TPairwiseAlnVector& pairwises = anchored_aln.SetPairwiseAlns();
441 typedef CAnchoredAln::TDim TDim;
442 const TDim anchor_row = anchored_aln.GetAnchorRow();
443
444 /// Fix the anchor pairwise, so it's expressed in aln coords:
445 const CPairwiseAln& anchor_pw = *pairwises[anchor_row];
446
447 int flags = anchor_pw.GetFlags();
448 flags &= ~(CPairwiseAln::fDirect | CPairwiseAln::fReversed);
449
450 CRef<CPairwiseAln> new_anchor_pw(new CPairwiseAln(pseudo_seqid,
451 anchor_pw.GetSecondId(),
452 flags));
453 s_TranslateAnchorToAlnCoords(*new_anchor_pw, anchor_pw);
454 bool direct = (new_anchor_pw->begin()->IsFirstDirect() == anchor_pw.begin()->IsFirstDirect());
455
456 /// Translate non-anchor pairwises to aln coords:
457 for (TDim row = 0; row < (TDim)pairwises.size(); ++row) {
458 if (row == anchor_row) {
459 pairwises[row].Reset(new_anchor_pw);
460 } else {
461 const CPairwiseAln& pw = *pairwises[row];
462 flags = pw.GetFlags();
463 flags &= ~(CPairwiseAln::fDirect | CPairwiseAln::fReversed);
464 CRef<CPairwiseAln> new_pw(new CPairwiseAln(pseudo_seqid,
465 pw.GetSecondId(),
466 flags));
467 s_TranslatePairwiseToAlnCoords(*new_pw, pw, *new_anchor_pw, direct);
468 pairwises[row].Reset(new_pw);
469 }
470 }
471 }
472
473
x_AdjustAnchorDirection(TAnchoredAlnVec & in_alns,AutoPtr<TAnchoredAlnVec> & out_alns)474 void x_AdjustAnchorDirection(TAnchoredAlnVec& in_alns,
475 AutoPtr<TAnchoredAlnVec>& out_alns)
476 {
477 // By default use the original alignments.
478 out_alns.reset(&in_alns, eNoOwnership);
479
480 // Check if all anchor rows have the same direction.
481 bool have_direct = false;
482 bool have_reverse = false;
483 TAlnSeqIdIRef common_anchor_id;
484 ITERATE(TAnchoredAlnVec, it, in_alns) {
485 const CAnchoredAln& anchored = **it;
486 // All anlignments must have the same anchor id. If this is not true,
487 // don't try to adjust strands.
488 if ( !common_anchor_id ) {
489 common_anchor_id = anchored.GetAnchorId();
490 }
491 else if ( !common_anchor_id->GetSeqId().Equals(
492 anchored.GetAnchorId()->GetSeqId()) ) {
493 return;
494 }
495 ITERATE(CAnchoredAln::TPairwiseAlnVector, pw_it, anchored.GetPairwiseAlns()) {
496 const CPairwiseAln& pw = **pw_it;
497 ITERATE(CPairwiseAln, seg, pw) {
498 if ( seg->IsFirstDirect() ) {
499 have_direct = true;
500 }
501 else {
502 have_reverse = true;
503 }
504 if (have_direct && have_reverse) {
505 break;
506 }
507 }
508 if (have_direct && have_reverse) {
509 break;
510 }
511 }
512 if (have_direct && have_reverse) {
513 break;
514 }
515 }
516 if (!have_direct || !have_reverse) {
517 return;
518 }
519
520 // Create new anchor with the same coordinates but on plus strand.
521 out_alns.reset(new TAnchoredAlnVec, eTakeOwnership);
522 ITERATE(TAnchoredAlnVec, it, in_alns) {
523 const CAnchoredAln& anchored = **it;
524 CAnchoredAln::TDim old_anchor_row = anchored.GetAnchorRow();
525 CRef<CAnchoredAln> anchored_copy(new CAnchoredAln);
526 anchored_copy->SetDim(anchored.GetDim());
527 anchored_copy->SetScore(anchored.GetScore());
528 anchored_copy->SetAnchorRow(old_anchor_row);
529 out_alns->push_back(anchored_copy);
530
531 for (int row = 0; row < anchored.GetDim(); ++row) {
532 const CPairwiseAln& pw = *anchored.GetPairwiseAlns()[row];
533 int flags = pw.GetFlags();
534 flags &= ~CPairwiseAln::fMixedDir;
535 CRef<CPairwiseAln> pw_copy(
536 new CPairwiseAln(common_anchor_id, pw.GetSecondId(), flags));
537 anchored_copy->SetPairwiseAlns()[row] = pw_copy;
538 ITERATE(CPairwiseAln, seg, pw) {
539 CPairwiseAln::TAlnRng seg_copy(*seg);
540 seg_copy.SetFirstDirect();
541 pw_copy->insert(seg_copy);
542 }
543 }
544 }
545 }
546
547
548 void
BuildAln(TAnchoredAlnVec & in_alns,CAnchoredAln & out_aln,const CAlnUserOptions & options,TAlnSeqIdIRef pseudo_seqid)549 BuildAln(TAnchoredAlnVec& in_alns,
550 CAnchoredAln& out_aln,
551 const CAlnUserOptions& options,
552 TAlnSeqIdIRef pseudo_seqid)
553 {
554 // Types
555 typedef CAnchoredAln::TDim TDim;
556
557 AutoPtr<TAnchoredAlnVec> adj_alns;
558
559 x_AdjustAnchorDirection(in_alns, adj_alns);
560 _ASSERT(adj_alns.get());
561
562 /// 1. Build a single anchored_aln
563 _ASSERT(out_aln.GetDim() == 0);
564 bool anchor_first = (options.m_MergeFlags & CAlnUserOptions::fAnchorRowFirst) != 0;
565
566 switch (options.m_MergeAlgo) {
567 case CAlnUserOptions::eQuerySeqMergeOnly:
568 ITERATE(TAnchoredAlnVec, anchored_it, *adj_alns) {
569 const CAnchoredAln& anchored = **anchored_it;
570 if (anchored_it == adj_alns->begin()) {
571 out_aln = anchored;
572 continue;
573 }
574 // assumption is that anchor row is the last
575 for (TDim row = 0; row < anchored.GetDim(); ++row) {
576 if (row == anchored.GetAnchorRow()) {
577 MergePairwiseAlns(*out_aln.SetPairwiseAlns().back(),
578 *anchored.GetPairwiseAlns()[row],
579 CAlnUserOptions::fTruncateOverlaps);
580 } else if (!anchor_first) {
581 // swap the anchor row with the new one
582 CRef<CPairwiseAln> anchor_pairwise(out_aln.GetPairwiseAlns().back());
583 out_aln.SetPairwiseAlns().back().Reset
584 (new CPairwiseAln(*anchored.GetPairwiseAlns()[row]));
585 out_aln.SetPairwiseAlns().push_back(anchor_pairwise);
586 }
587 }
588 }
589 break;
590 case CAlnUserOptions::ePreserveRows:
591 if ( !adj_alns->empty() ) {
592 if ( !(options.m_MergeFlags & CAlnUserOptions::fSkipSortByScore) ) {
593 SortAnchoredAlnVecByScore(*adj_alns);
594 }
595 TMergedVec merged_vec;
596 const CAnchoredAln& first_anchored = *adj_alns->front();
597 merged_vec.resize(first_anchored.GetDim());
598 ITERATE(TAnchoredAlnVec, anchored_it, *adj_alns) {
599 const CAnchoredAln& anchored = **anchored_it;
600 _ASSERT(anchored.GetDim() == first_anchored.GetDim());
601 if (anchored.GetDim() != first_anchored.GetDim()) {
602 string errstr = "All input alignments need to have "
603 "the same dimension when using ePreserveRows.";
604 NCBI_THROW(CAlnException, eInvalidRequest, errstr);
605 }
606 _ASSERT(anchored.GetAnchorRow() == first_anchored.GetAnchorRow());
607 if (anchored.GetAnchorRow() != first_anchored.GetAnchorRow()) {
608 string errstr = "All input alignments need to have "
609 "the same anchor row when using ePreserveRows.";
610 NCBI_THROW(CAlnException, eInvalidRequest, errstr);
611 }
612 for (TDim row = 0; row < anchored.GetDim(); ++row) {
613 CRef<CMergedPairwiseAln>& merged = merged_vec[row];
614 if (merged.Empty()) {
615 merged.Reset
616 (new CMergedPairwiseAln(row == anchored.GetAnchorRow() ?
617 CAlnUserOptions::fTruncateOverlaps :
618 options.m_MergeFlags));
619 }
620 merged->insert(anchored.GetPairwiseAlns()[row]);
621 }
622 }
623 BuildAln(merged_vec, out_aln);
624 }
625 break;
626 case CAlnUserOptions::eMergeAllSeqs:
627 default:
628 {
629 if ( !(options.m_MergeFlags & CAlnUserOptions::fSkipSortByScore) ) {
630 SortAnchoredAlnVecByScore(*adj_alns);
631 }
632 typedef map<TAlnSeqIdIRef, CRef<CMergedPairwiseAln>, SAlnSeqIdIRefComp> TIdMergedMap;
633 TIdMergedMap id_merged_map;
634 TMergedVec merged_vec;
635
636 #ifdef _TRACE_MergeAlnRngColl
637 static int aln_idx;
638 #endif
639 int flags = CAlnUserOptions::fTruncateOverlaps;
640 flags |= options.m_MergeFlags & CAlnUserOptions::fAllowMixedStrand;
641 CRef<CMergedPairwiseAln> merged_anchor(new CMergedPairwiseAln(flags));
642 if (anchor_first) {
643 merged_vec.push_back(merged_anchor);
644 }
645 ITERATE(TAnchoredAlnVec, anchored_it, *adj_alns) {
646 const CAnchoredAln& anchored_aln = **anchored_it;
647 const CAnchoredAln::TDim& anchor_row = anchored_aln.GetAnchorRow();
648
649 /// Anchor first (important, to insert anchor id in
650 /// id_merged_map before any possible self-aligned seq
651 /// gets there first).
652 #ifdef _TRACE_MergeAlnRngColl
653 cerr << endl;
654 cerr << *merged_anchor << endl;
655 cerr << "inserting aln " << aln_idx << ", anchor row (" << anchor_row << ")" << endl;
656 cerr << *anchored_aln.GetPairwiseAlns()[anchor_row] << endl;
657 #endif
658 merged_anchor->insert(anchored_aln.GetPairwiseAlns()[anchor_row]);
659 if (anchored_it == adj_alns->begin()) {
660 id_merged_map[anchored_aln.GetId(anchor_row)].Reset(merged_anchor);
661 }
662
663 /// Then other rows:
664 for (TDim row = anchored_aln.GetDim() - 1; row >=0; --row) {
665 if (row != anchor_row) {
666 CRef<CMergedPairwiseAln>& merged = id_merged_map[anchored_aln.GetId(row)];
667 if (merged.Empty()) {
668 // first time we are dealing with this id.
669 merged.Reset
670 (new CMergedPairwiseAln(options.m_MergeFlags));
671 // and add to the out vectors
672 merged_vec.push_back(merged);
673 }
674 #ifdef _TRACE_MergeAlnRngColl
675 cerr << *merged << endl;
676 cerr << "inserting aln " << aln_idx << ", row " << row << endl;
677 cerr << *anchored_aln.GetPairwiseAlns()[row] << endl;
678 #endif
679 merged->insert(anchored_aln.GetPairwiseAlns()[row]);
680 }
681 }
682 #ifdef _TRACE_MergeAlnRngColl
683 ++aln_idx;
684 #endif
685 }
686 // finally, add the anchor
687 if (!anchor_first) {
688 merged_vec.push_back(merged_anchor);
689 }
690 NON_CONST_ITERATE(TMergedVec, ma, merged_vec) {
691 (*ma)->SortInsertions();
692 }
693 BuildAln(merged_vec, out_aln);
694 }
695 break;
696 }
697 out_aln.SetAnchorRow(anchor_first ? 0 : out_aln.GetPairwiseAlns().size() - 1);
698 if ( !(options.m_MergeFlags & CAlnUserOptions::fUseAnchorAsAlnSeq) ) {
699 if ( !pseudo_seqid ) {
700 CRef<CSeq_id> seq_id (new CSeq_id("lcl|pseudo [timestamp " +
701 CTime(CTime::eCurrent).AsString("YMDhms") + "]"));
702 CRef<CAlnSeqId> aln_seq_id(new CAlnSeqId(*seq_id));
703 pseudo_seqid.Reset(aln_seq_id);
704 }
705 s_TranslateToAlnCoords(out_aln, pseudo_seqid);
706 }
707
708 /// 2. Sort the ids and alns according to score, how to collect score?
709 }
710
711
712 END_NCBI_SCOPE
713