1 /*  $Id: aln_generators.cpp 601341 2020-02-05 20:27:33Z vasilche $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Kamen Todorov, NCBI
27 *
28 * File Description:
29 *   Alignment generators
30 *
31 * ===========================================================================
32 */
33 
34 
35 #include <ncbi_pch.hpp>
36 
37 #include <objects/seqalign/Dense_seg.hpp>
38 #include <objects/seqalign/Std_seg.hpp>
39 #include <objects/seqalign/Seq_align_set.hpp>
40 #include <objects/seqalign/Dense_diag.hpp>
41 #include <objects/seqalign/Sparse_seg.hpp>
42 #include <objects/seqalign/Spliced_seg.hpp>
43 #include <objects/seqalign/Spliced_exon.hpp>
44 #include <objects/seqalign/Spliced_exon_chunk.hpp>
45 #include <objects/seqalign/Product_pos.hpp>
46 #include <objects/seqalign/Prot_pos.hpp>
47 #include <objmgr/scope.hpp>
48 #include <objmgr/bioseq_handle.hpp>
49 
50 #include <objects/seqloc/Seq_loc.hpp>
51 #include <objects/seqloc/Seq_id.hpp>
52 
53 #include <objtools/alnmgr/aln_generators.hpp>
54 #include <objtools/alnmgr/alnexception.hpp>
55 #include <objtools/alnmgr/aln_serial.hpp>
56 #include <objtools/alnmgr/aln_converters.hpp>
57 
58 #define USE_RANGE_SET
59 #ifdef USE_RANGE_SET
60 # include <util/range_set.hpp>
61 # define CRangeCollection CRangeSet
62 #else
63 # include <util/range_coll.hpp>
64 #endif
65 
66 #include <serial/typeinfo.hpp> // for SerialAssign
67 
68 BEGIN_NCBI_SCOPE
69 USING_SCOPE(objects);
70 
71 
72 CRef<CSeq_align>
CreateSeqAlignFromAnchoredAln(const CAnchoredAln & anchored_aln,CSeq_align::TSegs::E_Choice choice,CScope * scope)73 CreateSeqAlignFromAnchoredAln(const CAnchoredAln& anchored_aln,
74                               CSeq_align::TSegs::E_Choice choice,
75                               CScope* scope)
76 {
77     CRef<CSeq_align> sa(new CSeq_align);
78     sa->SetType(CSeq_align::eType_not_set);
79     sa->SetDim(anchored_aln.GetDim());
80 
81     switch(choice) {
82     case CSeq_align::TSegs::e_Dendiag:
83         CreateDense_diagFromAnchoredAln(sa->SetSegs().SetDendiag(), anchored_aln, scope);
84         break;
85     case CSeq_align::TSegs::e_Denseg:
86         sa->SetSegs().SetDenseg(*CreateDensegFromAnchoredAln(anchored_aln, scope));
87         break;
88     case CSeq_align::TSegs::e_Std:
89         break;
90     case CSeq_align::TSegs::e_Packed:
91         sa->SetSegs().SetPacked(*CreatePackedsegFromAnchoredAln(anchored_aln, scope));
92         break;
93     case CSeq_align::TSegs::e_Disc:
94         sa->SetSegs().SetDisc(*CreateAlignSetFromAnchoredAln(anchored_aln, scope));
95         break;
96     case CSeq_align::TSegs::e_Spliced:
97         sa->SetSegs().SetSpliced(*CreateSplicedsegFromAnchoredAln(anchored_aln, scope));
98         break;
99     case CSeq_align::TSegs::e_Sparse:
100         break;
101     case CSeq_align::TSegs::e_not_set:
102         NCBI_THROW(CAlnException, eInvalidRequest,
103                    "Invalid CSeq_align::TSegs type.");
104         break;
105     }
106     return sa;
107 }
108 
109 
110 CRef<CSeq_align>
CreateSeqAlignFromPairwiseAln(const CPairwiseAln & pairwise_aln,CSeq_align::TSegs::E_Choice choice,CScope * scope)111 CreateSeqAlignFromPairwiseAln(const CPairwiseAln& pairwise_aln,
112                               CSeq_align::TSegs::E_Choice choice,
113                               CScope* scope)
114 {
115     CRef<CSeq_align> sa(new CSeq_align);
116     sa->SetType(CSeq_align::eType_not_set);
117     sa->SetDim(2);
118 
119     switch(choice) {
120     case CSeq_align::TSegs::e_Denseg:
121         sa->SetSegs().SetDenseg(*CreateDensegFromPairwiseAln(pairwise_aln, scope));
122         break;
123     case CSeq_align::TSegs::e_Disc:
124         sa->SetSegs().SetDisc(*CreateAlignSetFromPairwiseAln(pairwise_aln, scope));
125         break;
126     case CSeq_align::TSegs::e_Packed:
127         sa->SetSegs().SetPacked(*CreatePackedsegFromPairwiseAln(pairwise_aln, scope));
128         break;
129     case CSeq_align::TSegs::e_Spliced:
130         sa->SetSegs().SetSpliced(*CreateSplicedsegFromPairwiseAln(pairwise_aln, scope));
131         break;
132     case CSeq_align::TSegs::e_Std:
133     case CSeq_align::TSegs::e_Dendiag:
134     case CSeq_align::TSegs::e_Sparse:
135     case CSeq_align::TSegs::e_not_set:
136         NCBI_THROW(CAlnException, eInvalidRequest,
137                    "Unsupported CSeq_align::TSegs type.");
138         break;
139     }
140     return sa;
141 }
142 
143 
144 //#define _TRACE_CSegmentedRangeCollection
145 #ifdef _TRACE_CSegmentedRangeCollection
operator <<(ostream & out,const CRangeCollection<CPairwiseAln::TPos> & segmetned_range_coll)146 ostream& operator<<(ostream& out, const CRangeCollection<CPairwiseAln::TPos>& segmetned_range_coll)
147 {
148     out << "CRangeCollection<CPairwiseAln::TPos>" << endl;
149 
150     ITERATE (CRangeCollection<CPairwiseAln::TPos>, rng_it, segmetned_range_coll) {
151         out << (CPairwiseAln::TRng)*rng_it << endl;
152     }
153     return out << endl;
154 }
155 #endif
156 
157 class CSegmentedRangeCollection : public CRangeCollection<CPairwiseAln::TPos>
158 {
159 public:
160     typedef ncbi::CRangeCollection<CPairwiseAln::TPos> TParent;
161 
CutAtPosition(position_type pos)162     void CutAtPosition(position_type pos) {
163         if ( pos > 0 ) {
164             DivideAfter(pos-1);
165         }
166     }
167 
insert(const TRange & r)168     void insert(const TRange& r) {
169 #ifdef _TRACE_CSegmentedRangeCollection
170         cerr << "=====================" << endl;
171         cerr << "Original:" << *this;
172         cerr << "Inserting: " <<  endl << (CPairwiseAln::TRng)r << endl << endl;
173 #endif
174         // Cut
175         CutAtPosition(r.GetFrom());
176         CutAtPosition(r.GetToOpen());
177 #ifdef _TRACE_CSegmentedRangeCollection
178         cerr << "After the cut:" << *this << endl;
179 #endif
180 
181         // Find the diff if any
182         TParent addition;
183         addition += r;
184         addition -= *this;
185 
186         if ( !addition.empty() ) {
187 #ifdef _TRACE_CSegmentedRangeCollection
188             cerr << "Addition: " << addition << endl;
189 #endif
190             // Insert the diff
191 #ifndef USE_RANGE_SET
192             iterator it = find_nc(addition.begin()->GetToOpen());
193             ITERATE(TParent, add_it, addition) {
194                 TRange rr(add_it->GetFrom(), add_it->GetTo());
195                 while (it != TParent::m_vRanges.end()  &&
196                        rr.GetFrom() >= it->GetFrom()) {
197                     ++it;
198                 }
199                 it = TParent::m_vRanges.insert(it, rr);
200                 ++it;
201             }
202 #else
203             CombineWithAndKeepAbutting(addition);
204 #endif
205         }
206 #ifdef _TRACE_CSegmentedRangeCollection
207         else {
208             cerr << "No addition." << endl << endl;
209         }
210 #endif
211 #ifdef _TRACE_CSegmentedRangeCollection
212         cerr << "Result: " << *this;
213         cerr << "=====================" << endl << endl;
214 #endif
215     }
216 };
217 
218 
219 void
CreateDense_diagFromAnchoredAln(CSeq_align::TSegs::TDendiag & dd,const CAnchoredAln & anchored_aln,CScope * scope)220 CreateDense_diagFromAnchoredAln(CSeq_align::TSegs::TDendiag& dd,
221                                 const CAnchoredAln& anchored_aln,
222                                 CScope* scope)
223 {
224     const CAnchoredAln::TPairwiseAlnVector& pairwises = anchored_aln.GetPairwiseAlns();
225 
226     typedef CSegmentedRangeCollection TAnchorSegments;
227     TAnchorSegments anchor_segments;
228     ITERATE(CAnchoredAln::TPairwiseAlnVector, pairwise_aln_i, pairwises) {
229         ITERATE (CPairwiseAln::TAlnRngColl, rng_i, **pairwise_aln_i) {
230             anchor_segments.insert(CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
231         }
232     }
233 
234     CSeq_align::TSegs::TDendiag diags;
235     CDense_diag::TDim dim = anchored_aln.GetDim();
236     for ( auto anchor_segments_seg = anchor_segments.begin(); anchor_segments_seg != anchor_segments.end(); ++anchor_segments_seg ) {
237         CRef<CDense_diag> diag(new CDense_diag);
238         diag->SetDim(dim);
239         diag->SetIds().resize(dim);
240         for (int row = 0;  row < dim;  ++row) {
241             CRef<CSeq_id> id(new CSeq_id);
242             id->Assign(anchored_aln.GetId(dim - row - 1)->GetSeqId());
243             diag->SetIds()[row] = id;
244         }
245         diag->SetStarts().resize(dim, kInvalidSeqPos);
246         diag->SetStrands().resize(dim);
247         diag->SetLen(anchor_segments_seg->GetLength());
248         diags.push_back(diag);
249     }
250 
251     for (int row = 0;  row < dim;  ++row) {
252         size_t seg = 0;
253         CSeq_align::TSegs::TDendiag::iterator diag_it = diags.begin();
254 
255         TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
256         CPairwiseAln::TAlnRngColl::const_iterator aln_rng_i =
257             pairwises[dim - row - 1]->begin();
258         bool direct = aln_rng_i->IsDirect();
259         TSignedSeqPos left_delta = 0;
260         TSignedSeqPos right_delta = aln_rng_i->GetLength();
261         while (seg_i != anchor_segments.end()) {
262             if (aln_rng_i != pairwises[dim - row - 1]->end()  &&
263                 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
264                 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
265                 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
266                     NCBI_THROW(CAlnException, eInternalFailure,
267                                "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
268                 }
269 
270                 // dec right_delta
271                 _ASSERT(right_delta >= seg_i->GetLength());
272                 if (right_delta < seg_i->GetLength()) {
273                     NCBI_THROW(CAlnException, eInternalFailure,
274                                "right_delta < seg_i->GetLength()");
275                 }
276                 right_delta -= seg_i->GetLength();
277 
278                 (*diag_it)->SetStarts()[row] =
279                     (direct ?
280                      aln_rng_i->GetSecondFrom() + left_delta :
281                      aln_rng_i->GetSecondFrom() + right_delta);
282 
283                 // inc left_delta
284                 left_delta += seg_i->GetLength();
285 
286                 if (right_delta == 0) {
287                     _ASSERT(left_delta == aln_rng_i->GetLength());
288                     ++aln_rng_i;
289                     if (aln_rng_i != pairwises[dim - row - 1]->end()) {
290                         direct = aln_rng_i->IsDirect();
291                         left_delta = 0;
292                         right_delta = aln_rng_i->GetLength();
293                     }
294                 }
295             }
296             (*diag_it)->SetStrands()[row] =
297                 (direct ? eNa_strand_plus : eNa_strand_minus);
298             ++seg_i;
299             ++seg;
300             ++diag_it;
301         }
302     }
303     // Cleanup: remove rows with gaps, remove one-row diags.
304     NON_CONST_ITERATE(CSeq_align::TSegs::TDendiag, diag_it, diags) {
305         size_t row = 0;
306         CDense_diag& diag = **diag_it;
307         CDense_diag::TStarts& starts = diag.SetStarts();
308         while (row < starts.size()) {
309             if (starts[row] == kInvalidSeqPos) {
310                 starts.erase(starts.begin() + row);
311                 CDense_diag::TIds& ids = diag.SetIds();
312                 ids.erase(ids.begin() + row);
313                 CDense_diag::TStrands& strands = diag.SetStrands();
314                 strands.erase(strands.begin() + row);
315                 continue;
316             }
317             ++row;
318         }
319         if (diag.GetStarts().size() < 2) continue;
320         diag.SetDim(starts.size());
321 #if _DEBUG
322         diag.Validate();
323 #endif
324         dd.push_back(*diag_it);
325     }
326 }
327 
328 
329 CRef<CDense_seg>
CreateDensegFromAnchoredAln(const CAnchoredAln & anchored_aln,CScope * scope)330 CreateDensegFromAnchoredAln(const CAnchoredAln& anchored_aln,
331                             CScope* scope)
332 {
333     const CAnchoredAln::TPairwiseAlnVector& pairwises = anchored_aln.GetPairwiseAlns();
334 
335     typedef CSegmentedRangeCollection TAnchorSegments;
336     TAnchorSegments anchor_segments;
337     ITERATE(CAnchoredAln::TPairwiseAlnVector, pairwise_aln_i, pairwises) {
338         ITERATE (CPairwiseAln::TAlnRngColl, rng_i, **pairwise_aln_i) {
339             anchor_segments.insert(CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
340         }
341     }
342 
343     // Create a dense-seg
344     CRef<CDense_seg> ds(new CDense_seg);
345 
346     // Determine dimensions
347     CDense_seg::TNumseg& numseg = ds->SetNumseg();
348     numseg = anchor_segments.size();
349     CDense_seg::TDim& dim = ds->SetDim();
350     dim = anchored_aln.GetDim();
351 
352     // Tmp vars
353     CDense_seg::TDim row;
354     CDense_seg::TNumseg seg;
355 
356     // Ids
357     CDense_seg::TIds& ids = ds->SetIds();
358     ids.resize(dim);
359     for (row = 0;  row < dim;  ++row) {
360         ids[row].Reset(new CSeq_id);
361         SerialAssign<CSeq_id>(*ids[row], anchored_aln.GetId(dim - row - 1)->GetSeqId());
362     }
363 
364     // Lens
365     CDense_seg::TLens& lens = ds->SetLens();
366     lens.resize(numseg);
367     TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
368     for (seg = 0; seg < numseg; ++seg, ++seg_i) {
369         lens[seg] = seg_i->GetLength();
370     }
371 
372     int matrix_size = dim * numseg;
373 
374     // Strands (just resize, will set while setting starts)
375     CDense_seg::TStrands& strands = ds->SetStrands();
376     strands.resize(matrix_size, eNa_strand_unknown);
377 
378     // Starts and strands
379     CDense_seg::TStarts& starts = ds->SetStarts();
380     starts.resize(matrix_size, -1);
381     for (row = 0;  row < dim;  ++row) {
382         seg = 0;
383         int matrix_row_pos = row;  // optimization to eliminate multiplication
384         seg_i = anchor_segments.begin();
385         CPairwiseAln::TAlnRngColl::const_iterator aln_rng_i = pairwises[dim - row - 1]->begin();
386         bool direct = aln_rng_i->IsDirect();
387         TSignedSeqPos left_delta = 0;
388         TSignedSeqPos right_delta = aln_rng_i->GetLength();
389         while (seg_i != anchor_segments.end()) {
390             _ASSERT(seg < numseg);
391             _ASSERT(matrix_row_pos == row + dim * seg);
392             if (aln_rng_i != pairwises[dim - row - 1]->end()  &&
393                 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
394                 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
395                 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
396                     NCBI_THROW(CAlnException, eInternalFailure,
397                                "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
398                 }
399 
400                 // dec right_delta
401                 _ASSERT(right_delta >= seg_i->GetLength());
402                 if (right_delta < seg_i->GetLength()) {
403                     NCBI_THROW(CAlnException, eInternalFailure,
404                                "right_delta < seg_i->GetLength()");
405                 }
406                 right_delta -= seg_i->GetLength();
407 
408                 starts[matrix_row_pos] =
409                     (direct ?
410                      aln_rng_i->GetSecondFrom() + left_delta :
411                      aln_rng_i->GetSecondFrom() + right_delta);
412 
413                 // inc left_delta
414                 left_delta += seg_i->GetLength();
415 
416                 if (right_delta == 0) {
417                     _ASSERT(left_delta == aln_rng_i->GetLength());
418                     ++aln_rng_i;
419                     if (aln_rng_i != pairwises[dim - row - 1]->end()) {
420                         direct = aln_rng_i->IsDirect();
421                         left_delta = 0;
422                         right_delta = aln_rng_i->GetLength();
423                     }
424                 }
425             }
426             strands[matrix_row_pos] = (direct ? eNa_strand_plus : eNa_strand_minus);
427             ++seg_i;
428             ++seg;
429             matrix_row_pos += dim;
430         }
431     }
432 #if _DEBUG
433     ds->Validate(true);
434 #endif
435     return ds;
436 }
437 
438 
439 CRef<CDense_seg>
CreateDensegFromPairwiseAln(const CPairwiseAln & pairwise_aln,CScope * scope)440 CreateDensegFromPairwiseAln(const CPairwiseAln& pairwise_aln,
441                             CScope* scope)
442 {
443     // Create a dense-seg
444     CRef<CDense_seg> ds(new CDense_seg);
445 
446 
447     // Determine dimensions
448     CDense_seg::TNumseg& numseg = ds->SetNumseg();
449     numseg = pairwise_aln.size();
450     ds->SetDim(2);
451     int matrix_size = 2 * numseg;
452 
453     CDense_seg::TLens& lens = ds->SetLens();
454     lens.resize(numseg);
455 
456     CDense_seg::TStarts& starts = ds->SetStarts();
457     starts.resize(matrix_size, -1);
458 
459     CDense_seg::TIds& ids = ds->SetIds();
460     ids.resize(2);
461 
462 
463     // Ids
464     ids[0].Reset(new CSeq_id);
465     SerialAssign<CSeq_id>(*ids[0], pairwise_aln.GetFirstId()->GetSeqId());
466     ids[1].Reset(new CSeq_id);
467     SerialAssign<CSeq_id>(*ids[1], pairwise_aln.GetSecondId()->GetSeqId());
468 
469 
470     // Tmp vars
471     CDense_seg::TNumseg seg = 0;
472     int matrix_pos = 0;
473 
474 
475     // Main loop to set all values
476     ITERATE(CPairwiseAln::TAlnRngColl, aln_rng_i, pairwise_aln) {
477         starts[matrix_pos++] = aln_rng_i->GetFirstFrom();
478         if ( !aln_rng_i->IsDirect() ) {
479             if ( !ds->IsSetStrands() ) {
480                 ds->SetStrands().resize(matrix_size, eNa_strand_plus);
481             }
482             ds->SetStrands()[matrix_pos] = eNa_strand_minus;
483         }
484         starts[matrix_pos++] = aln_rng_i->GetSecondFrom();
485         lens[seg++] = aln_rng_i->GetLength();
486     }
487     _ASSERT(matrix_pos == matrix_size);
488     _ASSERT(seg == numseg);
489 
490 
491 #ifdef _DEBUG
492     ds->Validate(true);
493 #endif
494     return ds;
495 }
496 
497 
498 static const TSignedSeqPos kMaxSplicedExonIndelLength = 15;
499 
InitSplicedsegFromPairwiseAln(CSpliced_seg & spliced_seg,const CPairwiseAln & pairwise_aln,CScope * scope)500 void InitSplicedsegFromPairwiseAln(CSpliced_seg& spliced_seg,
501                                    const CPairwiseAln& pairwise_aln,
502                                    CScope* scope)
503 {
504     // Sort by product positions (if minus strand, then reverse).
505     // Make sure genomic positions are sorted in the corresponding direction too, no overlaps etc.
506     // Small one-row gap -> indel.
507     // Large genomic gap -> intron (start new exon).
508     // Other gaps -> intron, set 'partial' on both sides.
509     // If the product does not start/end at the sequence extreme, set 'partial' for the exon.
510 
511     // Check strands - one per row.
512     _ASSERT((pairwise_aln.GetFlags() & CPairwiseAln::fMixedDir) != CPairwiseAln::fMixedDir);
513     // Product is nuc or prot.
514     _ASSERT(pairwise_aln.GetFirstBaseWidth() == 1  ||
515             pairwise_aln.GetFirstBaseWidth() == 3);
516     bool prot = pairwise_aln.GetFirstBaseWidth() == 3;
517     // The other row is genomic.
518     _ASSERT(pairwise_aln.GetSecondBaseWidth() == 1);
519 
520     // Ids
521     CRef<CSeq_id> product_id(new CSeq_id);
522     product_id->Assign(pairwise_aln.GetFirstId()->GetSeqId());
523     spliced_seg.SetProduct_id(*product_id);
524     CRef<CSeq_id> genomic_id(new CSeq_id);
525     genomic_id->Assign(pairwise_aln.GetSecondId()->GetSeqId());
526     spliced_seg.SetGenomic_id(*genomic_id);
527 
528     // Product type
529     spliced_seg.SetProduct_type(prot ?
530         CSpliced_seg::eProduct_type_protein
531         : CSpliced_seg::eProduct_type_transcript);
532 
533     // Exons
534     CSpliced_seg::TExons& exons = spliced_seg.SetExons();
535 
536     typedef TSignedSeqPos                  TPos;
537     typedef CRange<TPos>                   TRng;
538 
539     TPos last_prod_end = 0;
540     TPos last_gen_end = 0;
541     CRef<CSpliced_exon> exon;
542     CPairwiseAln::TAlnRngColl::const_iterator rg_it = pairwise_aln.begin();
543     bool gen_direct = rg_it == pairwise_aln.end() || rg_it->IsDirect();
544     bool prod_direct = prot ||
545         rg_it == pairwise_aln.end() || rg_it->IsFirstDirect();
546     // Adjust genomic strand - in CPairwiseAln it's relative to the first seq.
547     if ( !prod_direct ) {
548         gen_direct = !gen_direct;
549     }
550 
551     TRng ex_prod_rg;
552     TRng ex_gen_rg;
553 
554     // Main loop to set all values
555     ITERATE(CPairwiseAln::TAlnRngColl, rg_it, pairwise_aln) {
556         const CPairwiseAln::TAlnRng& rg = *rg_it;
557 
558         // Unaligned ranges.
559         TPos prod_skip, gen_skip;
560         if (rg_it == pairwise_aln.begin()) {
561             prod_skip = 0;
562             gen_skip = 0;
563         }
564         else {
565             prod_skip = rg.GetFirstFrom() - last_prod_end;
566             gen_skip = gen_direct == prod_direct ?
567                 rg.GetSecondFrom() - last_gen_end
568                 : last_gen_end - rg.GetSecondToOpen();
569         }
570 
571         // Break exon, ignore long gaps between exons.
572         if (!exon  ||  prod_skip > kMaxSplicedExonIndelLength  ||
573             gen_skip > kMaxSplicedExonIndelLength) {
574             if ( exon ) {
575                 _ASSERT(exon->IsSetProduct_start());
576                 _ASSERT(exon->IsSetGenomic_start());
577                 _ASSERT(exon->IsSetProduct_end());
578                 _ASSERT(exon->IsSetGenomic_end());
579                 if ( prod_direct ) {
580                     exons.push_back(exon);
581                 }
582                 else {
583                     exons.push_front(exon);
584                 }
585                 exon.Reset();
586                 ex_prod_rg = TRng::GetEmpty();
587                 ex_gen_rg = TRng::GetEmpty();
588             }
589             prod_skip = 0;
590             gen_skip = 0;
591         }
592 
593         if (prod_skip > 0  ||  gen_skip > 0) {
594             _ASSERT(exon);
595             TSeqPos mismatch = min(prod_skip, gen_skip);
596             if (mismatch > 0) {
597                 CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
598                 chunk->SetMismatch(mismatch);
599                 if ( prod_direct ) {
600                     exon->SetParts().push_back(chunk);
601                 }
602                 else {
603                     exon->SetParts().push_front(chunk);
604                 }
605                 prod_skip -= mismatch;
606                 gen_skip -= mismatch;
607             }
608             if (prod_skip > 0) {
609                 CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
610                 chunk->SetProduct_ins(prod_skip);
611                 if ( prod_direct ) {
612                     exon->SetParts().push_back(chunk);
613                 }
614                 else {
615                     exon->SetParts().push_front(chunk);
616                 }
617             }
618             if (gen_skip > 0) {
619                 CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
620                 chunk->SetGenomic_ins(gen_skip);
621                 if ( prod_direct ) {
622                     exon->SetParts().push_back(chunk);
623                 }
624                 else {
625                     exon->SetParts().push_front(chunk);
626                 }
627             }
628             prod_skip = 0;
629             gen_skip = 0;
630         }
631 
632         if ( !exon ) {
633             // Start new exon
634             exon.Reset(new CSpliced_exon);
635             // The first exon must start at 0 or be partial.
636             if (exons.empty()  &&  rg.GetFirstFrom() > 0) {
637                 exon->SetPartial(true);
638             }
639             if ( !prot ) {
640                 exon->SetProduct_strand(prod_direct
641                     ? eNa_strand_plus : eNa_strand_minus);
642             }
643             exon->SetGenomic_strand(gen_direct
644                 ? eNa_strand_plus : eNa_strand_minus);
645         }
646         // Aligned chunk
647         CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
648         chunk->SetMatch(rg.GetLength());
649         if ( prod_direct ) {
650             exon->SetParts().push_back(chunk);
651         }
652         else {
653             exon->SetParts().push_front(chunk);
654         }
655 
656         // Update exon extremes
657         ex_prod_rg.CombineWith(TRng(rg.GetFirstFrom(), rg.GetFirstTo()));
658         ex_gen_rg.CombineWith(TRng(rg.GetSecondFrom(), rg.GetSecondTo()));
659         if ( exon ) {
660             if (prot) {
661                 exon->SetProduct_start().SetProtpos().SetAmin(ex_prod_rg.GetFrom() / 3);
662                 exon->SetProduct_start().SetProtpos().SetFrame(ex_prod_rg.GetFrom() % 3 + 1);
663                 exon->SetProduct_end().SetProtpos().SetAmin(ex_prod_rg.GetTo() / 3);
664                 exon->SetProduct_end().SetProtpos().SetFrame(ex_prod_rg.GetTo() % 3 + 1);
665             } else {
666                 exon->SetProduct_start().SetNucpos(ex_prod_rg.GetFrom());
667                 exon->SetProduct_end().SetNucpos(ex_prod_rg.GetTo());
668             }
669             exon->SetGenomic_start(ex_gen_rg.GetFrom());
670             exon->SetGenomic_end(ex_gen_rg.GetTo());
671         }
672 
673         last_prod_end = rg.GetFirstToOpen();
674         last_gen_end = gen_direct == prod_direct ?
675             rg.GetSecondToOpen() : rg.GetSecondFrom();
676     }
677     if ( exon ) {
678         _ASSERT(exon->IsSetProduct_start());
679         _ASSERT(exon->IsSetGenomic_start());
680         _ASSERT(exon->IsSetProduct_end());
681         _ASSERT(exon->IsSetGenomic_end());
682         if ( prod_direct ) {
683             exons.push_back(exon);
684         }
685         else {
686             exons.push_front(exon);
687         }
688     }
689     else if ( !exons.empty() ) {
690         // Get the last added exon.
691         exon = prod_direct ? exons.front() : exons.back();
692     }
693 
694     // Check if the last exon ends at product end.
695     if (exon  &&  scope) {
696         TSeqPos prod_end = 0;
697         if ( exon->GetProduct_end().IsNucpos() ) {
698             prod_end = exon->GetProduct_end().GetNucpos();
699         }
700         else {
701             prod_end = exon->GetProduct_end().GetProtpos().GetAmin();
702         }
703         CBioseq_Handle h = scope->GetBioseqHandle(*product_id);
704         if ( h ) {
705             TSeqPos prod_len = h.GetBioseqLength();
706             if (prod_len != kInvalidSeqPos  &&  prod_len != prod_end + 1) {
707                 exon->SetPartial(true);
708             }
709         }
710     }
711 
712 #ifdef _DEBUG
713     spliced_seg.Validate(true);
714 #endif
715 }
716 
717 
718 CRef<CSpliced_seg>
CreateSplicedsegFromAnchoredAln(const CAnchoredAln & anchored_aln,CScope * scope)719 CreateSplicedsegFromAnchoredAln(const CAnchoredAln& anchored_aln,
720                                 CScope* scope)
721 {
722     _ASSERT(anchored_aln.GetDim() == 2);
723 
724     // Sort by product positions (if minus strand, then reverse).
725     // Make sure genomic positions are sorted in the corresponding direction too, no overlaps etc.
726     // Small one-row gap -> indel.
727     // Large genomic gap -> intron (start new exon).
728     // Other gaps -> intron, set 'partial' on both sides.
729     // If the product does not start/end at the sequence extreme, set 'partial' for the exon.
730 
731     // Create a spliced_seg
732     CRef<CSpliced_seg> spliced_seg(new CSpliced_seg);
733     CAnchoredAln::TDim anchor_row = anchored_aln.GetAnchorRow();
734     const CPairwiseAln& pairwise = *anchored_aln.GetPairwiseAlns()[1 - anchor_row];
735     InitSplicedsegFromPairwiseAln(*spliced_seg, pairwise, scope);
736     return spliced_seg;
737 }
738 
739 
740 CRef<CSpliced_seg>
CreateSplicedsegFromPairwiseAln(const CPairwiseAln & pairwise_aln,CScope * scope)741 CreateSplicedsegFromPairwiseAln(const CPairwiseAln& pairwise_aln,
742                                 CScope* scope)
743 {
744     // Create a dense-seg
745     CRef<CSpliced_seg> ss(new CSpliced_seg);
746     InitSplicedsegFromPairwiseAln(*ss, pairwise_aln, scope);
747     return ss;
748 }
749 
750 
s_TranslatePairwise(CPairwiseAln & out_pw,const CPairwiseAln & pw,const CPairwiseAln & tr)751 void s_TranslatePairwise(
752     CPairwiseAln& out_pw,   // output pairwise (needs to be empty)
753     const CPairwiseAln& pw, // input pairwise to translate from
754     const CPairwiseAln& tr) // translating pairwise
755 {
756     ITERATE (CPairwiseAln, it, pw) {
757         CPairwiseAln::TAlnRng ar = *it;
758         ar.SetFirstFrom(tr.GetSecondPosByFirstPos(ar.GetFirstFrom()));
759         if (ar.GetFirstFrom() < 0) continue; // skip unaligned ranges
760         out_pw.insert(ar);
761     }
762 }
763 
764 
765 typedef CAnchoredAln::TDim TDim;
766 
767 
CreateSeqAlignFromEachPairwiseAln(const CAnchoredAln::TPairwiseAlnVector pairwises,TDim anchor,vector<CRef<CSeq_align>> & out_seqaligns,CSeq_align::TSegs::E_Choice choice,CScope * scope)768 void CreateSeqAlignFromEachPairwiseAln(
769     const CAnchoredAln::TPairwiseAlnVector pairwises,
770     TDim                                   anchor,
771     vector<CRef<CSeq_align> >&             out_seqaligns,
772     CSeq_align::TSegs::E_Choice            choice,
773     CScope*                                scope)
774 {
775     out_seqaligns.resize(pairwises.size() - 1);
776     for (TDim row = 0, sa_idx = 0;
777          row < (TDim) pairwises.size();
778          ++row) {
779         if (row == anchor) continue;
780         CRef<CSeq_align> sa(new CSeq_align);
781         sa->SetType(CSeq_align::eType_partial);
782         sa->SetDim(2);
783 
784         const CPairwiseAln& pw = *pairwises[row];
785         CRef<CPairwiseAln> p(new CPairwiseAln(pairwises[anchor]->GetSecondId(),
786             pw.GetSecondId(),
787             pw.GetFlags()));
788         s_TranslatePairwise(*p, pw, *pairwises[anchor]);
789 
790         switch(choice)    {
791         case CSeq_align::TSegs::e_Denseg: {
792             CRef<CDense_seg> ds = CreateDensegFromPairwiseAln(*p, scope);
793             sa->SetSegs().SetDenseg(*ds);
794             break;
795         }
796         case CSeq_align::TSegs::e_Disc: {
797             CRef<CSeq_align_set> disc = CreateAlignSetFromPairwiseAln(*p, scope);
798             sa->SetSegs().SetDisc(*disc);
799             break;
800         }
801         case CSeq_align::TSegs::e_Packed: {
802             CRef<CPacked_seg> ps = CreatePackedsegFromPairwiseAln(*p, scope);
803             sa->SetSegs().SetPacked(*ps);
804             break;
805         }
806         case CSeq_align::TSegs::e_Spliced: {
807             CRef<CSpliced_seg> ss = CreateSplicedsegFromPairwiseAln(*p, scope);
808             sa->SetSegs().SetSpliced(*ss);
809             break;
810         }
811         case CSeq_align::TSegs::e_Dendiag:
812         case CSeq_align::TSegs::e_Std:
813         case CSeq_align::TSegs::e_Sparse:
814             NCBI_THROW(CAlnException, eInvalidRequest,
815                         "Unsupported CSeq_align::TSegs type.");
816         case CSeq_align::TSegs::e_not_set:
817         default:
818             NCBI_THROW(CAlnException, eInvalidRequest,
819                         "Invalid CSeq_align::TSegs type.");
820         }
821         out_seqaligns[sa_idx++].Reset(sa);
822     }
823 }
824 
825 
826 CRef<CSeq_align_set>
CreateAlignSetFromAnchoredAln(const CAnchoredAln & anchored_aln,CScope * scope)827 CreateAlignSetFromAnchoredAln(const CAnchoredAln& anchored_aln,
828                               CScope* scope)
829 {
830     CRef<CSeq_align_set> disc(new CSeq_align_set);
831 
832     const CAnchoredAln::TPairwiseAlnVector& pairwises = anchored_aln.GetPairwiseAlns();
833 
834     typedef CSegmentedRangeCollection TAnchorSegments;
835     TAnchorSegments anchor_segments;
836     ITERATE(CAnchoredAln::TPairwiseAlnVector, pairwise_aln_i, pairwises) {
837         ITERATE (CPairwiseAln::TAlnRngColl, rng_i, **pairwise_aln_i) {
838             anchor_segments.insert(CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
839         }
840     }
841 
842     CDense_seg::TDim row;
843     CDense_seg::TNumseg seg;
844 
845     // Determine dimensions
846     CDense_seg::TNumseg numseg = anchor_segments.size();
847     CDense_seg::TDim dim = anchored_aln.GetDim();
848 
849     vector< CRef<CDense_seg> > dsegs;
850     dsegs.resize(numseg);
851     for (size_t i = 0; i < dsegs.size(); ++i) {
852         dsegs[i].Reset(new CDense_seg);
853         CDense_seg& dseg = *dsegs[i];
854         dseg.SetDim(dim);
855         dseg.SetNumseg(1);
856         // Ids
857         CDense_seg::TIds& ids = dseg.SetIds();
858         ids.resize(dim);
859         for (row = 0;  row < dim;  ++row) {
860             ids[row].Reset(new CSeq_id);
861             SerialAssign<CSeq_id>(*ids[row], anchored_aln.GetId(dim - row - 1)->GetSeqId());
862         }
863         // Lens, strands, starts - just prepare the storage
864         dseg.SetLens().resize(1);
865         dseg.SetStrands().resize(dim, eNa_strand_unknown);
866         dseg.SetStarts().resize(dim, -1);
867     }
868 
869     for (row = 0;  row < dim;  ++row) {
870         seg = 0;
871         TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
872         CPairwiseAln::TAlnRngColl::const_iterator aln_rng_i = pairwises[dim - row - 1]->begin();
873         bool direct = aln_rng_i->IsDirect();
874         TSignedSeqPos left_delta = 0;
875         TSignedSeqPos right_delta = aln_rng_i->GetLength();
876         while (seg_i != anchor_segments.end()) {
877             _ASSERT(seg < numseg);
878             CDense_seg& dseg = *dsegs[seg];
879             dseg.SetLens()[0] = seg_i->GetLength();
880             CDense_seg::TStarts& starts = dseg.SetStarts();
881 
882             dseg.SetStrands()[row] = (direct ? eNa_strand_plus : eNa_strand_minus);
883             if (aln_rng_i != pairwises[dim - row - 1]->end()  &&
884                 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
885                 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
886                 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
887                     NCBI_THROW(CAlnException, eInternalFailure,
888                                "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
889                 }
890 
891                 // dec right_delta
892                 _ASSERT(right_delta >= seg_i->GetLength());
893                 if (right_delta < seg_i->GetLength()) {
894                     NCBI_THROW(CAlnException, eInternalFailure,
895                                "right_delta < seg_i->GetLength()");
896                 }
897                 right_delta -= seg_i->GetLength();
898 
899                 starts[row] =
900                     (direct ?
901                      aln_rng_i->GetSecondFrom() + left_delta :
902                      aln_rng_i->GetSecondFrom() + right_delta);
903 
904                 // inc left_delta
905                 left_delta += seg_i->GetLength();
906 
907                 if (right_delta == 0) {
908                     _ASSERT(left_delta == aln_rng_i->GetLength());
909                     ++aln_rng_i;
910                     if (aln_rng_i != pairwises[dim - row - 1]->end()) {
911                         direct = aln_rng_i->IsDirect();
912                         left_delta = 0;
913                         right_delta = aln_rng_i->GetLength();
914                     }
915                 }
916             }
917             // Add only densegs with both rows non-empty
918             if (starts[0] >= 0  &&  starts[1] >= 0) {
919                 CRef<CSeq_align> seg_aln(new CSeq_align);
920                 seg_aln->SetType(CSeq_align::eType_not_set);
921                 seg_aln->SetDim(dim);
922                 disc->Set().push_back(seg_aln);
923                 seg_aln->SetSegs().SetDenseg(dseg);
924             }
925             ++seg_i;
926             ++seg;
927         }
928     }
929 
930     return disc;
931 }
932 
933 
934 CRef<CSeq_align_set>
CreateAlignSetFromPairwiseAln(const CPairwiseAln & pairwise_aln,CScope * scope)935 CreateAlignSetFromPairwiseAln(const CPairwiseAln& pairwise_aln,
936                               CScope* scope)
937 {
938     CRef<CSeq_align_set> disc(new CSeq_align_set);
939 
940     CDense_seg::TNumseg numseg = pairwise_aln.size();
941 
942     vector< CRef<CDense_seg> > dsegs;
943     dsegs.resize(numseg);
944     for (size_t i = 0; i < dsegs.size(); ++i) {
945         CRef<CSeq_align> seg_aln(new CSeq_align);
946         seg_aln->SetType(CSeq_align::eType_not_set);
947         seg_aln->SetDim(2);
948         disc->Set().push_back(seg_aln);
949         CDense_seg& dseg = seg_aln->SetSegs().SetDenseg();
950         dsegs[i].Reset(&dseg);
951         dseg.SetDim(2);
952         dseg.SetNumseg(1);
953         // Ids
954         CDense_seg::TIds& ids = dseg.SetIds();
955         ids.resize(2);
956         ids[0].Reset(new CSeq_id);
957         SerialAssign<CSeq_id>(*ids[0], pairwise_aln.GetFirstId()->GetSeqId());
958         ids[1].Reset(new CSeq_id);
959         SerialAssign<CSeq_id>(*ids[1], pairwise_aln.GetSecondId()->GetSeqId());
960         // Lens, strands, starts - just prepare the storage
961         dseg.SetLens().resize(1);
962         dseg.SetStrands().resize(2, eNa_strand_unknown);
963         dseg.SetStarts().resize(2, -1);
964     }
965 
966     // Main loop to set all values
967     CDense_seg::TNumseg seg = 0;
968     ITERATE(CPairwiseAln::TAlnRngColl, aln_rng_i, pairwise_aln) {
969         CDense_seg& dseg = *dsegs[seg];
970         dseg.SetStarts()[0] = aln_rng_i->GetFirstFrom();
971         if ( !aln_rng_i->IsDirect() ) {
972             if ( !dseg.IsSetStrands() ) {
973                 dseg.SetStrands().resize(2, eNa_strand_plus);
974             }
975             dseg.SetStrands()[1] = eNa_strand_minus;
976         }
977         dseg.SetStarts()[1] = aln_rng_i->GetSecondFrom();
978         dseg.SetLens()[0] = aln_rng_i->GetLength();
979         seg++;
980     }
981 
982     return disc;
983 }
984 
985 
986 CRef<CPacked_seg>
CreatePackedsegFromAnchoredAln(const CAnchoredAln & anchored_aln,CScope * scope)987 CreatePackedsegFromAnchoredAln(const CAnchoredAln& anchored_aln,
988                                CScope* scope)
989 {
990     const CAnchoredAln::TPairwiseAlnVector& pairwises = anchored_aln.GetPairwiseAlns();
991 
992     typedef CSegmentedRangeCollection TAnchorSegments;
993     TAnchorSegments anchor_segments;
994     ITERATE(CAnchoredAln::TPairwiseAlnVector, pairwise_aln_i, pairwises) {
995         ITERATE (CPairwiseAln::TAlnRngColl, rng_i, **pairwise_aln_i) {
996             anchor_segments.insert(CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
997         }
998     }
999 
1000     // Create a packed-seg
1001     CRef<CPacked_seg> ps(new CPacked_seg);
1002 
1003     // Determine dimensions
1004     CPacked_seg::TNumseg& numseg = ps->SetNumseg();
1005     numseg = anchor_segments.size();
1006     CPacked_seg::TDim& dim = ps->SetDim();
1007     dim = anchored_aln.GetDim();
1008 
1009     // Tmp vars
1010     CPacked_seg::TDim row;
1011     CPacked_seg::TNumseg seg;
1012 
1013     // Ids
1014     CPacked_seg::TIds& ids = ps->SetIds();
1015     ids.resize(dim);
1016     for (row = 0;  row < dim;  ++row) {
1017         ids[row].Reset(new CSeq_id);
1018         SerialAssign<CSeq_id>(*ids[row], anchored_aln.GetId(dim - row - 1)->GetSeqId());
1019     }
1020 
1021     // Lens
1022     CPacked_seg::TLens& lens = ps->SetLens();
1023     lens.resize(numseg);
1024     TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
1025     for (seg = 0; seg < numseg; ++seg, ++seg_i) {
1026         lens[seg] = seg_i->GetLength();
1027     }
1028 
1029     int matrix_size = dim * numseg;
1030 
1031     // Present
1032     CPacked_seg::TPresent& present = ps->SetPresent();
1033     present.resize(matrix_size);
1034 
1035     // Strands (just resize, will set while setting starts)
1036     CPacked_seg::TStrands& strands = ps->SetStrands();
1037     strands.resize(matrix_size, eNa_strand_unknown);
1038 
1039     // Starts and strands
1040     CPacked_seg::TStarts& starts = ps->SetStarts();
1041     starts.resize(matrix_size, 0);
1042     for (row = 0;  row < dim;  ++row) {
1043         seg = 0;
1044         int matrix_row_pos = row;  // optimization to eliminate multiplication
1045         seg_i = anchor_segments.begin();
1046         CPairwiseAln::TAlnRngColl::const_iterator aln_rng_i = pairwises[dim - row - 1]->begin();
1047         bool direct = aln_rng_i->IsDirect();
1048         TSignedSeqPos left_delta = 0;
1049         TSignedSeqPos right_delta = aln_rng_i->GetLength();
1050         while (seg_i != anchor_segments.end()) {
1051             _ASSERT(seg < numseg);
1052             _ASSERT(matrix_row_pos == row + dim * seg);
1053             strands[matrix_row_pos] = (direct ? eNa_strand_plus : eNa_strand_minus);
1054             if (aln_rng_i != pairwises[dim - row - 1]->end()  &&
1055                 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
1056                 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
1057                 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
1058                     NCBI_THROW(CAlnException, eInternalFailure,
1059                                "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
1060                 }
1061 
1062                 // dec right_delta
1063                 _ASSERT(right_delta >= seg_i->GetLength());
1064                 if (right_delta < seg_i->GetLength()) {
1065                     NCBI_THROW(CAlnException, eInternalFailure,
1066                                "right_delta < seg_i->GetLength()");
1067                 }
1068                 right_delta -= seg_i->GetLength();
1069 
1070                 CPacked_seg::TStarts::value_type start = (direct ?
1071                     aln_rng_i->GetSecondFrom() + left_delta
1072                     : aln_rng_i->GetSecondFrom() + right_delta);
1073                 starts[matrix_row_pos] = start;
1074 
1075                 present[matrix_row_pos] = (start != kInvalidSeqPos);
1076 
1077                 // inc left_delta
1078                 left_delta += seg_i->GetLength();
1079 
1080                 if (right_delta == 0) {
1081                     _ASSERT(left_delta == aln_rng_i->GetLength());
1082                     ++aln_rng_i;
1083                     if (aln_rng_i != pairwises[dim - row - 1]->end()) {
1084                         direct = aln_rng_i->IsDirect();
1085                         left_delta = 0;
1086                         right_delta = aln_rng_i->GetLength();
1087                     }
1088                 }
1089             }
1090             ++seg_i;
1091             ++seg;
1092             matrix_row_pos += dim;
1093         }
1094     }
1095     return ps;
1096 }
1097 
1098 
1099 CRef<CPacked_seg>
CreatePackedsegFromPairwiseAln(const CPairwiseAln & pairwise_aln,CScope * scope)1100 CreatePackedsegFromPairwiseAln(const CPairwiseAln& pairwise_aln,
1101                                CScope* scope)
1102 {
1103     // Create a packed-seg
1104     CRef<CPacked_seg> ps(new CPacked_seg);
1105 
1106 
1107     // Determine dimensions
1108     CPacked_seg::TNumseg& numseg = ps->SetNumseg();
1109     numseg = pairwise_aln.size();
1110     ps->SetDim(2);
1111     int matrix_size = 2 * numseg;
1112 
1113     CPacked_seg::TLens& lens = ps->SetLens();
1114     lens.resize(numseg);
1115 
1116     CPacked_seg::TStarts& starts = ps->SetStarts();
1117     starts.resize(matrix_size, 0);
1118 
1119     CPacked_seg::TPresent& present = ps->SetPresent();
1120     present.resize(matrix_size, 0);
1121 
1122     CPacked_seg::TIds& ids = ps->SetIds();
1123     ids.resize(2);
1124 
1125     // Ids
1126     ids[0].Reset(new CSeq_id);
1127     SerialAssign<CSeq_id>(*ids[0], pairwise_aln.GetFirstId()->GetSeqId());
1128     ids[1].Reset(new CSeq_id);
1129     SerialAssign<CSeq_id>(*ids[1], pairwise_aln.GetSecondId()->GetSeqId());
1130 
1131 
1132     // Tmp vars
1133     CPacked_seg::TNumseg seg = 0;
1134     int matrix_pos = 0;
1135 
1136     // Main loop to set all values
1137     ITERATE(CPairwiseAln::TAlnRngColl, aln_rng_i, pairwise_aln) {
1138         CPacked_seg::TStarts::value_type start = aln_rng_i->GetFirstFrom();
1139         present[matrix_pos] = (start != kInvalidSeqPos);
1140         starts[matrix_pos++] = start;
1141         if ( !aln_rng_i->IsDirect() ) {
1142             if ( !ps->IsSetStrands() ) {
1143                 ps->SetStrands().resize(matrix_size, eNa_strand_plus);
1144             }
1145             ps->SetStrands()[matrix_pos] = eNa_strand_minus;
1146         }
1147         start = aln_rng_i->GetSecondFrom();
1148         present[matrix_pos] = (start != kInvalidSeqPos);
1149         starts[matrix_pos++] = start;
1150         lens[seg++] = aln_rng_i->GetLength();
1151     }
1152     _ASSERT(matrix_pos == matrix_size);
1153     _ASSERT(seg == numseg);
1154 
1155     return ps;
1156 }
1157 
1158 
1159 CRef<CSeq_align>
ConvertSeq_align(const CSeq_align & src,CSeq_align::TSegs::E_Choice dst_choice,CSeq_align::TDim anchor_row,CScope * scope)1160 ConvertSeq_align(const CSeq_align& src,
1161                  CSeq_align::TSegs::E_Choice dst_choice,
1162                  CSeq_align::TDim anchor_row,
1163                  CScope* scope)
1164 {
1165     TScopeAlnSeqIdConverter id_conv(scope);
1166     TScopeIdExtract id_extract(id_conv);
1167     TScopeAlnIdMap aln_id_map(id_extract, 1);
1168     TAlnSeqIdVec ids;
1169     id_extract(src, ids);
1170     aln_id_map.push_back(src);
1171 
1172     TScopeAlnStats aln_stats(aln_id_map);
1173     CAlnUserOptions aln_user_options;
1174     CRef<CAnchoredAln> anchored_aln =
1175         CreateAnchoredAlnFromAln(aln_stats, 0, aln_user_options, anchor_row);
1176 
1177     return CreateSeqAlignFromAnchoredAln(*anchored_aln, dst_choice, scope);
1178 }
1179 
1180 
1181 END_NCBI_SCOPE
1182