1 /* $Id: aln_generators.cpp 601341 2020-02-05 20:27:33Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Kamen Todorov, NCBI
27 *
28 * File Description:
29 * Alignment generators
30 *
31 * ===========================================================================
32 */
33
34
35 #include <ncbi_pch.hpp>
36
37 #include <objects/seqalign/Dense_seg.hpp>
38 #include <objects/seqalign/Std_seg.hpp>
39 #include <objects/seqalign/Seq_align_set.hpp>
40 #include <objects/seqalign/Dense_diag.hpp>
41 #include <objects/seqalign/Sparse_seg.hpp>
42 #include <objects/seqalign/Spliced_seg.hpp>
43 #include <objects/seqalign/Spliced_exon.hpp>
44 #include <objects/seqalign/Spliced_exon_chunk.hpp>
45 #include <objects/seqalign/Product_pos.hpp>
46 #include <objects/seqalign/Prot_pos.hpp>
47 #include <objmgr/scope.hpp>
48 #include <objmgr/bioseq_handle.hpp>
49
50 #include <objects/seqloc/Seq_loc.hpp>
51 #include <objects/seqloc/Seq_id.hpp>
52
53 #include <objtools/alnmgr/aln_generators.hpp>
54 #include <objtools/alnmgr/alnexception.hpp>
55 #include <objtools/alnmgr/aln_serial.hpp>
56 #include <objtools/alnmgr/aln_converters.hpp>
57
58 #define USE_RANGE_SET
59 #ifdef USE_RANGE_SET
60 # include <util/range_set.hpp>
61 # define CRangeCollection CRangeSet
62 #else
63 # include <util/range_coll.hpp>
64 #endif
65
66 #include <serial/typeinfo.hpp> // for SerialAssign
67
68 BEGIN_NCBI_SCOPE
69 USING_SCOPE(objects);
70
71
72 CRef<CSeq_align>
CreateSeqAlignFromAnchoredAln(const CAnchoredAln & anchored_aln,CSeq_align::TSegs::E_Choice choice,CScope * scope)73 CreateSeqAlignFromAnchoredAln(const CAnchoredAln& anchored_aln,
74 CSeq_align::TSegs::E_Choice choice,
75 CScope* scope)
76 {
77 CRef<CSeq_align> sa(new CSeq_align);
78 sa->SetType(CSeq_align::eType_not_set);
79 sa->SetDim(anchored_aln.GetDim());
80
81 switch(choice) {
82 case CSeq_align::TSegs::e_Dendiag:
83 CreateDense_diagFromAnchoredAln(sa->SetSegs().SetDendiag(), anchored_aln, scope);
84 break;
85 case CSeq_align::TSegs::e_Denseg:
86 sa->SetSegs().SetDenseg(*CreateDensegFromAnchoredAln(anchored_aln, scope));
87 break;
88 case CSeq_align::TSegs::e_Std:
89 break;
90 case CSeq_align::TSegs::e_Packed:
91 sa->SetSegs().SetPacked(*CreatePackedsegFromAnchoredAln(anchored_aln, scope));
92 break;
93 case CSeq_align::TSegs::e_Disc:
94 sa->SetSegs().SetDisc(*CreateAlignSetFromAnchoredAln(anchored_aln, scope));
95 break;
96 case CSeq_align::TSegs::e_Spliced:
97 sa->SetSegs().SetSpliced(*CreateSplicedsegFromAnchoredAln(anchored_aln, scope));
98 break;
99 case CSeq_align::TSegs::e_Sparse:
100 break;
101 case CSeq_align::TSegs::e_not_set:
102 NCBI_THROW(CAlnException, eInvalidRequest,
103 "Invalid CSeq_align::TSegs type.");
104 break;
105 }
106 return sa;
107 }
108
109
110 CRef<CSeq_align>
CreateSeqAlignFromPairwiseAln(const CPairwiseAln & pairwise_aln,CSeq_align::TSegs::E_Choice choice,CScope * scope)111 CreateSeqAlignFromPairwiseAln(const CPairwiseAln& pairwise_aln,
112 CSeq_align::TSegs::E_Choice choice,
113 CScope* scope)
114 {
115 CRef<CSeq_align> sa(new CSeq_align);
116 sa->SetType(CSeq_align::eType_not_set);
117 sa->SetDim(2);
118
119 switch(choice) {
120 case CSeq_align::TSegs::e_Denseg:
121 sa->SetSegs().SetDenseg(*CreateDensegFromPairwiseAln(pairwise_aln, scope));
122 break;
123 case CSeq_align::TSegs::e_Disc:
124 sa->SetSegs().SetDisc(*CreateAlignSetFromPairwiseAln(pairwise_aln, scope));
125 break;
126 case CSeq_align::TSegs::e_Packed:
127 sa->SetSegs().SetPacked(*CreatePackedsegFromPairwiseAln(pairwise_aln, scope));
128 break;
129 case CSeq_align::TSegs::e_Spliced:
130 sa->SetSegs().SetSpliced(*CreateSplicedsegFromPairwiseAln(pairwise_aln, scope));
131 break;
132 case CSeq_align::TSegs::e_Std:
133 case CSeq_align::TSegs::e_Dendiag:
134 case CSeq_align::TSegs::e_Sparse:
135 case CSeq_align::TSegs::e_not_set:
136 NCBI_THROW(CAlnException, eInvalidRequest,
137 "Unsupported CSeq_align::TSegs type.");
138 break;
139 }
140 return sa;
141 }
142
143
144 //#define _TRACE_CSegmentedRangeCollection
145 #ifdef _TRACE_CSegmentedRangeCollection
operator <<(ostream & out,const CRangeCollection<CPairwiseAln::TPos> & segmetned_range_coll)146 ostream& operator<<(ostream& out, const CRangeCollection<CPairwiseAln::TPos>& segmetned_range_coll)
147 {
148 out << "CRangeCollection<CPairwiseAln::TPos>" << endl;
149
150 ITERATE (CRangeCollection<CPairwiseAln::TPos>, rng_it, segmetned_range_coll) {
151 out << (CPairwiseAln::TRng)*rng_it << endl;
152 }
153 return out << endl;
154 }
155 #endif
156
157 class CSegmentedRangeCollection : public CRangeCollection<CPairwiseAln::TPos>
158 {
159 public:
160 typedef ncbi::CRangeCollection<CPairwiseAln::TPos> TParent;
161
CutAtPosition(position_type pos)162 void CutAtPosition(position_type pos) {
163 if ( pos > 0 ) {
164 DivideAfter(pos-1);
165 }
166 }
167
insert(const TRange & r)168 void insert(const TRange& r) {
169 #ifdef _TRACE_CSegmentedRangeCollection
170 cerr << "=====================" << endl;
171 cerr << "Original:" << *this;
172 cerr << "Inserting: " << endl << (CPairwiseAln::TRng)r << endl << endl;
173 #endif
174 // Cut
175 CutAtPosition(r.GetFrom());
176 CutAtPosition(r.GetToOpen());
177 #ifdef _TRACE_CSegmentedRangeCollection
178 cerr << "After the cut:" << *this << endl;
179 #endif
180
181 // Find the diff if any
182 TParent addition;
183 addition += r;
184 addition -= *this;
185
186 if ( !addition.empty() ) {
187 #ifdef _TRACE_CSegmentedRangeCollection
188 cerr << "Addition: " << addition << endl;
189 #endif
190 // Insert the diff
191 #ifndef USE_RANGE_SET
192 iterator it = find_nc(addition.begin()->GetToOpen());
193 ITERATE(TParent, add_it, addition) {
194 TRange rr(add_it->GetFrom(), add_it->GetTo());
195 while (it != TParent::m_vRanges.end() &&
196 rr.GetFrom() >= it->GetFrom()) {
197 ++it;
198 }
199 it = TParent::m_vRanges.insert(it, rr);
200 ++it;
201 }
202 #else
203 CombineWithAndKeepAbutting(addition);
204 #endif
205 }
206 #ifdef _TRACE_CSegmentedRangeCollection
207 else {
208 cerr << "No addition." << endl << endl;
209 }
210 #endif
211 #ifdef _TRACE_CSegmentedRangeCollection
212 cerr << "Result: " << *this;
213 cerr << "=====================" << endl << endl;
214 #endif
215 }
216 };
217
218
219 void
CreateDense_diagFromAnchoredAln(CSeq_align::TSegs::TDendiag & dd,const CAnchoredAln & anchored_aln,CScope * scope)220 CreateDense_diagFromAnchoredAln(CSeq_align::TSegs::TDendiag& dd,
221 const CAnchoredAln& anchored_aln,
222 CScope* scope)
223 {
224 const CAnchoredAln::TPairwiseAlnVector& pairwises = anchored_aln.GetPairwiseAlns();
225
226 typedef CSegmentedRangeCollection TAnchorSegments;
227 TAnchorSegments anchor_segments;
228 ITERATE(CAnchoredAln::TPairwiseAlnVector, pairwise_aln_i, pairwises) {
229 ITERATE (CPairwiseAln::TAlnRngColl, rng_i, **pairwise_aln_i) {
230 anchor_segments.insert(CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
231 }
232 }
233
234 CSeq_align::TSegs::TDendiag diags;
235 CDense_diag::TDim dim = anchored_aln.GetDim();
236 for ( auto anchor_segments_seg = anchor_segments.begin(); anchor_segments_seg != anchor_segments.end(); ++anchor_segments_seg ) {
237 CRef<CDense_diag> diag(new CDense_diag);
238 diag->SetDim(dim);
239 diag->SetIds().resize(dim);
240 for (int row = 0; row < dim; ++row) {
241 CRef<CSeq_id> id(new CSeq_id);
242 id->Assign(anchored_aln.GetId(dim - row - 1)->GetSeqId());
243 diag->SetIds()[row] = id;
244 }
245 diag->SetStarts().resize(dim, kInvalidSeqPos);
246 diag->SetStrands().resize(dim);
247 diag->SetLen(anchor_segments_seg->GetLength());
248 diags.push_back(diag);
249 }
250
251 for (int row = 0; row < dim; ++row) {
252 size_t seg = 0;
253 CSeq_align::TSegs::TDendiag::iterator diag_it = diags.begin();
254
255 TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
256 CPairwiseAln::TAlnRngColl::const_iterator aln_rng_i =
257 pairwises[dim - row - 1]->begin();
258 bool direct = aln_rng_i->IsDirect();
259 TSignedSeqPos left_delta = 0;
260 TSignedSeqPos right_delta = aln_rng_i->GetLength();
261 while (seg_i != anchor_segments.end()) {
262 if (aln_rng_i != pairwises[dim - row - 1]->end() &&
263 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
264 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
265 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
266 NCBI_THROW(CAlnException, eInternalFailure,
267 "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
268 }
269
270 // dec right_delta
271 _ASSERT(right_delta >= seg_i->GetLength());
272 if (right_delta < seg_i->GetLength()) {
273 NCBI_THROW(CAlnException, eInternalFailure,
274 "right_delta < seg_i->GetLength()");
275 }
276 right_delta -= seg_i->GetLength();
277
278 (*diag_it)->SetStarts()[row] =
279 (direct ?
280 aln_rng_i->GetSecondFrom() + left_delta :
281 aln_rng_i->GetSecondFrom() + right_delta);
282
283 // inc left_delta
284 left_delta += seg_i->GetLength();
285
286 if (right_delta == 0) {
287 _ASSERT(left_delta == aln_rng_i->GetLength());
288 ++aln_rng_i;
289 if (aln_rng_i != pairwises[dim - row - 1]->end()) {
290 direct = aln_rng_i->IsDirect();
291 left_delta = 0;
292 right_delta = aln_rng_i->GetLength();
293 }
294 }
295 }
296 (*diag_it)->SetStrands()[row] =
297 (direct ? eNa_strand_plus : eNa_strand_minus);
298 ++seg_i;
299 ++seg;
300 ++diag_it;
301 }
302 }
303 // Cleanup: remove rows with gaps, remove one-row diags.
304 NON_CONST_ITERATE(CSeq_align::TSegs::TDendiag, diag_it, diags) {
305 size_t row = 0;
306 CDense_diag& diag = **diag_it;
307 CDense_diag::TStarts& starts = diag.SetStarts();
308 while (row < starts.size()) {
309 if (starts[row] == kInvalidSeqPos) {
310 starts.erase(starts.begin() + row);
311 CDense_diag::TIds& ids = diag.SetIds();
312 ids.erase(ids.begin() + row);
313 CDense_diag::TStrands& strands = diag.SetStrands();
314 strands.erase(strands.begin() + row);
315 continue;
316 }
317 ++row;
318 }
319 if (diag.GetStarts().size() < 2) continue;
320 diag.SetDim(starts.size());
321 #if _DEBUG
322 diag.Validate();
323 #endif
324 dd.push_back(*diag_it);
325 }
326 }
327
328
329 CRef<CDense_seg>
CreateDensegFromAnchoredAln(const CAnchoredAln & anchored_aln,CScope * scope)330 CreateDensegFromAnchoredAln(const CAnchoredAln& anchored_aln,
331 CScope* scope)
332 {
333 const CAnchoredAln::TPairwiseAlnVector& pairwises = anchored_aln.GetPairwiseAlns();
334
335 typedef CSegmentedRangeCollection TAnchorSegments;
336 TAnchorSegments anchor_segments;
337 ITERATE(CAnchoredAln::TPairwiseAlnVector, pairwise_aln_i, pairwises) {
338 ITERATE (CPairwiseAln::TAlnRngColl, rng_i, **pairwise_aln_i) {
339 anchor_segments.insert(CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
340 }
341 }
342
343 // Create a dense-seg
344 CRef<CDense_seg> ds(new CDense_seg);
345
346 // Determine dimensions
347 CDense_seg::TNumseg& numseg = ds->SetNumseg();
348 numseg = anchor_segments.size();
349 CDense_seg::TDim& dim = ds->SetDim();
350 dim = anchored_aln.GetDim();
351
352 // Tmp vars
353 CDense_seg::TDim row;
354 CDense_seg::TNumseg seg;
355
356 // Ids
357 CDense_seg::TIds& ids = ds->SetIds();
358 ids.resize(dim);
359 for (row = 0; row < dim; ++row) {
360 ids[row].Reset(new CSeq_id);
361 SerialAssign<CSeq_id>(*ids[row], anchored_aln.GetId(dim - row - 1)->GetSeqId());
362 }
363
364 // Lens
365 CDense_seg::TLens& lens = ds->SetLens();
366 lens.resize(numseg);
367 TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
368 for (seg = 0; seg < numseg; ++seg, ++seg_i) {
369 lens[seg] = seg_i->GetLength();
370 }
371
372 int matrix_size = dim * numseg;
373
374 // Strands (just resize, will set while setting starts)
375 CDense_seg::TStrands& strands = ds->SetStrands();
376 strands.resize(matrix_size, eNa_strand_unknown);
377
378 // Starts and strands
379 CDense_seg::TStarts& starts = ds->SetStarts();
380 starts.resize(matrix_size, -1);
381 for (row = 0; row < dim; ++row) {
382 seg = 0;
383 int matrix_row_pos = row; // optimization to eliminate multiplication
384 seg_i = anchor_segments.begin();
385 CPairwiseAln::TAlnRngColl::const_iterator aln_rng_i = pairwises[dim - row - 1]->begin();
386 bool direct = aln_rng_i->IsDirect();
387 TSignedSeqPos left_delta = 0;
388 TSignedSeqPos right_delta = aln_rng_i->GetLength();
389 while (seg_i != anchor_segments.end()) {
390 _ASSERT(seg < numseg);
391 _ASSERT(matrix_row_pos == row + dim * seg);
392 if (aln_rng_i != pairwises[dim - row - 1]->end() &&
393 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
394 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
395 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
396 NCBI_THROW(CAlnException, eInternalFailure,
397 "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
398 }
399
400 // dec right_delta
401 _ASSERT(right_delta >= seg_i->GetLength());
402 if (right_delta < seg_i->GetLength()) {
403 NCBI_THROW(CAlnException, eInternalFailure,
404 "right_delta < seg_i->GetLength()");
405 }
406 right_delta -= seg_i->GetLength();
407
408 starts[matrix_row_pos] =
409 (direct ?
410 aln_rng_i->GetSecondFrom() + left_delta :
411 aln_rng_i->GetSecondFrom() + right_delta);
412
413 // inc left_delta
414 left_delta += seg_i->GetLength();
415
416 if (right_delta == 0) {
417 _ASSERT(left_delta == aln_rng_i->GetLength());
418 ++aln_rng_i;
419 if (aln_rng_i != pairwises[dim - row - 1]->end()) {
420 direct = aln_rng_i->IsDirect();
421 left_delta = 0;
422 right_delta = aln_rng_i->GetLength();
423 }
424 }
425 }
426 strands[matrix_row_pos] = (direct ? eNa_strand_plus : eNa_strand_minus);
427 ++seg_i;
428 ++seg;
429 matrix_row_pos += dim;
430 }
431 }
432 #if _DEBUG
433 ds->Validate(true);
434 #endif
435 return ds;
436 }
437
438
439 CRef<CDense_seg>
CreateDensegFromPairwiseAln(const CPairwiseAln & pairwise_aln,CScope * scope)440 CreateDensegFromPairwiseAln(const CPairwiseAln& pairwise_aln,
441 CScope* scope)
442 {
443 // Create a dense-seg
444 CRef<CDense_seg> ds(new CDense_seg);
445
446
447 // Determine dimensions
448 CDense_seg::TNumseg& numseg = ds->SetNumseg();
449 numseg = pairwise_aln.size();
450 ds->SetDim(2);
451 int matrix_size = 2 * numseg;
452
453 CDense_seg::TLens& lens = ds->SetLens();
454 lens.resize(numseg);
455
456 CDense_seg::TStarts& starts = ds->SetStarts();
457 starts.resize(matrix_size, -1);
458
459 CDense_seg::TIds& ids = ds->SetIds();
460 ids.resize(2);
461
462
463 // Ids
464 ids[0].Reset(new CSeq_id);
465 SerialAssign<CSeq_id>(*ids[0], pairwise_aln.GetFirstId()->GetSeqId());
466 ids[1].Reset(new CSeq_id);
467 SerialAssign<CSeq_id>(*ids[1], pairwise_aln.GetSecondId()->GetSeqId());
468
469
470 // Tmp vars
471 CDense_seg::TNumseg seg = 0;
472 int matrix_pos = 0;
473
474
475 // Main loop to set all values
476 ITERATE(CPairwiseAln::TAlnRngColl, aln_rng_i, pairwise_aln) {
477 starts[matrix_pos++] = aln_rng_i->GetFirstFrom();
478 if ( !aln_rng_i->IsDirect() ) {
479 if ( !ds->IsSetStrands() ) {
480 ds->SetStrands().resize(matrix_size, eNa_strand_plus);
481 }
482 ds->SetStrands()[matrix_pos] = eNa_strand_minus;
483 }
484 starts[matrix_pos++] = aln_rng_i->GetSecondFrom();
485 lens[seg++] = aln_rng_i->GetLength();
486 }
487 _ASSERT(matrix_pos == matrix_size);
488 _ASSERT(seg == numseg);
489
490
491 #ifdef _DEBUG
492 ds->Validate(true);
493 #endif
494 return ds;
495 }
496
497
498 static const TSignedSeqPos kMaxSplicedExonIndelLength = 15;
499
InitSplicedsegFromPairwiseAln(CSpliced_seg & spliced_seg,const CPairwiseAln & pairwise_aln,CScope * scope)500 void InitSplicedsegFromPairwiseAln(CSpliced_seg& spliced_seg,
501 const CPairwiseAln& pairwise_aln,
502 CScope* scope)
503 {
504 // Sort by product positions (if minus strand, then reverse).
505 // Make sure genomic positions are sorted in the corresponding direction too, no overlaps etc.
506 // Small one-row gap -> indel.
507 // Large genomic gap -> intron (start new exon).
508 // Other gaps -> intron, set 'partial' on both sides.
509 // If the product does not start/end at the sequence extreme, set 'partial' for the exon.
510
511 // Check strands - one per row.
512 _ASSERT((pairwise_aln.GetFlags() & CPairwiseAln::fMixedDir) != CPairwiseAln::fMixedDir);
513 // Product is nuc or prot.
514 _ASSERT(pairwise_aln.GetFirstBaseWidth() == 1 ||
515 pairwise_aln.GetFirstBaseWidth() == 3);
516 bool prot = pairwise_aln.GetFirstBaseWidth() == 3;
517 // The other row is genomic.
518 _ASSERT(pairwise_aln.GetSecondBaseWidth() == 1);
519
520 // Ids
521 CRef<CSeq_id> product_id(new CSeq_id);
522 product_id->Assign(pairwise_aln.GetFirstId()->GetSeqId());
523 spliced_seg.SetProduct_id(*product_id);
524 CRef<CSeq_id> genomic_id(new CSeq_id);
525 genomic_id->Assign(pairwise_aln.GetSecondId()->GetSeqId());
526 spliced_seg.SetGenomic_id(*genomic_id);
527
528 // Product type
529 spliced_seg.SetProduct_type(prot ?
530 CSpliced_seg::eProduct_type_protein
531 : CSpliced_seg::eProduct_type_transcript);
532
533 // Exons
534 CSpliced_seg::TExons& exons = spliced_seg.SetExons();
535
536 typedef TSignedSeqPos TPos;
537 typedef CRange<TPos> TRng;
538
539 TPos last_prod_end = 0;
540 TPos last_gen_end = 0;
541 CRef<CSpliced_exon> exon;
542 CPairwiseAln::TAlnRngColl::const_iterator rg_it = pairwise_aln.begin();
543 bool gen_direct = rg_it == pairwise_aln.end() || rg_it->IsDirect();
544 bool prod_direct = prot ||
545 rg_it == pairwise_aln.end() || rg_it->IsFirstDirect();
546 // Adjust genomic strand - in CPairwiseAln it's relative to the first seq.
547 if ( !prod_direct ) {
548 gen_direct = !gen_direct;
549 }
550
551 TRng ex_prod_rg;
552 TRng ex_gen_rg;
553
554 // Main loop to set all values
555 ITERATE(CPairwiseAln::TAlnRngColl, rg_it, pairwise_aln) {
556 const CPairwiseAln::TAlnRng& rg = *rg_it;
557
558 // Unaligned ranges.
559 TPos prod_skip, gen_skip;
560 if (rg_it == pairwise_aln.begin()) {
561 prod_skip = 0;
562 gen_skip = 0;
563 }
564 else {
565 prod_skip = rg.GetFirstFrom() - last_prod_end;
566 gen_skip = gen_direct == prod_direct ?
567 rg.GetSecondFrom() - last_gen_end
568 : last_gen_end - rg.GetSecondToOpen();
569 }
570
571 // Break exon, ignore long gaps between exons.
572 if (!exon || prod_skip > kMaxSplicedExonIndelLength ||
573 gen_skip > kMaxSplicedExonIndelLength) {
574 if ( exon ) {
575 _ASSERT(exon->IsSetProduct_start());
576 _ASSERT(exon->IsSetGenomic_start());
577 _ASSERT(exon->IsSetProduct_end());
578 _ASSERT(exon->IsSetGenomic_end());
579 if ( prod_direct ) {
580 exons.push_back(exon);
581 }
582 else {
583 exons.push_front(exon);
584 }
585 exon.Reset();
586 ex_prod_rg = TRng::GetEmpty();
587 ex_gen_rg = TRng::GetEmpty();
588 }
589 prod_skip = 0;
590 gen_skip = 0;
591 }
592
593 if (prod_skip > 0 || gen_skip > 0) {
594 _ASSERT(exon);
595 TSeqPos mismatch = min(prod_skip, gen_skip);
596 if (mismatch > 0) {
597 CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
598 chunk->SetMismatch(mismatch);
599 if ( prod_direct ) {
600 exon->SetParts().push_back(chunk);
601 }
602 else {
603 exon->SetParts().push_front(chunk);
604 }
605 prod_skip -= mismatch;
606 gen_skip -= mismatch;
607 }
608 if (prod_skip > 0) {
609 CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
610 chunk->SetProduct_ins(prod_skip);
611 if ( prod_direct ) {
612 exon->SetParts().push_back(chunk);
613 }
614 else {
615 exon->SetParts().push_front(chunk);
616 }
617 }
618 if (gen_skip > 0) {
619 CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
620 chunk->SetGenomic_ins(gen_skip);
621 if ( prod_direct ) {
622 exon->SetParts().push_back(chunk);
623 }
624 else {
625 exon->SetParts().push_front(chunk);
626 }
627 }
628 prod_skip = 0;
629 gen_skip = 0;
630 }
631
632 if ( !exon ) {
633 // Start new exon
634 exon.Reset(new CSpliced_exon);
635 // The first exon must start at 0 or be partial.
636 if (exons.empty() && rg.GetFirstFrom() > 0) {
637 exon->SetPartial(true);
638 }
639 if ( !prot ) {
640 exon->SetProduct_strand(prod_direct
641 ? eNa_strand_plus : eNa_strand_minus);
642 }
643 exon->SetGenomic_strand(gen_direct
644 ? eNa_strand_plus : eNa_strand_minus);
645 }
646 // Aligned chunk
647 CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
648 chunk->SetMatch(rg.GetLength());
649 if ( prod_direct ) {
650 exon->SetParts().push_back(chunk);
651 }
652 else {
653 exon->SetParts().push_front(chunk);
654 }
655
656 // Update exon extremes
657 ex_prod_rg.CombineWith(TRng(rg.GetFirstFrom(), rg.GetFirstTo()));
658 ex_gen_rg.CombineWith(TRng(rg.GetSecondFrom(), rg.GetSecondTo()));
659 if ( exon ) {
660 if (prot) {
661 exon->SetProduct_start().SetProtpos().SetAmin(ex_prod_rg.GetFrom() / 3);
662 exon->SetProduct_start().SetProtpos().SetFrame(ex_prod_rg.GetFrom() % 3 + 1);
663 exon->SetProduct_end().SetProtpos().SetAmin(ex_prod_rg.GetTo() / 3);
664 exon->SetProduct_end().SetProtpos().SetFrame(ex_prod_rg.GetTo() % 3 + 1);
665 } else {
666 exon->SetProduct_start().SetNucpos(ex_prod_rg.GetFrom());
667 exon->SetProduct_end().SetNucpos(ex_prod_rg.GetTo());
668 }
669 exon->SetGenomic_start(ex_gen_rg.GetFrom());
670 exon->SetGenomic_end(ex_gen_rg.GetTo());
671 }
672
673 last_prod_end = rg.GetFirstToOpen();
674 last_gen_end = gen_direct == prod_direct ?
675 rg.GetSecondToOpen() : rg.GetSecondFrom();
676 }
677 if ( exon ) {
678 _ASSERT(exon->IsSetProduct_start());
679 _ASSERT(exon->IsSetGenomic_start());
680 _ASSERT(exon->IsSetProduct_end());
681 _ASSERT(exon->IsSetGenomic_end());
682 if ( prod_direct ) {
683 exons.push_back(exon);
684 }
685 else {
686 exons.push_front(exon);
687 }
688 }
689 else if ( !exons.empty() ) {
690 // Get the last added exon.
691 exon = prod_direct ? exons.front() : exons.back();
692 }
693
694 // Check if the last exon ends at product end.
695 if (exon && scope) {
696 TSeqPos prod_end = 0;
697 if ( exon->GetProduct_end().IsNucpos() ) {
698 prod_end = exon->GetProduct_end().GetNucpos();
699 }
700 else {
701 prod_end = exon->GetProduct_end().GetProtpos().GetAmin();
702 }
703 CBioseq_Handle h = scope->GetBioseqHandle(*product_id);
704 if ( h ) {
705 TSeqPos prod_len = h.GetBioseqLength();
706 if (prod_len != kInvalidSeqPos && prod_len != prod_end + 1) {
707 exon->SetPartial(true);
708 }
709 }
710 }
711
712 #ifdef _DEBUG
713 spliced_seg.Validate(true);
714 #endif
715 }
716
717
718 CRef<CSpliced_seg>
CreateSplicedsegFromAnchoredAln(const CAnchoredAln & anchored_aln,CScope * scope)719 CreateSplicedsegFromAnchoredAln(const CAnchoredAln& anchored_aln,
720 CScope* scope)
721 {
722 _ASSERT(anchored_aln.GetDim() == 2);
723
724 // Sort by product positions (if minus strand, then reverse).
725 // Make sure genomic positions are sorted in the corresponding direction too, no overlaps etc.
726 // Small one-row gap -> indel.
727 // Large genomic gap -> intron (start new exon).
728 // Other gaps -> intron, set 'partial' on both sides.
729 // If the product does not start/end at the sequence extreme, set 'partial' for the exon.
730
731 // Create a spliced_seg
732 CRef<CSpliced_seg> spliced_seg(new CSpliced_seg);
733 CAnchoredAln::TDim anchor_row = anchored_aln.GetAnchorRow();
734 const CPairwiseAln& pairwise = *anchored_aln.GetPairwiseAlns()[1 - anchor_row];
735 InitSplicedsegFromPairwiseAln(*spliced_seg, pairwise, scope);
736 return spliced_seg;
737 }
738
739
740 CRef<CSpliced_seg>
CreateSplicedsegFromPairwiseAln(const CPairwiseAln & pairwise_aln,CScope * scope)741 CreateSplicedsegFromPairwiseAln(const CPairwiseAln& pairwise_aln,
742 CScope* scope)
743 {
744 // Create a dense-seg
745 CRef<CSpliced_seg> ss(new CSpliced_seg);
746 InitSplicedsegFromPairwiseAln(*ss, pairwise_aln, scope);
747 return ss;
748 }
749
750
s_TranslatePairwise(CPairwiseAln & out_pw,const CPairwiseAln & pw,const CPairwiseAln & tr)751 void s_TranslatePairwise(
752 CPairwiseAln& out_pw, // output pairwise (needs to be empty)
753 const CPairwiseAln& pw, // input pairwise to translate from
754 const CPairwiseAln& tr) // translating pairwise
755 {
756 ITERATE (CPairwiseAln, it, pw) {
757 CPairwiseAln::TAlnRng ar = *it;
758 ar.SetFirstFrom(tr.GetSecondPosByFirstPos(ar.GetFirstFrom()));
759 if (ar.GetFirstFrom() < 0) continue; // skip unaligned ranges
760 out_pw.insert(ar);
761 }
762 }
763
764
765 typedef CAnchoredAln::TDim TDim;
766
767
CreateSeqAlignFromEachPairwiseAln(const CAnchoredAln::TPairwiseAlnVector pairwises,TDim anchor,vector<CRef<CSeq_align>> & out_seqaligns,CSeq_align::TSegs::E_Choice choice,CScope * scope)768 void CreateSeqAlignFromEachPairwiseAln(
769 const CAnchoredAln::TPairwiseAlnVector pairwises,
770 TDim anchor,
771 vector<CRef<CSeq_align> >& out_seqaligns,
772 CSeq_align::TSegs::E_Choice choice,
773 CScope* scope)
774 {
775 out_seqaligns.resize(pairwises.size() - 1);
776 for (TDim row = 0, sa_idx = 0;
777 row < (TDim) pairwises.size();
778 ++row) {
779 if (row == anchor) continue;
780 CRef<CSeq_align> sa(new CSeq_align);
781 sa->SetType(CSeq_align::eType_partial);
782 sa->SetDim(2);
783
784 const CPairwiseAln& pw = *pairwises[row];
785 CRef<CPairwiseAln> p(new CPairwiseAln(pairwises[anchor]->GetSecondId(),
786 pw.GetSecondId(),
787 pw.GetFlags()));
788 s_TranslatePairwise(*p, pw, *pairwises[anchor]);
789
790 switch(choice) {
791 case CSeq_align::TSegs::e_Denseg: {
792 CRef<CDense_seg> ds = CreateDensegFromPairwiseAln(*p, scope);
793 sa->SetSegs().SetDenseg(*ds);
794 break;
795 }
796 case CSeq_align::TSegs::e_Disc: {
797 CRef<CSeq_align_set> disc = CreateAlignSetFromPairwiseAln(*p, scope);
798 sa->SetSegs().SetDisc(*disc);
799 break;
800 }
801 case CSeq_align::TSegs::e_Packed: {
802 CRef<CPacked_seg> ps = CreatePackedsegFromPairwiseAln(*p, scope);
803 sa->SetSegs().SetPacked(*ps);
804 break;
805 }
806 case CSeq_align::TSegs::e_Spliced: {
807 CRef<CSpliced_seg> ss = CreateSplicedsegFromPairwiseAln(*p, scope);
808 sa->SetSegs().SetSpliced(*ss);
809 break;
810 }
811 case CSeq_align::TSegs::e_Dendiag:
812 case CSeq_align::TSegs::e_Std:
813 case CSeq_align::TSegs::e_Sparse:
814 NCBI_THROW(CAlnException, eInvalidRequest,
815 "Unsupported CSeq_align::TSegs type.");
816 case CSeq_align::TSegs::e_not_set:
817 default:
818 NCBI_THROW(CAlnException, eInvalidRequest,
819 "Invalid CSeq_align::TSegs type.");
820 }
821 out_seqaligns[sa_idx++].Reset(sa);
822 }
823 }
824
825
826 CRef<CSeq_align_set>
CreateAlignSetFromAnchoredAln(const CAnchoredAln & anchored_aln,CScope * scope)827 CreateAlignSetFromAnchoredAln(const CAnchoredAln& anchored_aln,
828 CScope* scope)
829 {
830 CRef<CSeq_align_set> disc(new CSeq_align_set);
831
832 const CAnchoredAln::TPairwiseAlnVector& pairwises = anchored_aln.GetPairwiseAlns();
833
834 typedef CSegmentedRangeCollection TAnchorSegments;
835 TAnchorSegments anchor_segments;
836 ITERATE(CAnchoredAln::TPairwiseAlnVector, pairwise_aln_i, pairwises) {
837 ITERATE (CPairwiseAln::TAlnRngColl, rng_i, **pairwise_aln_i) {
838 anchor_segments.insert(CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
839 }
840 }
841
842 CDense_seg::TDim row;
843 CDense_seg::TNumseg seg;
844
845 // Determine dimensions
846 CDense_seg::TNumseg numseg = anchor_segments.size();
847 CDense_seg::TDim dim = anchored_aln.GetDim();
848
849 vector< CRef<CDense_seg> > dsegs;
850 dsegs.resize(numseg);
851 for (size_t i = 0; i < dsegs.size(); ++i) {
852 dsegs[i].Reset(new CDense_seg);
853 CDense_seg& dseg = *dsegs[i];
854 dseg.SetDim(dim);
855 dseg.SetNumseg(1);
856 // Ids
857 CDense_seg::TIds& ids = dseg.SetIds();
858 ids.resize(dim);
859 for (row = 0; row < dim; ++row) {
860 ids[row].Reset(new CSeq_id);
861 SerialAssign<CSeq_id>(*ids[row], anchored_aln.GetId(dim - row - 1)->GetSeqId());
862 }
863 // Lens, strands, starts - just prepare the storage
864 dseg.SetLens().resize(1);
865 dseg.SetStrands().resize(dim, eNa_strand_unknown);
866 dseg.SetStarts().resize(dim, -1);
867 }
868
869 for (row = 0; row < dim; ++row) {
870 seg = 0;
871 TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
872 CPairwiseAln::TAlnRngColl::const_iterator aln_rng_i = pairwises[dim - row - 1]->begin();
873 bool direct = aln_rng_i->IsDirect();
874 TSignedSeqPos left_delta = 0;
875 TSignedSeqPos right_delta = aln_rng_i->GetLength();
876 while (seg_i != anchor_segments.end()) {
877 _ASSERT(seg < numseg);
878 CDense_seg& dseg = *dsegs[seg];
879 dseg.SetLens()[0] = seg_i->GetLength();
880 CDense_seg::TStarts& starts = dseg.SetStarts();
881
882 dseg.SetStrands()[row] = (direct ? eNa_strand_plus : eNa_strand_minus);
883 if (aln_rng_i != pairwises[dim - row - 1]->end() &&
884 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
885 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
886 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
887 NCBI_THROW(CAlnException, eInternalFailure,
888 "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
889 }
890
891 // dec right_delta
892 _ASSERT(right_delta >= seg_i->GetLength());
893 if (right_delta < seg_i->GetLength()) {
894 NCBI_THROW(CAlnException, eInternalFailure,
895 "right_delta < seg_i->GetLength()");
896 }
897 right_delta -= seg_i->GetLength();
898
899 starts[row] =
900 (direct ?
901 aln_rng_i->GetSecondFrom() + left_delta :
902 aln_rng_i->GetSecondFrom() + right_delta);
903
904 // inc left_delta
905 left_delta += seg_i->GetLength();
906
907 if (right_delta == 0) {
908 _ASSERT(left_delta == aln_rng_i->GetLength());
909 ++aln_rng_i;
910 if (aln_rng_i != pairwises[dim - row - 1]->end()) {
911 direct = aln_rng_i->IsDirect();
912 left_delta = 0;
913 right_delta = aln_rng_i->GetLength();
914 }
915 }
916 }
917 // Add only densegs with both rows non-empty
918 if (starts[0] >= 0 && starts[1] >= 0) {
919 CRef<CSeq_align> seg_aln(new CSeq_align);
920 seg_aln->SetType(CSeq_align::eType_not_set);
921 seg_aln->SetDim(dim);
922 disc->Set().push_back(seg_aln);
923 seg_aln->SetSegs().SetDenseg(dseg);
924 }
925 ++seg_i;
926 ++seg;
927 }
928 }
929
930 return disc;
931 }
932
933
934 CRef<CSeq_align_set>
CreateAlignSetFromPairwiseAln(const CPairwiseAln & pairwise_aln,CScope * scope)935 CreateAlignSetFromPairwiseAln(const CPairwiseAln& pairwise_aln,
936 CScope* scope)
937 {
938 CRef<CSeq_align_set> disc(new CSeq_align_set);
939
940 CDense_seg::TNumseg numseg = pairwise_aln.size();
941
942 vector< CRef<CDense_seg> > dsegs;
943 dsegs.resize(numseg);
944 for (size_t i = 0; i < dsegs.size(); ++i) {
945 CRef<CSeq_align> seg_aln(new CSeq_align);
946 seg_aln->SetType(CSeq_align::eType_not_set);
947 seg_aln->SetDim(2);
948 disc->Set().push_back(seg_aln);
949 CDense_seg& dseg = seg_aln->SetSegs().SetDenseg();
950 dsegs[i].Reset(&dseg);
951 dseg.SetDim(2);
952 dseg.SetNumseg(1);
953 // Ids
954 CDense_seg::TIds& ids = dseg.SetIds();
955 ids.resize(2);
956 ids[0].Reset(new CSeq_id);
957 SerialAssign<CSeq_id>(*ids[0], pairwise_aln.GetFirstId()->GetSeqId());
958 ids[1].Reset(new CSeq_id);
959 SerialAssign<CSeq_id>(*ids[1], pairwise_aln.GetSecondId()->GetSeqId());
960 // Lens, strands, starts - just prepare the storage
961 dseg.SetLens().resize(1);
962 dseg.SetStrands().resize(2, eNa_strand_unknown);
963 dseg.SetStarts().resize(2, -1);
964 }
965
966 // Main loop to set all values
967 CDense_seg::TNumseg seg = 0;
968 ITERATE(CPairwiseAln::TAlnRngColl, aln_rng_i, pairwise_aln) {
969 CDense_seg& dseg = *dsegs[seg];
970 dseg.SetStarts()[0] = aln_rng_i->GetFirstFrom();
971 if ( !aln_rng_i->IsDirect() ) {
972 if ( !dseg.IsSetStrands() ) {
973 dseg.SetStrands().resize(2, eNa_strand_plus);
974 }
975 dseg.SetStrands()[1] = eNa_strand_minus;
976 }
977 dseg.SetStarts()[1] = aln_rng_i->GetSecondFrom();
978 dseg.SetLens()[0] = aln_rng_i->GetLength();
979 seg++;
980 }
981
982 return disc;
983 }
984
985
986 CRef<CPacked_seg>
CreatePackedsegFromAnchoredAln(const CAnchoredAln & anchored_aln,CScope * scope)987 CreatePackedsegFromAnchoredAln(const CAnchoredAln& anchored_aln,
988 CScope* scope)
989 {
990 const CAnchoredAln::TPairwiseAlnVector& pairwises = anchored_aln.GetPairwiseAlns();
991
992 typedef CSegmentedRangeCollection TAnchorSegments;
993 TAnchorSegments anchor_segments;
994 ITERATE(CAnchoredAln::TPairwiseAlnVector, pairwise_aln_i, pairwises) {
995 ITERATE (CPairwiseAln::TAlnRngColl, rng_i, **pairwise_aln_i) {
996 anchor_segments.insert(CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
997 }
998 }
999
1000 // Create a packed-seg
1001 CRef<CPacked_seg> ps(new CPacked_seg);
1002
1003 // Determine dimensions
1004 CPacked_seg::TNumseg& numseg = ps->SetNumseg();
1005 numseg = anchor_segments.size();
1006 CPacked_seg::TDim& dim = ps->SetDim();
1007 dim = anchored_aln.GetDim();
1008
1009 // Tmp vars
1010 CPacked_seg::TDim row;
1011 CPacked_seg::TNumseg seg;
1012
1013 // Ids
1014 CPacked_seg::TIds& ids = ps->SetIds();
1015 ids.resize(dim);
1016 for (row = 0; row < dim; ++row) {
1017 ids[row].Reset(new CSeq_id);
1018 SerialAssign<CSeq_id>(*ids[row], anchored_aln.GetId(dim - row - 1)->GetSeqId());
1019 }
1020
1021 // Lens
1022 CPacked_seg::TLens& lens = ps->SetLens();
1023 lens.resize(numseg);
1024 TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
1025 for (seg = 0; seg < numseg; ++seg, ++seg_i) {
1026 lens[seg] = seg_i->GetLength();
1027 }
1028
1029 int matrix_size = dim * numseg;
1030
1031 // Present
1032 CPacked_seg::TPresent& present = ps->SetPresent();
1033 present.resize(matrix_size);
1034
1035 // Strands (just resize, will set while setting starts)
1036 CPacked_seg::TStrands& strands = ps->SetStrands();
1037 strands.resize(matrix_size, eNa_strand_unknown);
1038
1039 // Starts and strands
1040 CPacked_seg::TStarts& starts = ps->SetStarts();
1041 starts.resize(matrix_size, 0);
1042 for (row = 0; row < dim; ++row) {
1043 seg = 0;
1044 int matrix_row_pos = row; // optimization to eliminate multiplication
1045 seg_i = anchor_segments.begin();
1046 CPairwiseAln::TAlnRngColl::const_iterator aln_rng_i = pairwises[dim - row - 1]->begin();
1047 bool direct = aln_rng_i->IsDirect();
1048 TSignedSeqPos left_delta = 0;
1049 TSignedSeqPos right_delta = aln_rng_i->GetLength();
1050 while (seg_i != anchor_segments.end()) {
1051 _ASSERT(seg < numseg);
1052 _ASSERT(matrix_row_pos == row + dim * seg);
1053 strands[matrix_row_pos] = (direct ? eNa_strand_plus : eNa_strand_minus);
1054 if (aln_rng_i != pairwises[dim - row - 1]->end() &&
1055 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
1056 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
1057 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
1058 NCBI_THROW(CAlnException, eInternalFailure,
1059 "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
1060 }
1061
1062 // dec right_delta
1063 _ASSERT(right_delta >= seg_i->GetLength());
1064 if (right_delta < seg_i->GetLength()) {
1065 NCBI_THROW(CAlnException, eInternalFailure,
1066 "right_delta < seg_i->GetLength()");
1067 }
1068 right_delta -= seg_i->GetLength();
1069
1070 CPacked_seg::TStarts::value_type start = (direct ?
1071 aln_rng_i->GetSecondFrom() + left_delta
1072 : aln_rng_i->GetSecondFrom() + right_delta);
1073 starts[matrix_row_pos] = start;
1074
1075 present[matrix_row_pos] = (start != kInvalidSeqPos);
1076
1077 // inc left_delta
1078 left_delta += seg_i->GetLength();
1079
1080 if (right_delta == 0) {
1081 _ASSERT(left_delta == aln_rng_i->GetLength());
1082 ++aln_rng_i;
1083 if (aln_rng_i != pairwises[dim - row - 1]->end()) {
1084 direct = aln_rng_i->IsDirect();
1085 left_delta = 0;
1086 right_delta = aln_rng_i->GetLength();
1087 }
1088 }
1089 }
1090 ++seg_i;
1091 ++seg;
1092 matrix_row_pos += dim;
1093 }
1094 }
1095 return ps;
1096 }
1097
1098
1099 CRef<CPacked_seg>
CreatePackedsegFromPairwiseAln(const CPairwiseAln & pairwise_aln,CScope * scope)1100 CreatePackedsegFromPairwiseAln(const CPairwiseAln& pairwise_aln,
1101 CScope* scope)
1102 {
1103 // Create a packed-seg
1104 CRef<CPacked_seg> ps(new CPacked_seg);
1105
1106
1107 // Determine dimensions
1108 CPacked_seg::TNumseg& numseg = ps->SetNumseg();
1109 numseg = pairwise_aln.size();
1110 ps->SetDim(2);
1111 int matrix_size = 2 * numseg;
1112
1113 CPacked_seg::TLens& lens = ps->SetLens();
1114 lens.resize(numseg);
1115
1116 CPacked_seg::TStarts& starts = ps->SetStarts();
1117 starts.resize(matrix_size, 0);
1118
1119 CPacked_seg::TPresent& present = ps->SetPresent();
1120 present.resize(matrix_size, 0);
1121
1122 CPacked_seg::TIds& ids = ps->SetIds();
1123 ids.resize(2);
1124
1125 // Ids
1126 ids[0].Reset(new CSeq_id);
1127 SerialAssign<CSeq_id>(*ids[0], pairwise_aln.GetFirstId()->GetSeqId());
1128 ids[1].Reset(new CSeq_id);
1129 SerialAssign<CSeq_id>(*ids[1], pairwise_aln.GetSecondId()->GetSeqId());
1130
1131
1132 // Tmp vars
1133 CPacked_seg::TNumseg seg = 0;
1134 int matrix_pos = 0;
1135
1136 // Main loop to set all values
1137 ITERATE(CPairwiseAln::TAlnRngColl, aln_rng_i, pairwise_aln) {
1138 CPacked_seg::TStarts::value_type start = aln_rng_i->GetFirstFrom();
1139 present[matrix_pos] = (start != kInvalidSeqPos);
1140 starts[matrix_pos++] = start;
1141 if ( !aln_rng_i->IsDirect() ) {
1142 if ( !ps->IsSetStrands() ) {
1143 ps->SetStrands().resize(matrix_size, eNa_strand_plus);
1144 }
1145 ps->SetStrands()[matrix_pos] = eNa_strand_minus;
1146 }
1147 start = aln_rng_i->GetSecondFrom();
1148 present[matrix_pos] = (start != kInvalidSeqPos);
1149 starts[matrix_pos++] = start;
1150 lens[seg++] = aln_rng_i->GetLength();
1151 }
1152 _ASSERT(matrix_pos == matrix_size);
1153 _ASSERT(seg == numseg);
1154
1155 return ps;
1156 }
1157
1158
1159 CRef<CSeq_align>
ConvertSeq_align(const CSeq_align & src,CSeq_align::TSegs::E_Choice dst_choice,CSeq_align::TDim anchor_row,CScope * scope)1160 ConvertSeq_align(const CSeq_align& src,
1161 CSeq_align::TSegs::E_Choice dst_choice,
1162 CSeq_align::TDim anchor_row,
1163 CScope* scope)
1164 {
1165 TScopeAlnSeqIdConverter id_conv(scope);
1166 TScopeIdExtract id_extract(id_conv);
1167 TScopeAlnIdMap aln_id_map(id_extract, 1);
1168 TAlnSeqIdVec ids;
1169 id_extract(src, ids);
1170 aln_id_map.push_back(src);
1171
1172 TScopeAlnStats aln_stats(aln_id_map);
1173 CAlnUserOptions aln_user_options;
1174 CRef<CAnchoredAln> anchored_aln =
1175 CreateAnchoredAlnFromAln(aln_stats, 0, aln_user_options, anchor_row);
1176
1177 return CreateSeqAlignFromAnchoredAln(*anchored_aln, dst_choice, scope);
1178 }
1179
1180
1181 END_NCBI_SCOPE
1182