1 /* $Id: align_shadow.cpp 586001 2019-05-08 14:35:02Z chetvern $
2 * ===========================================================================
3 *
4 * public DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Yuri Kapustin
27 *
28 * File Description:
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33 #include <algo/align/util/align_shadow.hpp>
34 #include <objects/seqalign/Std_seg.hpp>
35 #include <objects/seqloc/Seq_loc.hpp>
36 #include <objects/seqloc/Seq_interval.hpp>
37
38 #include <algorithm>
39 #include <numeric>
40
41 #include <math.h>
42
43 BEGIN_NCBI_SCOPE
44
45 namespace {
46
47 const CAlignShadow::TCoord g_UndefCoord (
48 numeric_limits<CAlignShadow::TCoord>::max());
49
50 typedef pair<objects::ENa_strand, objects::ENa_strand> TStrands;
51
52
RetrieveStdStrands(const objects::CSeq_align & seq_align)53 TStrands RetrieveStdStrands(const objects::CSeq_align& seq_align)
54 {
55 USING_SCOPE(objects);
56
57 const CSeq_align::TSegs::TStd & std_list (seq_align.GetSegs().GetStd());
58 const CStd_seg & stdseg (*(std_list.front()));
59 const CStd_seg::TLoc & locs (stdseg.GetLoc());
60 return TStrands(locs[0]->GetStrand(), locs[1]->GetStrand());
61 }
62
63 }
64
CAlignShadow(const objects::CSeq_align & seq_align,bool save_xcript)65 CAlignShadow::CAlignShadow(const objects::CSeq_align& seq_align, bool save_xcript)
66 {
67 USING_SCOPE(objects);
68
69 if (seq_align.CheckNumRows() != 2) {
70 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
71 "Pairwise seq-align required to init align-shadow");
72 }
73
74 const bool is_denseg (seq_align.GetSegs().IsDenseg());
75 const bool is_stdseg (seq_align.GetSegs().IsStd());
76 if(!is_denseg && !is_stdseg) {
77 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
78 "Must be a dense-seg or std_seg to init align-shadow");
79 }
80
81 m_Id.first.Reset(&seq_align.GetSeq_id(0));
82 m_Id.second.Reset(&seq_align.GetSeq_id(1));
83
84 ENa_strand qstrand (eNa_strand_unknown);
85 ENa_strand sstrand (eNa_strand_unknown);
86
87 if(is_denseg) {
88 qstrand = seq_align.GetSeqStrand(0);
89 sstrand = seq_align.GetSeqStrand(1);
90 }
91 else {
92 const TStrands strands (RetrieveStdStrands(seq_align));
93 qstrand = strands.first;
94 sstrand = strands.second;
95 }
96
97 if(qstrand == eNa_strand_minus) {
98 m_Box[1] = seq_align.GetSeqStart(0);
99 m_Box[0] = seq_align.GetSeqStop(0);
100 }
101 else {
102 m_Box[0] = seq_align.GetSeqStart(0);
103 m_Box[1] = seq_align.GetSeqStop(0);
104 }
105
106 if(sstrand == eNa_strand_minus) {
107 m_Box[3] = seq_align.GetSeqStart(1);
108 m_Box[2] = seq_align.GetSeqStop(1);
109 }
110 else {
111 m_Box[2] = seq_align.GetSeqStart(1);
112 m_Box[3] = seq_align.GetSeqStop(1);
113 }
114
115 if(save_xcript) {
116
117 if(is_denseg) {
118
119 // compile edit transcript treating diags as matches
120 const CDense_seg & ds (seq_align.GetSegs().GetDenseg());
121 const CDense_seg::TStarts& starts (ds.GetStarts());
122 const CDense_seg::TLens& lens (ds.GetLens());
123 const CDense_seg::TStrands& strands = ds.GetStrands();
124
125 const char indel[2] = {'D', 'I'};
126 TSignedSeqPos next[2];
127 {
128 for (size_t j: {0,1}) {
129 size_t i = j;
130 while(i < starts.size() && starts[i]==-1) i += 2;
131 next[j] = starts[i];
132 if (strands[i] == eNa_strand_minus) {
133 next[j] += lens[i/2];
134 }
135 }
136 }
137
138 size_t i (0);
139 for (size_t ii_lens = 0; ii_lens < lens.size(); ++ii_lens) {
140 char c;
141
142 for (size_t j: {0,1}) {
143 if (starts[i+j] != -1) {
144 if (strands[i+j] == eNa_strand_minus) {
145 if (next[j] != starts[i+j]+lens[ii_lens]) {
146 m_Transcript.push_back(indel[j]);
147 m_Transcript.append(NStr::NumericToString(next[j]-starts[i+j]-lens[ii_lens]));
148 }
149 next[j] = starts[i+j];
150 } else {
151 if (next[j] != starts[i+j]) {
152 m_Transcript.push_back(indel[j]);
153 m_Transcript.append(NStr::NumericToString(starts[i+j]-next[j]));
154 }
155 next[j] = starts[i+j] + lens[ii_lens];
156 }
157 }
158 }
159
160 if(starts[i] < 0) {
161 c = 'I';
162 }
163 else if(starts[i+1] < 0) {
164 c = 'D';
165 }
166 else {
167 c = 'M';
168 }
169 m_Transcript.push_back(c);
170 if(lens[ii_lens] > 1) {
171 m_Transcript.append(NStr::NumericToString(lens[ii_lens]));
172 }
173 i += 2;
174 }
175 }
176 else {
177 NCBI_THROW(CAlgoAlignUtilException,
178 eInternal,
179 "CAlignShadow(): save_xcript mode not supported "
180 "for seq-align segments other than dense-seg.");
181 }
182 }
183 }
184
185
CAlignShadow(void)186 CAlignShadow::CAlignShadow(void)
187 {
188 m_Box[0] = m_Box[1] = m_Box[2] = m_Box[3] = g_UndefCoord;
189 }
190
191
CAlignShadow(const TId & idquery,TCoord qstart,bool qstrand,const TId & idsubj,TCoord sstart,bool sstrand,const string & xcript)192 CAlignShadow::CAlignShadow(const TId& idquery, TCoord qstart, bool qstrand,
193 const TId& idsubj, TCoord sstart, bool sstrand,
194 const string& xcript)
195 {
196 m_Id.first = idquery;
197 m_Id.second = idsubj;
198
199 m_Box[0] = qstart;
200 m_Box[2] = sstart;
201
202 TCoord q = qstart, q0 = q, s = sstart, s0 = s;
203 const int qinc (qstrand? 1: -1), sinc (sstrand? 1: -1);
204
205 ITERATE(string, ii, xcript) {
206
207 switch(*ii) {
208
209 case 'M': case 'R':
210 q0 = q;
211 q += qinc;
212 s0 = s;
213 s += sinc;
214 break;
215
216 case 'I':
217 s0 = s;
218 s += sinc;
219 break;
220
221 case 'D':
222 q0 = q;
223 q += qinc;
224 break;
225
226 default:
227 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
228 "CAlignShadow()::CAlignShadow(): "
229 "Unexpected transcript symbol found");
230 }
231 }
232
233 m_Box[1] = q0;
234 m_Box[3] = s0;
235
236 m_Transcript = s_RunLengthEncode(xcript);
237 }
238
239
operator <<(CNcbiOstream & os,const CAlignShadow & align_shadow)240 CNcbiOstream& operator << (CNcbiOstream& os, const CAlignShadow& align_shadow)
241 {
242 USING_SCOPE(objects);
243
244 os << align_shadow.GetId(0)->AsFastaString() << '\t'
245 << align_shadow.GetId(1)->AsFastaString() << '\t';
246
247 align_shadow.x_PartialSerialize(os);
248
249 return os;
250 }
251
252
253 //////////////////////////////////////////////////////////////////////////////
254 // getters and setters
255
256
GetId(Uint1 where) const257 const CAlignShadow::TId& CAlignShadow::GetId(Uint1 where) const
258 {
259 switch(where) {
260 case 0: return m_Id.first;
261 case 1: return m_Id.second;
262 default: {
263 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
264 "CAlignShadow::GetId() - argument out of range");
265 }
266 }
267 }
268
269
GetQueryId(void) const270 const CAlignShadow::TId& CAlignShadow::GetQueryId(void) const
271 {
272 return m_Id.first;
273 }
274
275
GetSubjId(void) const276 const CAlignShadow::TId& CAlignShadow::GetSubjId(void) const
277 {
278 return m_Id.second;
279 }
280
281
SetId(Uint1 where,const TId & id)282 void CAlignShadow::SetId(Uint1 where, const TId& id)
283 {
284 switch(where) {
285 case 0: m_Id.first = id; break;
286 case 1: m_Id.second = id; break;
287 default: {
288 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
289 "CAlignShadow::SetId() - argument out of range");
290 }
291 }
292 }
293
294
SetQueryId(const TId & id)295 void CAlignShadow::SetQueryId(const TId& id)
296 {
297 m_Id.first = id;
298 }
299
300
SetSubjId(const TId & id)301 void CAlignShadow::SetSubjId(const TId& id)
302 {
303 m_Id.second = id;
304 }
305
306
GetStrand(Uint1 where) const307 bool CAlignShadow::GetStrand(Uint1 where) const
308 {
309 #ifdef _DEBUG
310 if(0 != where && where != 1) {
311 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
312 "CAlignShadow::GetStrand() - argument out of range");
313 }
314 #endif
315
316 return where == 0? m_Box[0] <= m_Box[1]: m_Box[2] <= m_Box[3];
317 }
318
319
GetQueryStrand(void) const320 bool CAlignShadow::GetQueryStrand(void) const
321 {
322 return m_Box[0] <= m_Box[1];
323 }
324
325
326
GetSubjStrand(void) const327 bool CAlignShadow::GetSubjStrand(void) const
328 {
329 return m_Box[2] <= m_Box[3];
330 }
331
332
SetStrand(Uint1 where,bool strand)333 void CAlignShadow::SetStrand(Uint1 where, bool strand)
334 {
335 #ifdef _DEBUG
336 if(0 != where && where != 1) {
337 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
338 "CAlignShadow::SetStrand() - argument out of range");
339 }
340 #endif
341
342 const Uint1 i1 (where << 1), i2 (i1 + 1);
343
344 if(m_Box[i1] == g_UndefCoord || m_Box[i2] == g_UndefCoord) {
345 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
346 "CAlignShadow::SetStrand() -start and/or stop not yet set");
347 }
348
349 const bool cur_strand (GetStrand(where));
350 if(strand != cur_strand) {
351 swap(m_Box[i1], m_Box[i2]);
352 }
353 }
354
355
SetQueryStrand(bool strand)356 void CAlignShadow::SetQueryStrand(bool strand)
357 {
358 SetStrand(0, strand);
359 }
360
361
SetSubjStrand(bool strand)362 void CAlignShadow::SetSubjStrand(bool strand)
363 {
364 SetStrand(1, strand);
365 }
366
367
SwapQS(void)368 void CAlignShadow::SwapQS(void)
369 {
370 TCoord a = m_Box[0], b = m_Box[1];
371 m_Box[0] = m_Box[2];
372 m_Box[1] = m_Box[3];
373 m_Box[2] = a;
374 m_Box[3] = b;
375 TId id = GetQueryId();
376 SetQueryId(GetSubjId());
377 SetSubjId(id);
378 }
379
380
FlipStrands(void)381 void CAlignShadow::FlipStrands(void)
382 {
383 SetQueryStrand(!GetQueryStrand());
384 SetSubjStrand(!GetSubjStrand());
385 if(m_Transcript.size()) {
386 m_Transcript = s_RunLengthDecode(m_Transcript);
387 reverse(m_Transcript.begin(), m_Transcript.end());
388 m_Transcript = s_RunLengthEncode(m_Transcript);
389 }
390 }
391
392
GetBox(void) const393 const CAlignShadow::TCoord* CAlignShadow::GetBox(void) const
394 {
395 return m_Box;
396 }
397
398
SetBox(const TCoord box[4])399 void CAlignShadow::SetBox(const TCoord box [4])
400 {
401 copy(box, box + 4, m_Box);
402 }
403
404
GetStart(Uint1 where) const405 CAlignShadow::TCoord CAlignShadow::GetStart(Uint1 where) const
406 {
407 #ifdef _DEBUG
408 if(0 != where && where != 1) {
409 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
410 "CAlignShadow::GetStart() - argument out of range");
411 }
412 #endif
413
414 return m_Box[where << 1];
415 }
416
417
GetStop(Uint1 where) const418 CAlignShadow::TCoord CAlignShadow::GetStop(Uint1 where) const
419 {
420 #ifdef _DEBUG
421 if(0 != where && where != 1) {
422 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
423 "CAlignShadow::GetStop() - argument out of range");
424 }
425 #endif
426
427 return m_Box[(where << 1) | 1];
428 }
429
430
GetQueryStart(void) const431 CAlignShadow::TCoord CAlignShadow::GetQueryStart(void) const
432 {
433 return m_Box[0];
434 }
435
436
GetQueryStop(void) const437 CAlignShadow::TCoord CAlignShadow::GetQueryStop(void) const
438 {
439 return m_Box[1];
440 }
441
442
GetSubjStart(void) const443 CAlignShadow::TCoord CAlignShadow::GetSubjStart(void) const
444 {
445 return m_Box[2];
446 }
447
448
GetSubjStop(void) const449 CAlignShadow::TCoord CAlignShadow::GetSubjStop(void) const
450 {
451 return m_Box[3];
452 }
453
454
SetStart(Uint1 where,TCoord val)455 void CAlignShadow::SetStart(Uint1 where, TCoord val)
456 {
457 #ifdef _DEBUG
458 if(0 != where && where != 1) {
459 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
460 "CAlignShadow::GetStart() - argument out of range");
461 }
462 #endif
463
464 m_Box[(where << 1) | 1] = val;
465 }
466
467
SetStop(Uint1 where,TCoord val)468 void CAlignShadow::SetStop(Uint1 where, TCoord val)
469 {
470 #ifdef _DEBUG
471 if(0 != where && where != 1) {
472 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
473 "CAlignShadow::GetStop() - argument out of range");
474 }
475 #endif
476
477 m_Box[where << 1] = val;
478 }
479
480
SetQueryStart(TCoord val)481 void CAlignShadow::SetQueryStart(TCoord val)
482 {
483 m_Box[0] = val;
484 }
485
486
SetQueryStop(TCoord val)487 void CAlignShadow::SetQueryStop(TCoord val)
488 {
489 m_Box[1] = val;
490 }
491
492
SetSubjStart(TCoord val)493 void CAlignShadow::SetSubjStart(TCoord val)
494 {
495 m_Box[2] = val;
496 }
497
498
SetSubjStop(TCoord val)499 void CAlignShadow::SetSubjStop(TCoord val)
500 {
501 m_Box[3] = val;
502 }
503
504
505 // // // //
506
507
GetMin(Uint1 where) const508 CAlignShadow::TCoord CAlignShadow::GetMin(Uint1 where) const
509 {
510 #ifdef _DEBUG
511 if(0 != where && where != 1) {
512 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
513 "CAlignShadow::GetMin() - argument out of range");
514 }
515 #endif
516
517 Uint1 i1 = where << 1, i2 = i1 + 1;
518 return min(m_Box[i1], m_Box[i2]);
519 }
520
521
GetMax(Uint1 where) const522 CAlignShadow::TCoord CAlignShadow::GetMax(Uint1 where) const
523 {
524 #ifdef _DEBUG
525 if(0 != where && where != 1) {
526 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
527 "CAlignShadow::GetMax() - argument out of range");
528 }
529 #endif
530
531 Uint1 i1 = where << 1, i2 = i1 + 1;
532 return max(m_Box[i1], m_Box[i2]);
533 }
534
535
SetMin(Uint1 where,TCoord val)536 void CAlignShadow::SetMin(Uint1 where, TCoord val)
537 {
538 #ifdef _DEBUG
539 if(0 != where && where != 1) {
540 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
541 "CAlignShadow::SetMin() - argument out of range");
542 }
543 #endif
544
545 const Uint1 i1 (where << 1), i2 (i1 + 1);
546
547 if(m_Box[i1] == g_UndefCoord || m_Box[i2] == g_UndefCoord) {
548 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
549 "CAlignShadow::SetMin() - start and/or stop not yet set");
550 }
551 else {
552
553 if(m_Box[i1] <= m_Box[i2] && val <= m_Box[i2]) {
554 m_Box[i1] = val;
555 }
556 else if(val <= m_Box[i1]) {
557 m_Box[i2] = val;
558 }
559 else {
560 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
561 "CAlignShadow::SetMin() - new position is invalid");
562 }
563 }
564 }
565
566
567
SetMax(Uint1 where,TCoord val)568 void CAlignShadow::SetMax(Uint1 where, TCoord val)
569 {
570 #ifdef _DEBUG
571 if(0 != where && where != 1) {
572 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
573 "CAlignShadow::SetMax() - argument out of range");
574 }
575 #endif
576
577 const Uint1 i1 (where << 1), i2 (i1 + 1);
578
579 if(m_Box[i1] == g_UndefCoord || m_Box[i2] == g_UndefCoord) {
580 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
581 "CAlignShadow::SetMax() - start and/or stop not yet set");
582 }
583 else {
584
585 if(m_Box[i1] <= m_Box[i2] && val >= m_Box[i1]) {
586 m_Box[i2] = val;
587 }
588 else if(val >= m_Box[i2]) {
589 m_Box[i1] = val;
590 }
591 else {
592 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
593 "CAlignShadow::SetMax() - new position is invalid");
594 }
595 }
596 }
597
598
SetQueryMax(TCoord val)599 void CAlignShadow::SetQueryMax(TCoord val)
600 {
601 SetMax(0, val);
602 }
603
604
SetSubjMax(TCoord val)605 void CAlignShadow::SetSubjMax(TCoord val)
606 {
607 SetMax(1, val);
608 }
609
610
SetQueryMin(TCoord val)611 void CAlignShadow::SetQueryMin(TCoord val)
612 {
613 SetMin(0, val);
614 }
615
616
SetSubjMin(TCoord val)617 void CAlignShadow::SetSubjMin(TCoord val)
618 {
619 SetMin(1, val);
620 }
621
622
GetTranscript(void) const623 const CAlignShadow::TTranscript& CAlignShadow::GetTranscript(void) const
624 {
625 return m_Transcript;
626 }
627
628
629 /////////////////////////////////////////////////////////////////////////////
630 // partial serialization
631
x_PartialSerialize(CNcbiOstream & os) const632 void CAlignShadow::x_PartialSerialize(CNcbiOstream& os) const
633 {
634 os << GetQueryStart() + 1 << '\t' << GetQueryStop() + 1 << '\t'
635 << GetSubjStart() + 1 << '\t' << GetSubjStop() + 1;
636 if(m_Transcript.size() > 0) {
637 os << '\t' << m_Transcript;
638 }
639 }
640
641
GetQueryMin() const642 CAlignShadow::TCoord CAlignShadow::GetQueryMin() const
643 {
644 return min(m_Box[0], m_Box[1]);
645 }
646
647
GetSubjMin() const648 CAlignShadow::TCoord CAlignShadow::GetSubjMin() const
649 {
650 return min(m_Box[2], m_Box[3]);
651 }
652
653
GetQueryMax() const654 CAlignShadow::TCoord CAlignShadow::GetQueryMax() const
655 {
656 return max(m_Box[0], m_Box[1]);
657 }
658
659
GetSubjMax() const660 CAlignShadow::TCoord CAlignShadow::GetSubjMax() const
661 {
662 return max(m_Box[2], m_Box[3]);
663 }
664
665
GetQuerySpan(void) const666 CAlignShadow::TCoord CAlignShadow::GetQuerySpan(void) const
667 {
668 return 1 + GetQueryMax() - GetQueryMin();
669 }
670
671
GetSubjSpan(void) const672 CAlignShadow::TCoord CAlignShadow::GetSubjSpan(void) const
673 {
674 return 1 + GetSubjMax() - GetSubjMin();
675 }
676
677
Shift(Int4 shift_query,Int4 shift_subj)678 void CAlignShadow::Shift(Int4 shift_query, Int4 shift_subj)
679 {
680 m_Box[0] += shift_query;
681 m_Box[1] += shift_query;
682 m_Box[2] += shift_subj;
683 m_Box[3] += shift_subj;
684 }
685
686
Modify(Uint1 point,TCoord new_pos)687 void CAlignShadow::Modify(Uint1 point, TCoord new_pos)
688 {
689 TCoord qmin, qmax;
690 bool qstrand;
691 if(m_Box[0] < m_Box[1]) {
692 qmin = m_Box[0];
693 qmax = m_Box[1];
694 qstrand = true;
695 }
696 else {
697 qmin = m_Box[1];
698 qmax = m_Box[0];
699 qstrand = false;
700 }
701
702 TCoord smin, smax;
703 bool sstrand;
704 if(m_Box[2] < m_Box[3]) {
705 smin = m_Box[2];
706 smax = m_Box[3];
707 sstrand = true;
708 }
709 else {
710 smin = m_Box[3];
711 smax = m_Box[2];
712 sstrand = false;
713 }
714
715 bool newpos_invalid = false;
716 if(point <= 1) {
717 if(new_pos < qmin || new_pos > qmax) {
718 newpos_invalid = true;
719 }
720 }
721 else {
722 if(new_pos < smin || new_pos > smax) {
723 newpos_invalid = true;
724 }
725 }
726
727 if(newpos_invalid) {
728 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
729 "CAlignShadow::Modify(): requested new position invalid");
730 }
731
732 const bool same_strands = qstrand == sstrand;
733
734 TCoord q = 0, s = 0;
735
736 if(m_Transcript.size() > 0) {
737
738 q = GetQueryStart();
739 s = GetSubjStart();
740 Int1 dq = (qstrand? +1: -1), ds = (sstrand? +1: -1);
741 string xcript (s_RunLengthDecode(m_Transcript));
742
743 size_t n1 = 0;
744 bool need_trace = true;
745 if(point <= 1) {
746 if(q == new_pos) need_trace = false;
747 }
748 else {
749 if(s == new_pos) need_trace = false;
750 }
751
752 const bool point_is_start((point%2) ^ (GetStrand(point/2)? 1: 0));
753 if(need_trace) {
754
755 char c = 0;
756 ITERATE(TTranscript, ii, xcript) {
757
758 ++n1;
759 switch(c = *ii) {
760 case 'M': case 'R': q += dq; s += ds; break;
761 case 'D': q += dq; break;
762 case 'I': s += ds; break;
763 default: {
764 NCBI_THROW(CAlgoAlignUtilException, eInternal,
765 "CAlignShadow::Modify(): unexpected transcript symbol");
766 }
767 }
768
769 if(point_is_start) {
770 if(point <= 1) {
771 if(q == new_pos) break;
772 }
773 else {
774 if(s == new_pos) break;
775 }
776 }
777 else {
778 if(point <= 1) {
779 if(q == new_pos + dq) break;
780 }
781 else {
782 if(s == new_pos + ds) break;
783 }
784 }
785 }
786
787 if(!point_is_start && n1 > 0) {
788 switch(c) {
789 case 'M': case 'R': q -= dq; s -= ds; break;
790 case 'D': q -= dq; break;
791 case 'I': s -= ds; break;
792 }
793 }
794
795 }
796
797 switch(point) {
798 case 0: // query min
799 SetQueryMin(q);
800 if(same_strands) { SetSubjMin(s); } else { SetSubjMax(s); }
801 break;
802 case 1: // query max
803 SetQueryMax(q);
804 if(same_strands) { SetSubjMax(s); } else { SetSubjMin(s); }
805 break;
806 case 2: // subj min
807 SetSubjMin(s);
808 if(same_strands) { SetQueryMin(q); } else { SetQueryMax(q); }
809 break;
810 case 3: // subj max
811 SetSubjMax(s);
812 if(same_strands) { SetQueryMax(q); } else { SetQueryMin(q); }
813 break;
814 default:
815 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
816 "CAlignShadow::Modify(): Invalid end point requested.");
817 }
818
819 if(n1 > 0) {
820 if( point_is_start ) {
821 xcript = xcript.substr(n1, xcript.size() - n1);
822 }
823 else {
824 xcript.resize(n1);
825 }
826 m_Transcript = s_RunLengthEncode(xcript);
827 }
828 }
829 else {
830
831 //note that with the following logic calculated delta might exceed hit length
832 //example query [0-6] subj [307-288] new_pos=307 point=2
833 //leads to a disaster. The hit becomes
834 // 0 4294967295 307 307
835 // In the situation like that delta must be adjusted
836 TCoord qlen = 1 + qmax - qmin, slen = 1 + smax - smin;
837 double k = double(qlen) / slen;
838 Int4 delta_q, delta_s;
839 switch(point) {
840
841 case 0: // query min
842
843 delta_q = new_pos - qmin;
844 delta_s = Int4(round(delta_q / k));
845 if( smin + delta_s > smax ) {
846 delta_s = smax - smin;
847 }
848
849 SetQueryMin(qmin + delta_q);
850 if(same_strands) {
851 SetSubjMin(smin + delta_s);
852 }
853 else {
854 SetSubjMax(smax - delta_s);
855 }
856
857 break;
858
859 case 1: // query max
860
861 delta_q = qmax - new_pos;
862 delta_s = Int4(round(delta_q / k));
863 if( smin + delta_s > smax ) {
864 delta_s = smax - smin;
865 }
866
867 SetQueryMax(qmax - delta_q);
868 if(same_strands) {
869 SetSubjMax(smax - delta_s);
870 }
871 else {
872 SetSubjMin(smin + delta_s);
873 }
874
875 break;
876
877 case 2: // subj min
878
879 delta_s = new_pos - smin;
880 delta_q = Int4(round(delta_s * k));
881 if( qmin + delta_q > qmax ) {
882 delta_q = qmax - qmin;
883 }
884
885 SetSubjMin(smin + delta_s);
886 if(same_strands) {
887 SetQueryMin(qmin + delta_q);
888 }
889 else {
890 SetQueryMax(qmax - delta_q);
891 }
892
893 break;
894
895 case 3: // subj max
896
897 delta_s = smax - new_pos;
898 delta_q = Int4(round(delta_s * k));
899 if( qmin + delta_q > qmax ) {
900 delta_q = qmax - qmin;
901 }
902
903 SetSubjMax(smax - delta_s);
904 if(same_strands) {
905 SetQueryMax(qmax - delta_q);
906 }
907 else {
908 SetQueryMin(qmin + delta_q);
909 }
910
911 break;
912
913 default:
914 NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
915 "CAlignShadow::Modify(): invalid end requested");
916 };
917 }
918 }
919
920
s_RunLengthEncode(const string & in)921 string CAlignShadow::s_RunLengthEncode(const string& in)
922 {
923 string out;
924 const size_t dim (in.size());
925 if(dim == 0) {
926 return kEmptyStr;
927 }
928
929 const char* p (in.data());
930 char c0 (p[0]);
931 out.append(1, c0);
932 size_t count (1);
933 for(size_t k (1); k < dim; ++k) {
934
935 char c (p[k]);
936 if(isdigit(c)) { // consider already encoded
937 return in;
938 }
939
940 if(c != c0) {
941 c0 = c;
942 if(count > 1) {
943 out += NStr::NumericToString(count);
944 }
945 count = 1;
946 out.append(1, c0);
947 }
948 else {
949 ++count;
950 }
951 }
952
953 if(count > 1) {
954 out += NStr::NumericToString(count);
955 }
956
957 return out;
958 }
959
960
s_RunLengthDecode(const string & in)961 string CAlignShadow::s_RunLengthDecode(const string& in)
962 {
963 string out;
964 char C = 0;
965 Uint4 N = 0;
966 ITERATE(string, ii, in) {
967
968 char c = *ii;
969 if('0' <= c && c <= '9') {
970 N = N * 10 + c - '0';
971 }
972 else {
973 if(N > 0) {
974 out.append(N - 1, C);
975 N = 0;
976 }
977 out.push_back(C = c);
978 }
979 }
980 if(N > 0) {
981 out.append(N - 1, C);
982 }
983 return out;
984 }
985
986
987 END_NCBI_SCOPE
988