1 /* $Id: align_shadow.cpp 586001 2019-05-08 14:35:02Z chetvern $
2 * ===========================================================================
3 *
4 *                            public DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Yuri Kapustin
27 *
28 * File Description:
29 *
30 */
31 
32 #include <ncbi_pch.hpp>
33 #include <algo/align/util/align_shadow.hpp>
34 #include <objects/seqalign/Std_seg.hpp>
35 #include <objects/seqloc/Seq_loc.hpp>
36 #include <objects/seqloc/Seq_interval.hpp>
37 
38 #include <algorithm>
39 #include <numeric>
40 
41 #include <math.h>
42 
43 BEGIN_NCBI_SCOPE
44 
45 namespace {
46 
47     const CAlignShadow::TCoord g_UndefCoord (
48         numeric_limits<CAlignShadow::TCoord>::max());
49 
50     typedef pair<objects::ENa_strand, objects::ENa_strand> TStrands;
51 
52 
RetrieveStdStrands(const objects::CSeq_align & seq_align)53 TStrands RetrieveStdStrands(const objects::CSeq_align& seq_align)
54 {
55     USING_SCOPE(objects);
56 
57     const CSeq_align::TSegs::TStd & std_list (seq_align.GetSegs().GetStd());
58     const CStd_seg & stdseg (*(std_list.front()));
59     const CStd_seg::TLoc & locs (stdseg.GetLoc());
60     return TStrands(locs[0]->GetStrand(), locs[1]->GetStrand());
61 }
62 
63 }
64 
CAlignShadow(const objects::CSeq_align & seq_align,bool save_xcript)65 CAlignShadow::CAlignShadow(const objects::CSeq_align& seq_align, bool save_xcript)
66 {
67     USING_SCOPE(objects);
68 
69     if (seq_align.CheckNumRows() != 2) {
70         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
71                    "Pairwise seq-align required to init align-shadow");
72     }
73 
74     const bool is_denseg (seq_align.GetSegs().IsDenseg());
75     const bool is_stdseg (seq_align.GetSegs().IsStd());
76     if(!is_denseg && !is_stdseg) {
77         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
78                    "Must be a dense-seg or std_seg to init align-shadow");
79     }
80 
81     m_Id.first.Reset(&seq_align.GetSeq_id(0));
82     m_Id.second.Reset(&seq_align.GetSeq_id(1));
83 
84     ENa_strand qstrand (eNa_strand_unknown);
85     ENa_strand sstrand (eNa_strand_unknown);
86 
87     if(is_denseg) {
88         qstrand = seq_align.GetSeqStrand(0);
89         sstrand = seq_align.GetSeqStrand(1);
90     }
91     else {
92         const TStrands strands (RetrieveStdStrands(seq_align));
93         qstrand = strands.first;
94         sstrand = strands.second;
95     }
96 
97     if(qstrand == eNa_strand_minus) {
98         m_Box[1] = seq_align.GetSeqStart(0);
99         m_Box[0] = seq_align.GetSeqStop(0);
100     }
101     else {
102         m_Box[0] = seq_align.GetSeqStart(0);
103         m_Box[1] = seq_align.GetSeqStop(0);
104     }
105 
106     if(sstrand == eNa_strand_minus) {
107         m_Box[3] = seq_align.GetSeqStart(1);
108         m_Box[2] = seq_align.GetSeqStop(1);
109     }
110     else {
111         m_Box[2] = seq_align.GetSeqStart(1);
112         m_Box[3] = seq_align.GetSeqStop(1);
113     }
114 
115     if(save_xcript) {
116 
117         if(is_denseg) {
118 
119             // compile edit transcript treating diags as matches
120             const CDense_seg & ds (seq_align.GetSegs().GetDenseg());
121             const CDense_seg::TStarts& starts (ds.GetStarts());
122             const CDense_seg::TLens& lens (ds.GetLens());
123             const CDense_seg::TStrands& strands = ds.GetStrands();
124 
125             const char indel[2] = {'D', 'I'};
126             TSignedSeqPos next[2];
127             {
128                 for (size_t j: {0,1}) {
129                     size_t i = j;
130                     while(i < starts.size() && starts[i]==-1) i += 2;
131                     next[j] = starts[i];
132                     if (strands[i] == eNa_strand_minus) {
133                         next[j] += lens[i/2];
134                     }
135                 }
136             }
137 
138             size_t i (0);
139             for (size_t ii_lens = 0; ii_lens < lens.size(); ++ii_lens) {
140                 char c;
141 
142                 for (size_t j: {0,1}) {
143                     if (starts[i+j] != -1) {
144                         if (strands[i+j] == eNa_strand_minus) {
145                             if (next[j] != starts[i+j]+lens[ii_lens]) {
146                                 m_Transcript.push_back(indel[j]);
147                                 m_Transcript.append(NStr::NumericToString(next[j]-starts[i+j]-lens[ii_lens]));
148                             }
149                             next[j] = starts[i+j];
150                         } else {
151                             if (next[j] != starts[i+j]) {
152                                 m_Transcript.push_back(indel[j]);
153                                 m_Transcript.append(NStr::NumericToString(starts[i+j]-next[j]));
154                             }
155                             next[j] = starts[i+j] + lens[ii_lens];
156                         }
157                     }
158                 }
159 
160                 if(starts[i] < 0) {
161                     c = 'I';
162                 }
163                 else if(starts[i+1] < 0) {
164                     c = 'D';
165                 }
166                 else {
167                     c = 'M';
168                 }
169                 m_Transcript.push_back(c);
170                 if(lens[ii_lens] > 1) {
171                     m_Transcript.append(NStr::NumericToString(lens[ii_lens]));
172                 }
173                 i += 2;
174             }
175         }
176         else {
177             NCBI_THROW(CAlgoAlignUtilException,
178                        eInternal,
179                        "CAlignShadow(): save_xcript mode not supported "
180                        "for seq-align segments other than dense-seg.");
181         }
182     }
183 }
184 
185 
CAlignShadow(void)186 CAlignShadow::CAlignShadow(void)
187 {
188     m_Box[0] = m_Box[1] = m_Box[2] = m_Box[3] = g_UndefCoord;
189 }
190 
191 
CAlignShadow(const TId & idquery,TCoord qstart,bool qstrand,const TId & idsubj,TCoord sstart,bool sstrand,const string & xcript)192 CAlignShadow::CAlignShadow(const TId& idquery, TCoord qstart, bool qstrand,
193                            const TId& idsubj, TCoord sstart, bool sstrand,
194                            const string& xcript)
195 {
196     m_Id.first  = idquery;
197     m_Id.second = idsubj;
198 
199     m_Box[0] = qstart;
200     m_Box[2] = sstart;
201 
202     TCoord q = qstart, q0 = q, s = sstart, s0 = s;
203     const int qinc (qstrand? 1: -1), sinc (sstrand? 1: -1);
204 
205     ITERATE(string, ii, xcript) {
206 
207         switch(*ii) {
208 
209         case 'M': case 'R':
210             q0 = q;
211             q += qinc;
212             s0 = s;
213             s += sinc;
214             break;
215 
216         case 'I':
217             s0 = s;
218             s += sinc;
219             break;
220 
221         case 'D':
222             q0 = q;
223             q += qinc;
224             break;
225 
226         default:
227             NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
228                        "CAlignShadow()::CAlignShadow(): "
229                        "Unexpected transcript symbol found");
230         }
231     }
232 
233     m_Box[1] = q0;
234     m_Box[3] = s0;
235 
236     m_Transcript = s_RunLengthEncode(xcript);
237 }
238 
239 
operator <<(CNcbiOstream & os,const CAlignShadow & align_shadow)240 CNcbiOstream& operator << (CNcbiOstream& os, const CAlignShadow& align_shadow)
241 {
242     USING_SCOPE(objects);
243 
244     os  << align_shadow.GetId(0)->AsFastaString() << '\t'
245         << align_shadow.GetId(1)->AsFastaString() << '\t';
246 
247     align_shadow.x_PartialSerialize(os);
248 
249     return os;
250 }
251 
252 
253 //////////////////////////////////////////////////////////////////////////////
254 // getters and  setters
255 
256 
GetId(Uint1 where) const257 const CAlignShadow::TId& CAlignShadow::GetId(Uint1 where) const
258 {
259     switch(where) {
260     case 0: return m_Id.first;
261     case 1: return m_Id.second;
262     default: {
263         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
264                    "CAlignShadow::GetId() - argument out of range");
265     }
266     }
267 }
268 
269 
GetQueryId(void) const270 const CAlignShadow::TId& CAlignShadow::GetQueryId(void) const
271 {
272     return m_Id.first;
273 }
274 
275 
GetSubjId(void) const276 const CAlignShadow::TId& CAlignShadow::GetSubjId(void) const
277 {
278     return m_Id.second;
279 }
280 
281 
SetId(Uint1 where,const TId & id)282 void CAlignShadow::SetId(Uint1 where, const TId& id)
283 {
284     switch(where) {
285     case 0: m_Id.first = id; break;
286     case 1: m_Id.second = id; break;
287     default: {
288         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
289                    "CAlignShadow::SetId() - argument out of range");
290     }
291     }
292 }
293 
294 
SetQueryId(const TId & id)295 void CAlignShadow::SetQueryId(const TId& id)
296 {
297     m_Id.first = id;
298 }
299 
300 
SetSubjId(const TId & id)301 void CAlignShadow::SetSubjId(const TId& id)
302 {
303     m_Id.second = id;
304 }
305 
306 
GetStrand(Uint1 where) const307 bool CAlignShadow::GetStrand(Uint1 where) const
308 {
309 #ifdef _DEBUG
310     if(0 != where && where != 1) {
311         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
312                    "CAlignShadow::GetStrand() - argument out of range");
313     }
314 #endif
315 
316     return where == 0? m_Box[0] <= m_Box[1]: m_Box[2] <= m_Box[3];
317 }
318 
319 
GetQueryStrand(void) const320 bool CAlignShadow::GetQueryStrand(void) const
321 {
322     return m_Box[0] <= m_Box[1];
323 }
324 
325 
326 
GetSubjStrand(void) const327 bool CAlignShadow::GetSubjStrand(void) const
328 {
329     return m_Box[2] <= m_Box[3];
330 }
331 
332 
SetStrand(Uint1 where,bool strand)333 void CAlignShadow::SetStrand(Uint1 where, bool strand)
334 {
335 #ifdef _DEBUG
336     if(0 != where && where != 1) {
337         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
338                    "CAlignShadow::SetStrand() - argument out of range");
339     }
340 #endif
341 
342     const Uint1 i1 (where << 1), i2 (i1 + 1);
343 
344     if(m_Box[i1] == g_UndefCoord || m_Box[i2] == g_UndefCoord) {
345         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
346                    "CAlignShadow::SetStrand() -start and/or stop not yet set");
347     }
348 
349     const bool cur_strand (GetStrand(where));
350     if(strand != cur_strand) {
351         swap(m_Box[i1], m_Box[i2]);
352     }
353 }
354 
355 
SetQueryStrand(bool strand)356 void CAlignShadow::SetQueryStrand(bool strand)
357 {
358     SetStrand(0, strand);
359 }
360 
361 
SetSubjStrand(bool strand)362 void CAlignShadow::SetSubjStrand(bool strand)
363 {
364     SetStrand(1, strand);
365 }
366 
367 
SwapQS(void)368 void CAlignShadow::SwapQS(void)
369 {
370     TCoord a = m_Box[0], b = m_Box[1];
371     m_Box[0] = m_Box[2];
372     m_Box[1] = m_Box[3];
373     m_Box[2] = a;
374     m_Box[3] = b;
375     TId id = GetQueryId();
376     SetQueryId(GetSubjId());
377     SetSubjId(id);
378 }
379 
380 
FlipStrands(void)381 void CAlignShadow::FlipStrands(void)
382 {
383     SetQueryStrand(!GetQueryStrand());
384     SetSubjStrand(!GetSubjStrand());
385     if(m_Transcript.size()) {
386         m_Transcript = s_RunLengthDecode(m_Transcript);
387         reverse(m_Transcript.begin(), m_Transcript.end());
388         m_Transcript = s_RunLengthEncode(m_Transcript);
389     }
390 }
391 
392 
GetBox(void) const393 const  CAlignShadow::TCoord* CAlignShadow::GetBox(void) const
394 {
395     return m_Box;
396 }
397 
398 
SetBox(const TCoord box[4])399 void CAlignShadow::SetBox(const TCoord box [4])
400 {
401     copy(box, box + 4, m_Box);
402 }
403 
404 
GetStart(Uint1 where) const405 CAlignShadow::TCoord CAlignShadow::GetStart(Uint1 where) const
406 {
407 #ifdef _DEBUG
408     if(0 != where && where != 1) {
409         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
410                    "CAlignShadow::GetStart() - argument out of range");
411     }
412 #endif
413 
414     return m_Box[where << 1];
415 }
416 
417 
GetStop(Uint1 where) const418 CAlignShadow::TCoord CAlignShadow::GetStop(Uint1 where) const
419 {
420 #ifdef _DEBUG
421     if(0 != where && where != 1) {
422         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
423                    "CAlignShadow::GetStop() - argument out of range");
424     }
425 #endif
426 
427     return m_Box[(where << 1) | 1];
428 }
429 
430 
GetQueryStart(void) const431 CAlignShadow::TCoord CAlignShadow::GetQueryStart(void) const
432 {
433     return m_Box[0];
434 }
435 
436 
GetQueryStop(void) const437 CAlignShadow::TCoord CAlignShadow::GetQueryStop(void) const
438 {
439     return m_Box[1];
440 }
441 
442 
GetSubjStart(void) const443 CAlignShadow::TCoord CAlignShadow::GetSubjStart(void) const
444 {
445     return m_Box[2];
446 }
447 
448 
GetSubjStop(void) const449 CAlignShadow::TCoord CAlignShadow::GetSubjStop(void) const
450 {
451     return m_Box[3];
452 }
453 
454 
SetStart(Uint1 where,TCoord val)455 void CAlignShadow::SetStart(Uint1 where, TCoord val)
456 {
457 #ifdef _DEBUG
458     if(0 != where && where != 1) {
459         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
460                    "CAlignShadow::GetStart() - argument out of range");
461     }
462 #endif
463 
464     m_Box[(where << 1) | 1] = val;
465 }
466 
467 
SetStop(Uint1 where,TCoord val)468 void CAlignShadow::SetStop(Uint1 where, TCoord val)
469 {
470 #ifdef _DEBUG
471     if(0 != where && where != 1) {
472         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
473                    "CAlignShadow::GetStop() - argument out of range");
474     }
475 #endif
476 
477     m_Box[where << 1] = val;
478 }
479 
480 
SetQueryStart(TCoord val)481 void CAlignShadow::SetQueryStart(TCoord val)
482 {
483     m_Box[0] = val;
484 }
485 
486 
SetQueryStop(TCoord val)487 void CAlignShadow::SetQueryStop(TCoord val)
488 {
489      m_Box[1] = val;
490 }
491 
492 
SetSubjStart(TCoord val)493 void CAlignShadow::SetSubjStart(TCoord val)
494 {
495     m_Box[2] = val;
496 }
497 
498 
SetSubjStop(TCoord val)499 void CAlignShadow::SetSubjStop(TCoord val)
500 {
501     m_Box[3] = val;
502 }
503 
504 
505 // // // //
506 
507 
GetMin(Uint1 where) const508 CAlignShadow::TCoord CAlignShadow::GetMin(Uint1 where) const
509 {
510 #ifdef _DEBUG
511     if(0 != where && where != 1) {
512         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
513                    "CAlignShadow::GetMin() - argument out of range");
514     }
515 #endif
516 
517     Uint1 i1 = where << 1, i2 = i1 + 1;
518     return min(m_Box[i1], m_Box[i2]);
519 }
520 
521 
GetMax(Uint1 where) const522 CAlignShadow::TCoord CAlignShadow::GetMax(Uint1 where) const
523 {
524 #ifdef _DEBUG
525     if(0 != where && where != 1) {
526         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
527                    "CAlignShadow::GetMax() - argument out of range");
528     }
529 #endif
530 
531     Uint1 i1 = where << 1, i2 = i1 + 1;
532     return max(m_Box[i1], m_Box[i2]);
533 }
534 
535 
SetMin(Uint1 where,TCoord val)536 void CAlignShadow::SetMin(Uint1 where, TCoord val)
537 {
538 #ifdef _DEBUG
539     if(0 != where && where != 1) {
540         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
541                    "CAlignShadow::SetMin() - argument out of range");
542     }
543 #endif
544 
545     const Uint1 i1 (where << 1), i2 (i1 + 1);
546 
547     if(m_Box[i1] == g_UndefCoord || m_Box[i2] == g_UndefCoord) {
548         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
549                    "CAlignShadow::SetMin() - start and/or stop not yet set");
550     }
551     else {
552 
553         if(m_Box[i1] <= m_Box[i2] && val <= m_Box[i2]) {
554             m_Box[i1] = val;
555         }
556         else if(val <= m_Box[i1]) {
557             m_Box[i2] = val;
558         }
559         else {
560             NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
561                        "CAlignShadow::SetMin() - new position is invalid");
562         }
563     }
564 }
565 
566 
567 
SetMax(Uint1 where,TCoord val)568 void CAlignShadow::SetMax(Uint1 where, TCoord val)
569 {
570 #ifdef _DEBUG
571     if(0 != where && where != 1) {
572         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
573                    "CAlignShadow::SetMax() - argument out of range");
574     }
575 #endif
576 
577     const Uint1 i1 (where << 1), i2 (i1 + 1);
578 
579     if(m_Box[i1] == g_UndefCoord || m_Box[i2] == g_UndefCoord) {
580         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
581                    "CAlignShadow::SetMax() - start and/or stop not yet set");
582     }
583     else {
584 
585         if(m_Box[i1] <= m_Box[i2] && val >= m_Box[i1]) {
586             m_Box[i2] = val;
587         }
588         else if(val >= m_Box[i2]) {
589             m_Box[i1] = val;
590         }
591         else {
592             NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
593                        "CAlignShadow::SetMax() - new position is invalid");
594         }
595     }
596 }
597 
598 
SetQueryMax(TCoord val)599 void CAlignShadow::SetQueryMax(TCoord val)
600 {
601     SetMax(0, val);
602 }
603 
604 
SetSubjMax(TCoord val)605 void CAlignShadow::SetSubjMax(TCoord val)
606 {
607     SetMax(1, val);
608 }
609 
610 
SetQueryMin(TCoord val)611 void CAlignShadow::SetQueryMin(TCoord val)
612 {
613     SetMin(0, val);
614 }
615 
616 
SetSubjMin(TCoord val)617 void CAlignShadow::SetSubjMin(TCoord val)
618 {
619     SetMin(1, val);
620 }
621 
622 
GetTranscript(void) const623 const CAlignShadow::TTranscript& CAlignShadow::GetTranscript(void) const
624 {
625     return m_Transcript;
626 }
627 
628 
629 /////////////////////////////////////////////////////////////////////////////
630 // partial serialization
631 
x_PartialSerialize(CNcbiOstream & os) const632 void CAlignShadow::x_PartialSerialize(CNcbiOstream& os) const
633 {
634     os << GetQueryStart() + 1 << '\t' << GetQueryStop() + 1 << '\t'
635        << GetSubjStart() + 1 << '\t' << GetSubjStop() + 1;
636     if(m_Transcript.size() > 0) {
637         os << '\t' << m_Transcript;
638     }
639 }
640 
641 
GetQueryMin() const642 CAlignShadow::TCoord CAlignShadow::GetQueryMin() const
643 {
644     return min(m_Box[0], m_Box[1]);
645 }
646 
647 
GetSubjMin() const648 CAlignShadow::TCoord CAlignShadow::GetSubjMin() const
649 {
650     return min(m_Box[2], m_Box[3]);
651 }
652 
653 
GetQueryMax() const654 CAlignShadow::TCoord CAlignShadow::GetQueryMax() const
655 {
656     return max(m_Box[0], m_Box[1]);
657 }
658 
659 
GetSubjMax() const660 CAlignShadow::TCoord CAlignShadow::GetSubjMax() const
661 {
662     return max(m_Box[2], m_Box[3]);
663 }
664 
665 
GetQuerySpan(void) const666 CAlignShadow::TCoord CAlignShadow::GetQuerySpan(void) const
667 {
668     return 1 + GetQueryMax() - GetQueryMin();
669 }
670 
671 
GetSubjSpan(void) const672 CAlignShadow::TCoord CAlignShadow::GetSubjSpan(void) const
673 {
674     return 1 + GetSubjMax() - GetSubjMin();
675 }
676 
677 
Shift(Int4 shift_query,Int4 shift_subj)678 void CAlignShadow::Shift(Int4 shift_query, Int4 shift_subj)
679 {
680     m_Box[0] += shift_query;
681     m_Box[1] += shift_query;
682     m_Box[2] += shift_subj;
683     m_Box[3] += shift_subj;
684 }
685 
686 
Modify(Uint1 point,TCoord new_pos)687 void CAlignShadow::Modify(Uint1 point, TCoord new_pos)
688 {
689     TCoord qmin, qmax;
690     bool qstrand;
691     if(m_Box[0] < m_Box[1]) {
692         qmin = m_Box[0];
693         qmax = m_Box[1];
694         qstrand = true;
695     }
696     else {
697         qmin = m_Box[1];
698         qmax = m_Box[0];
699         qstrand = false;
700     }
701 
702     TCoord smin, smax;
703     bool sstrand;
704     if(m_Box[2] < m_Box[3]) {
705         smin = m_Box[2];
706         smax = m_Box[3];
707         sstrand = true;
708     }
709     else {
710         smin = m_Box[3];
711         smax = m_Box[2];
712         sstrand = false;
713     }
714 
715     bool newpos_invalid = false;
716     if(point <= 1) {
717         if(new_pos < qmin || new_pos > qmax) {
718             newpos_invalid = true;
719         }
720     }
721     else {
722         if(new_pos < smin || new_pos > smax) {
723             newpos_invalid = true;
724         }
725     }
726 
727     if(newpos_invalid) {
728         NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
729                    "CAlignShadow::Modify(): requested new position invalid");
730     }
731 
732     const bool same_strands = qstrand == sstrand;
733 
734     TCoord q = 0, s = 0;
735 
736     if(m_Transcript.size() > 0) {
737 
738         q = GetQueryStart();
739         s = GetSubjStart();
740         Int1 dq = (qstrand? +1: -1), ds = (sstrand?  +1: -1);
741         string xcript (s_RunLengthDecode(m_Transcript));
742 
743         size_t n1 = 0;
744         bool need_trace = true;
745         if(point <= 1) {
746             if(q == new_pos) need_trace = false;
747         }
748         else {
749             if(s == new_pos) need_trace = false;
750         }
751 
752         const bool point_is_start((point%2) ^ (GetStrand(point/2)? 1: 0));
753         if(need_trace) {
754 
755             char c = 0;
756             ITERATE(TTranscript, ii, xcript) {
757 
758                 ++n1;
759                 switch(c = *ii) {
760                 case 'M': case 'R': q += dq; s += ds; break;
761                 case 'D': q += dq; break;
762                 case 'I': s += ds; break;
763                 default: {
764                     NCBI_THROW(CAlgoAlignUtilException, eInternal,
765                              "CAlignShadow::Modify(): unexpected transcript symbol");
766                 }
767                 }
768 
769                 if(point_is_start) {
770                     if(point <= 1) {
771                         if(q == new_pos) break;
772                     }
773                     else {
774                         if(s == new_pos) break;
775                     }
776                 }
777                 else {
778                     if(point <= 1) {
779                         if(q == new_pos + dq) break;
780                     }
781                     else {
782                         if(s == new_pos + ds) break;
783                     }
784                 }
785             }
786 
787             if(!point_is_start && n1 > 0) {
788                 switch(c) {
789                 case 'M': case 'R': q -= dq; s -= ds; break;
790                 case 'D': q -= dq; break;
791                 case 'I': s -= ds; break;
792                 }
793             }
794 
795         }
796 
797         switch(point) {
798         case 0: // query min
799             SetQueryMin(q);
800             if(same_strands) { SetSubjMin(s); } else { SetSubjMax(s); }
801             break;
802         case 1: // query max
803             SetQueryMax(q);
804             if(same_strands) { SetSubjMax(s); } else { SetSubjMin(s); }
805             break;
806         case 2: // subj min
807             SetSubjMin(s);
808             if(same_strands) { SetQueryMin(q); } else { SetQueryMax(q); }
809             break;
810         case 3: // subj max
811             SetSubjMax(s);
812             if(same_strands) { SetQueryMax(q); } else { SetQueryMin(q); }
813             break;
814         default:
815             NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
816                        "CAlignShadow::Modify(): Invalid end point requested.");
817         }
818 
819         if(n1 > 0) {
820             if( point_is_start ) {
821                 xcript = xcript.substr(n1, xcript.size() - n1);
822             }
823             else {
824                 xcript.resize(n1);
825             }
826             m_Transcript = s_RunLengthEncode(xcript);
827         }
828     }
829     else {
830 
831         //note that with the following logic calculated delta might exceed hit length
832         //example query [0-6] subj [307-288] new_pos=307 point=2
833         //leads to a disaster.  The hit becomes
834         // 0	4294967295	307	307
835         // In the situation like that delta must be adjusted
836         TCoord qlen = 1 + qmax - qmin, slen = 1 + smax - smin;
837         double k = double(qlen) / slen;
838         Int4 delta_q, delta_s;
839         switch(point) {
840 
841         case 0: // query min
842 
843             delta_q = new_pos - qmin;
844             delta_s = Int4(round(delta_q / k));
845             if( smin + delta_s > smax ) {
846                 delta_s = smax - smin;
847             }
848 
849             SetQueryMin(qmin + delta_q);
850             if(same_strands) {
851                 SetSubjMin(smin + delta_s);
852             }
853             else {
854                 SetSubjMax(smax - delta_s);
855             }
856 
857             break;
858 
859         case 1: // query max
860 
861             delta_q = qmax - new_pos;
862             delta_s = Int4(round(delta_q / k));
863             if( smin + delta_s > smax ) {
864                 delta_s = smax - smin;
865             }
866 
867             SetQueryMax(qmax - delta_q);
868             if(same_strands) {
869                 SetSubjMax(smax - delta_s);
870             }
871             else {
872                 SetSubjMin(smin + delta_s);
873             }
874 
875             break;
876 
877         case 2: // subj min
878 
879             delta_s = new_pos - smin;
880             delta_q = Int4(round(delta_s * k));
881             if( qmin + delta_q > qmax ) {
882                 delta_q = qmax - qmin;
883             }
884 
885             SetSubjMin(smin + delta_s);
886             if(same_strands) {
887                 SetQueryMin(qmin + delta_q);
888             }
889             else {
890                 SetQueryMax(qmax - delta_q);
891             }
892 
893             break;
894 
895         case 3: // subj max
896 
897             delta_s = smax - new_pos;
898             delta_q = Int4(round(delta_s * k));
899             if( qmin + delta_q > qmax ) {
900                 delta_q = qmax - qmin;
901             }
902 
903             SetSubjMax(smax - delta_s);
904             if(same_strands) {
905                 SetQueryMax(qmax - delta_q);
906             }
907             else {
908                 SetQueryMin(qmin + delta_q);
909             }
910 
911             break;
912 
913         default:
914             NCBI_THROW(CAlgoAlignUtilException, eBadParameter,
915                        "CAlignShadow::Modify(): invalid end requested");
916         };
917     }
918 }
919 
920 
s_RunLengthEncode(const string & in)921 string CAlignShadow::s_RunLengthEncode(const string& in)
922 {
923     string out;
924     const size_t dim (in.size());
925     if(dim == 0) {
926         return kEmptyStr;
927     }
928 
929     const char* p (in.data());
930     char c0 (p[0]);
931     out.append(1, c0);
932     size_t count (1);
933     for(size_t k (1); k < dim; ++k) {
934 
935         char c (p[k]);
936         if(isdigit(c)) { // consider already encoded
937             return in;
938         }
939 
940         if(c != c0) {
941             c0 = c;
942             if(count > 1) {
943                 out += NStr::NumericToString(count);
944             }
945             count = 1;
946             out.append(1, c0);
947         }
948         else {
949             ++count;
950         }
951     }
952 
953     if(count > 1) {
954         out += NStr::NumericToString(count);
955     }
956 
957     return out;
958 }
959 
960 
s_RunLengthDecode(const string & in)961 string CAlignShadow::s_RunLengthDecode(const string& in)
962 {
963     string out;
964     char C = 0;
965     Uint4 N = 0;
966     ITERATE(string, ii, in) {
967 
968         char c = *ii;
969         if('0' <= c && c <= '9') {
970             N = N * 10 + c - '0';
971         }
972         else {
973             if(N > 0) {
974                 out.append(N - 1, C);
975                 N = 0;
976             }
977             out.push_back(C = c);
978         }
979     }
980     if(N > 0) {
981         out.append(N - 1, C);
982     }
983     return out;
984 }
985 
986 
987 END_NCBI_SCOPE
988