1 /*  $Id: gnomon_seq.cpp 626288 2021-02-25 14:08:59Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software / database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software / database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Alexandre Souvorov
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include "gnomon_seq.hpp"
35 
36 BEGIN_NCBI_SCOPE
37 BEGIN_SCOPE(gnomon)
38 
39 const EResidue k_toMinus[5] = { enT, enG, enC, enA, enN };
40 const char *const k_aa_table = "KNKNXTTTTTRSRSXIIMIXXXXXXQHQHXPPPPPRRRRRLLLLLXXXXXEDEDXAAAAAGGGGGVVVVVXXXXX*Y*YXSSSSS*CWCXLFLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
41 
Convert(const CResidueVec & src,CEResidueVec & dst)42 void Convert(const CResidueVec& src, CEResidueVec& dst)
43 {
44     int len = src.size();
45     dst.clear();
46     dst.reserve(len);
47     for(int i = 0; i < len; ++i)
48 	dst.push_back( fromACGT(src[i]) );
49 }
50 
Convert(const CResidueVec & src,CDoubleStrandSeq & dst)51 void Convert(const CResidueVec& src, CDoubleStrandSeq& dst)
52 {
53     Convert(src,dst[ePlus]);
54     ReverseComplement(dst[ePlus],dst[eMinus]);
55 }
56 
Convert(const CEResidueVec & src,CResidueVec & dst)57 void Convert(const CEResidueVec& src, CResidueVec& dst)
58 {
59     int len = src.size();
60     dst.clear();
61     dst.reserve(len);
62     for(int i = 0; i < len; ++i)
63 	dst.push_back( toACGT(src[i]) );
64 }
65 
ReverseComplement(const CEResidueVec & src,CEResidueVec & dst)66 void ReverseComplement(const CEResidueVec& src, CEResidueVec& dst)
67 {
68     int len = src.size();
69     dst.clear();
70     dst.reserve(len);
71     for(int i = len-1; i >= 0; --i)
72 	dst.push_back(k_toMinus[(int)src[i]]);
73 }
74 
75 const TResidue codons[4][4] = {"ATG", "TAA", "TAG", "TGA"};
76 const TResidue rev_codons[4][4] = {"CAT", "TTA", "CTA", "TCA"};
77 static const EResidue s_ecodons0 [3] = { enA, enT, enG };
78 static const EResidue s_ecodons1 [3] = { enT, enA, enA };
79 static const EResidue s_ecodons2 [3] = { enT, enA, enG };
80 static const EResidue s_ecodons3 [3] = { enT, enG, enA };
81 static const EResidue s_ecodons0r[3] = { enC, enA, enT };
82 static const EResidue s_ecodons1r[3] = { enT, enT, enA };
83 static const EResidue s_ecodons2r[3] = { enC, enT, enA };
84 static const EResidue s_ecodons3r[3] = { enT, enC, enA };
85 const EResidue *ecodons[4] = {s_ecodons0, s_ecodons1, s_ecodons2, s_ecodons3};
86 const EResidue *rev_ecodons[4] = {s_ecodons0r, s_ecodons1r, s_ecodons2r, s_ecodons3r};
87 
88 template <typename Res>
89 class res_traits {
90 public:
_fromACGT(TResidue x)91     static Res _fromACGT(TResidue x)
92     { return x; }
_codons(int i)93     static const Res* _codons(int i) { return codons[i]; }
_rev_codons(int i)94     static const Res* _rev_codons(int i) { return rev_codons[i]; }
95 };
96 
97 template<>
98 class res_traits<EResidue> {
99 public:
_fromACGT(TResidue x)100     static EResidue _fromACGT(TResidue x)
101     { return fromACGT(x); }
_codons(int i)102     static const EResidue* _codons(int i) { return ecodons[i]; }
_rev_codons(int i)103     static const EResidue* _rev_codons(int i) { return rev_ecodons[i]; }
104 };
105 
106 template <class Res>
IsStartCodon(const Res * seq,int strand)107 bool IsStartCodon(const Res * seq, int strand)   // seq points to A for both strands
108 {
109     const Res * start_codon;
110     if(strand == ePlus)
111         start_codon = res_traits<Res>::_codons(0);
112     else {
113         start_codon = res_traits<Res>::_rev_codons(0);
114         seq -= 2;
115     }
116     return equal(start_codon,start_codon+3,seq);
117 }
118 
119 template bool IsStartCodon<EResidue>(const EResidue * seq, int strand);
120 template bool IsStartCodon<TResidue>(const TResidue * seq, int strand);
121 
122 template <class Res>
IsStopCodon(const Res * seq,int strand)123 bool IsStopCodon(const Res * seq, int strand)   // seq points to T for both strands
124 {
125     if(strand == ePlus) {
126         if (*seq != res_traits<Res>::_codons(1)[0]) // T
127             return false;
128         ++seq;
129         for (int i = 1; i <= 3; ++i)
130             if(equal(res_traits<Res>::_codons(i)+1,res_traits<Res>::_codons(i)+3,seq))
131                 return true;
132         return false;
133     } else {
134         if (*seq != res_traits<Res>::_rev_codons(1)[2]) // A
135             return false;
136         seq -= 2;
137         for (int i = 1; i <= 3; ++i)
138             if(equal(res_traits<Res>::_rev_codons(i)+1,res_traits<Res>::_rev_codons(i)+3,seq))
139                 return true;
140         return false;
141     }
142 }
143 template bool IsStopCodon<EResidue>(const EResidue * seq, int strand);
144 template bool IsStopCodon<TResidue>(const TResidue * seq, int strand);
145 
146 
FindAllCodonInstances(TIVec positions[],const EResidue codon[],const CEResidueVec & mrna,TSignedSeqRange search_region,int fixed_frame)147 void FindAllCodonInstances(TIVec positions[], const EResidue codon[], const CEResidueVec& mrna, TSignedSeqRange search_region, int fixed_frame)
148 {
149     for (CEResidueVec::const_iterator pos = mrna.begin()+search_region.GetFrom(); (pos = search(pos,mrna.end(),codon,codon+3)) < mrna.begin()+search_region.GetTo(); ++pos) {
150         int l = pos-mrna.begin();
151         int frame = l%3;
152         if (fixed_frame==-1 || fixed_frame==frame)
153             positions[frame].push_back(l);
154     }
155 }
156 
FindAllStarts(TIVec starts[],const CEResidueVec & mrna,TSignedSeqRange search_region,int fixed_frame)157 void FindAllStarts(TIVec starts[], const CEResidueVec& mrna, TSignedSeqRange search_region, int fixed_frame)
158 {
159     FindAllCodonInstances(starts, ecodons[0], mrna, search_region, fixed_frame);
160 }
161 
FindAllStops(TIVec stops[],const CEResidueVec & mrna,TSignedSeqRange search_region,int fixed_frame)162 void FindAllStops(TIVec stops[], const CEResidueVec& mrna, TSignedSeqRange search_region, int fixed_frame)
163 {
164     for (int i=1; i <=3; ++i)
165         FindAllCodonInstances(stops, ecodons[i], mrna, search_region, fixed_frame);
166     for (int f = 0; f < 3; ++f)
167         sort(stops[f].begin(), stops[f].end());
168 }
169 
170 
Partial5pCodonIsStop(const CEResidueVec & seq_strand,int start,int frame)171 bool Partial5pCodonIsStop(const CEResidueVec& seq_strand, int start, int frame) {
172     if(frame == 0)      // no partial codon
173         return false;
174 
175     int codon_start = start+frame-3;
176     if(codon_start >= 0 && IsStopCodon(&seq_strand[codon_start]))
177         return true;
178 
179     return false;
180 }
181 
FindStartsStops(const CGeneModel & model,const CEResidueVec & contig_seq,const CEResidueVec & mrna,const CAlignMap & mrnamap,TIVec starts[3],TIVec stops[3],int & frame,bool obeystart)182 void FindStartsStops(const CGeneModel& model, const CEResidueVec& contig_seq, const CEResidueVec& mrna, const CAlignMap& mrnamap, TIVec starts[3],  TIVec stops[3], int& frame, bool obeystart)
183 {
184     int left_cds_limit = -1;
185     int reading_frame_start = mrna.size();
186     int reading_frame_stop = mrna.size();
187     int right_cds_limit = mrna.size();
188     frame = -1;
189     EStrand strand = model.Strand();
190 
191     if (!model.ReadingFrame().Empty()) {
192         //        left_cds_limit = mrnamap.MapOrigToEdited(model.GetCdsInfo().MaxCdsLimits().GetFrom());
193         //        _ASSERT(left_cds_limit >= 0 || model.GetCdsInfo().MaxCdsLimits().GetFrom() == TSignedSeqRange::GetWholeFrom());
194         //        right_cds_limit = mrnamap.MapOrigToEdited(model.GetCdsInfo().MaxCdsLimits().GetTo());
195         //        _ASSERT(right_cds_limit >= 0 || model.GetCdsInfo().MaxCdsLimits().GetTo() == TSignedSeqRange::GetWholeTo());
196 
197         //        if(strand == eMinus) {
198         //            std::swap(left_cds_limit,right_cds_limit);
199         //        }
200         //        if(right_cds_limit < 0) right_cds_limit = mrna.size();
201 
202         TSignedSeqRange rf = mrnamap.MapRangeOrigToEdited(model.ReadingFrame(),true);
203         reading_frame_start = rf.GetFrom();
204         _ASSERT(reading_frame_start >= 0);
205         reading_frame_stop = rf.GetTo();
206         _ASSERT(reading_frame_stop >= 0);
207 
208         if (reading_frame_start == 0 && IsStartCodon(&mrna[reading_frame_start]) && reading_frame_start+3 < reading_frame_stop)
209             reading_frame_start += 3;
210 
211         _ASSERT( -1 <= left_cds_limit && left_cds_limit <= reading_frame_start );
212         _ASSERT( 0 <= reading_frame_start && reading_frame_start <= reading_frame_stop && reading_frame_stop < int(mrna.size()) );
213         _ASSERT( reading_frame_stop <= right_cds_limit && right_cds_limit <= int(mrna.size()) );
214 
215         frame = reading_frame_start%3;
216 
217         if (left_cds_limit<0) {
218             if (reading_frame_start >= 3) {
219                 FindAllStops(stops,mrna,TSignedSeqRange(0,reading_frame_start),frame);   // 5' inframe stops
220             }
221 
222             if (stops[frame].size()>0)
223                 left_cds_limit = stops[frame].back()+3;                                  // earliest start of CDS
224         } else {
225             FindAllStops(stops,mrna,TSignedSeqRange(0,left_cds_limit),frame);
226         }
227 
228         reading_frame_start = reading_frame_stop-5;              // allow starts inside reading frame if not protein
229         if(model.GetCdsInfo().ProtReadingFrame().NotEmpty()) {
230             TSignedSeqRange protrf = mrnamap.MapRangeOrigToEdited(model.GetCdsInfo().ProtReadingFrame(),true);
231             reading_frame_start = min(protrf.GetFrom(),reading_frame_start);
232         }
233     }
234 
235     if (left_cds_limit<0) {                                                             // don't know frame or no upstream stop
236         int model_start = mrnamap.MapEditedToOrig(0);
237 
238         if(Include(model.GetCdsInfo().ProtReadingFrame(),model_start) && reading_frame_start < 3) {
239             starts[0].push_back(-3);            // proteins are scored no matter what
240         } else {
241             if(strand == eMinus)
242                 model_start = contig_seq.size()-1-model_start;
243             for (int i = 0; i<3; ++i) {
244                 if (frame == -1 || frame == i) {
245 
246                     if(Partial5pCodonIsStop(contig_seq,model_start,i))
247                         stops[i].push_back(i-3); // stop to limit MaxCds
248                     else
249                         starts[i].push_back(i-3);                                      // fake start(s) for open cds
250 
251 
252                     /*
253                     if (UpstreamStartOrSplice(contig_seq,model_start,i))
254                         starts[i].push_back(i-3);
255                     else
256                         stops[i].push_back(i-3); // bogus stop to limit MaxCds
257                     */
258                 }
259             }
260         }
261 
262         left_cds_limit = 0;
263     }
264 
265     if(obeystart && model.HasStart()) {
266         TSignedSeqRange start = mrnamap.MapRangeOrigToEdited(model.GetCdsInfo().Start(),false);
267         starts[frame].push_back(start.GetFrom());
268     } else if(reading_frame_start-left_cds_limit >= 3) {
269         FindAllStarts(starts,mrna,TSignedSeqRange(left_cds_limit,reading_frame_start-1),frame); // 5' starts or all starts
270     }
271 
272     if (frame==-1) {
273         FindAllStops(stops,mrna,TSignedSeqRange(0,mrna.size()-1),frame);                        // all stops
274     } else if (right_cds_limit - reading_frame_stop >= 3) {
275         FindAllStops(stops,mrna,TSignedSeqRange(reading_frame_stop+1,right_cds_limit),frame);   // inframe 3' stops
276     }
277 
278     if (int(mrna.size()) <= right_cds_limit) {                                                  // fake stops for partials
279         stops[mrna.size()%3].push_back(mrna.size());
280         stops[(mrna.size()-1)%3].push_back(mrna.size()-1);
281         stops[(mrna.size()-2)%3].push_back(mrna.size()-2);
282     }
283 
284 }
285 
FindUpstreamStop(const vector<int> & stops,int start,int & stop)286 bool FindUpstreamStop(const vector<int>& stops, int start, int& stop)
287 {
288     vector<int>::const_iterator it_stop = lower_bound(stops.begin(),stops.end(),start);
289 
290     if(it_stop != stops.begin()) {
291         stop = *(--it_stop);
292         return true;
293     } else
294         return false;
295 }
296 
297 /*
298 TInDels CAlignMap::GetAllCorrections() const {
299 
300     TInDels indels = GetInDels(false);
301     for(int range = 0; range < (int)m_orig_ranges.size(); ++range) {
302         const string& mism = m_edited_ranges[range].GetMismatch();
303         if(!mism.empty()) {
304             int len = m_orig_ranges[range].GetTo()-m_orig_ranges[range].GetFrom()+1;
305             _ASSERT(len == (int)mism.size());
306             indels.push_back(CInDelInfo(m_orig_ranges[range].GetFrom(), len, CInDelInfo::eMism, mism));
307         }
308     }
309     sort(indels.begin(), indels.end());
310 
311     return indels;
312 }
313 */
314 
PushInDel(TInDels & indels,bool fs_only,TSignedSeqPos p,int len,CInDelInfo::EType type,const string & seq="")315 void PushInDel(TInDels& indels, bool fs_only, TSignedSeqPos p, int len, CInDelInfo::EType type, const string& seq = "") {
316     if(!fs_only || len%3 != 0)
317         indels.push_back(CInDelInfo(p, len, type, seq));
318 }
319 
320 /*
321 TInDels CAlignMap::GetInDels(bool fs_only) const {
322     TInDels indels;
323 
324     if(m_orig_ranges.front().GetTypeFrom() != eGgap) {
325         if(m_orig_ranges.front().GetExtraFrom() > 0) {   // everything starts from insertion
326             int len = m_orig_ranges.front().GetExtraFrom();
327             TSignedSeqPos p = m_orig_ranges.front().GetFrom()-len;
328             PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
329         }
330         if(m_edited_ranges.front().GetExtraFrom() > 0) {   // everything starts from deletion
331             int len = m_edited_ranges.front().GetExtraFrom();
332             string seq = m_edited_ranges.front().GetExtraSeqFrom();
333             TSignedSeqPos p = m_orig_ranges.front().GetFrom();
334             PushInDel(indels, fs_only, p, len,CInDelInfo::eDel, seq);
335         }
336     }
337 
338     for(unsigned int range = 1; range < m_orig_ranges.size(); ++range) {
339         if(m_orig_ranges[range].GetTypeFrom() == eGgap)
340             continue;
341 
342         if(fs_only) {
343             int len = m_orig_ranges[range].GetExtraFrom()-m_edited_ranges[range].GetExtraFrom();
344             if(m_orig_ranges[range].GetTypeFrom() != eInDel)
345                 len += m_orig_ranges[range-1].GetExtraTo()-m_edited_ranges[range-1].GetExtraTo();
346             if(len%3 == 0)
347                 continue;
348         }
349 
350         if(m_orig_ranges[range-1].GetTypeTo() != eInDel) {
351             if(m_orig_ranges[range-1].GetExtraTo() > 0) {
352                 int len = m_orig_ranges[range-1].GetExtraTo();
353                 TSignedSeqPos p = m_orig_ranges[range-1].GetTo()+1;
354                 PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
355             }
356             if(m_edited_ranges[range-1].GetExtraTo() > 0) {
357                 int len = m_edited_ranges[range-1].GetExtraTo();
358                 string seq = m_edited_ranges[range-1].GetExtraSeqTo();
359                 TSignedSeqPos p = m_orig_ranges[range-1].GetTo()+m_orig_ranges[range-1].GetExtraTo()+1; // if there is insertion+deletion (mismatch) this is correct position
360                 PushInDel(indels, fs_only, p, len, CInDelInfo::eDel, seq);
361             }
362         }
363 
364         if(m_orig_ranges[range].GetExtraFrom() > 0) {   // insertion on the left side
365             int len = m_orig_ranges[range].GetExtraFrom();
366             TSignedSeqPos p = m_orig_ranges[range].GetFrom()-len;
367             PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
368         }
369         if(m_edited_ranges[range].GetExtraFrom() > 0) {   // deletion on the left side
370             int len = m_edited_ranges[range].GetExtraFrom();
371             string seq = m_edited_ranges[range].GetExtraSeqFrom();
372             TSignedSeqPos p = m_orig_ranges[range].GetFrom();    // if there is insertion+deletion (mismatch) this is correct position
373             PushInDel(indels, fs_only, p, len, CInDelInfo::eDel, seq);
374         }
375     }
376 
377     if(m_orig_ranges.back().GetTypeTo() != eGgap) {
378         if(m_orig_ranges.back().GetExtraTo() > 0) {   // everything ends by insertion
379             int len = m_orig_ranges.back().GetExtraTo();
380             TSignedSeqPos p = m_orig_ranges.back().GetTo()+1;
381             PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
382         }
383         if(m_edited_ranges.back().GetExtraTo() > 0) {   // everything ends by deletion
384             int len = m_edited_ranges.back().GetExtraTo();
385             string seq = m_edited_ranges.back().GetExtraSeqTo();
386             TSignedSeqPos p = m_orig_ranges.back().GetTo()+1;
387             PushInDel(indels, fs_only, p, len, CInDelInfo::eDel, seq);
388         }
389     }
390 
391     return indels;
392 }
393 */
394 
InsertOneToOneRange(TSignedSeqPos orig_start,TSignedSeqPos edited_start,int len,const string & mism,int left_orige,int left_edite,int right_orige,int right_edite,EEdgeType left_type,EEdgeType right_type,const string & left_edit_extra_seq,const string & right_edit_extra_seq)395 void CAlignMap::InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, int len, const string& mism, int left_orige, int left_edite, int right_orige, int right_edite,  EEdgeType left_type, EEdgeType right_type, const string& left_edit_extra_seq, const string& right_edit_extra_seq)
396 {
397     _ASSERT(len > 0);
398     _ASSERT(m_orig_ranges.empty() || (orig_start > m_orig_ranges.back().GetTo() && edited_start > m_edited_ranges.back().GetTo()));
399 
400     CAlignMap::SMapRangeEdge orig_from(orig_start, left_orige, left_type);
401     CAlignMap::SMapRangeEdge orig_to(orig_start+len-1, right_orige, right_type);
402     m_orig_ranges.push_back(SMapRange(orig_from, orig_to, kEmptyStr));
403 
404     CAlignMap::SMapRangeEdge edited_from(edited_start, left_edite, left_type, left_edit_extra_seq);
405     _ASSERT((int)left_edit_extra_seq.length() == 0 || (int)left_edit_extra_seq.length() == left_edite);
406     CAlignMap::SMapRangeEdge edited_to(edited_start+len-1, right_edite, right_type, right_edit_extra_seq);
407     _ASSERT((int)right_edit_extra_seq.length() == 0 || (int)right_edit_extra_seq.length() == right_edite);
408     m_edited_ranges.push_back(SMapRange(edited_from, edited_to, mism));
409 }
410 
InsertIndelRangesForInterval(TSignedSeqPos orig_a,TSignedSeqPos orig_b,TSignedSeqPos edit_a,TInDels::const_iterator fsi_begin,TInDels::const_iterator fsi_end,EEdgeType type_a,EEdgeType type_b,const string & gseq_a,const string & gseq_b)411 TSignedSeqPos CAlignMap::InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin,
412                                                       TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string& gseq_a, const string& gseq_b)
413 {
414     TInDels::const_iterator fsi = fsi_begin;
415 	for( ;fsi != fsi_end && fsi->Loc() < orig_a; ++fsi ) {
416         _ASSERT( !fsi->IntersectingWith(orig_a,orig_b) );
417     }
418 
419     int left_orige = 0;
420     int left_edite = gseq_a.length();
421     string left_edit_extra_seq = gseq_a;
422     string mism;
423 
424     for( ;fsi != fsi_end && fsi->Loc() == orig_a && !fsi->IsMismatch(); ++fsi ) {    // first left end
425         if(fsi->IsInsertion()) {
426             _ASSERT(type_a != eBoundary);
427             orig_a += fsi->Len();
428             left_orige += fsi->Len();
429         } else {
430             edit_a += fsi->Len();
431             left_edite += fsi->Len();
432             left_edit_extra_seq += fsi->GetInDelV();
433         }
434     }
435     for( ; fsi != fsi_end && fsi->IsMismatch() && fsi->Loc() == orig_a+(int)mism.size(); ++fsi)
436         mism += fsi->GetInDelV();
437 
438     while(fsi != fsi_end && fsi->InDelEnd() <= orig_b+1) {
439         int len = (mism.empty() ? fsi->Loc()-orig_a : mism.size());
440         _ASSERT(len > 0 && orig_a+len-1 <= orig_b);
441 
442         int bb = orig_a+len;
443         int right_orige = 0;
444         int right_edite = 0;
445         string right_edit_extra_seq;
446         for( ;fsi != fsi_end && fsi->Loc() == bb && !fsi->IsMismatch(); ++fsi ) {      // right end
447             if (fsi->IsInsertion()) {
448                 right_orige += fsi->Len();
449                 bb += fsi->Len();
450             } else {
451                 right_edite += fsi->Len();
452                 right_edit_extra_seq += fsi->GetInDelV();
453             }
454         }
455 
456         int next_orig_a = orig_a+len+right_orige;
457         EEdgeType tb = eInDel;
458         if(next_orig_a > orig_b) {
459             right_edit_extra_seq += gseq_b;
460             right_edite += gseq_b.length();
461             tb = type_b;
462         }
463         InsertOneToOneRange(orig_a, edit_a, len, mism, left_orige, left_edite, right_orige, right_edite, type_a, tb, left_edit_extra_seq, right_edit_extra_seq);
464 
465         orig_a = next_orig_a;
466         edit_a += len+right_edite;
467         type_a = eInDel;
468         left_orige = right_orige;
469         left_edite = right_edite;
470         left_edit_extra_seq = right_edit_extra_seq;
471         mism.clear();
472         for( ; fsi != fsi_end && fsi->IsMismatch() && fsi->Loc() == orig_a+(int)mism.size(); ++fsi)
473             mism += fsi->GetInDelV();
474     }
475 
476     if(!mism.empty()) {
477         int len = mism.size();
478         string right_edit_extra_seq;
479         EEdgeType tb = eInDel;
480         if(orig_a+len > orig_b) {
481             right_edit_extra_seq = gseq_b;
482             tb = type_b;
483         }
484         InsertOneToOneRange(orig_a, edit_a, len, mism, left_orige, left_edite, 0, gseq_b.length(), type_a, tb, left_edit_extra_seq, right_edit_extra_seq);
485         orig_a += len;
486         edit_a += len;
487         type_a = eInDel;
488         left_orige = 0;
489         left_edite = 0;
490         left_edit_extra_seq.clear();
491         mism.clear();
492     }
493 
494     if(orig_a <= orig_b) {
495         int len = orig_b-orig_a+1;
496         _ASSERT(len > 0);
497         InsertOneToOneRange(orig_a, edit_a, len, mism, left_orige, left_edite, 0, gseq_b.length(), type_a, type_b, left_edit_extra_seq, gseq_b);
498         edit_a += len;
499     }
500 
501     return edit_a;
502 }
503 
CAlignMap(const CGeneModel::TExons & exons,const vector<TSignedSeqRange> & transcript_exons,const TInDels & indels,EStrand orientation,int target_len)504 CAlignMap::CAlignMap(const CGeneModel::TExons& exons, const vector<TSignedSeqRange>& transcript_exons, const TInDels& indels, EStrand orientation, int target_len ) : m_orientation(orientation), m_target_len(target_len) {
505 
506 #ifdef _DEBUG
507     _ASSERT(transcript_exons.size() == exons.size());
508     _ASSERT(transcript_exons.size() == 1 || (orientation == ePlus && transcript_exons.front().GetFrom() < transcript_exons.back().GetFrom()) ||
509            (orientation == eMinus && transcript_exons.front().GetFrom() > transcript_exons.back().GetFrom()));
510     int diff = 0;
511     for(unsigned int i = 0; i < exons.size(); ++i) {
512         int exonlen = (exons[i].Limits().Empty()) ? exons[i].m_seq.length() : exons[i].Limits().GetLength();
513         diff += exonlen-(transcript_exons[i].GetTo()-transcript_exons[i].GetFrom()+1);
514     }
515     ITERATE(TInDels, f, indels) {
516         if(!f->IsMismatch())
517             diff += (f->IsDeletion()) ? f->Len() : -f->Len();
518     }
519     _ASSERT(diff == 0);
520 #endif
521 
522     m_orig_ranges.reserve(exons.size()+indels.size());
523     m_edited_ranges.reserve(exons.size()+indels.size());
524 
525     TSignedSeqPos estart = (m_orientation == ePlus) ? transcript_exons.front().GetFrom() : transcript_exons.back().GetFrom();
526     for(unsigned int i = 0; i < exons.size(); ++i) {
527         if(exons[i].Limits().Empty()) {
528             _ASSERT(i == 0 || exons[i-1].Limits().NotEmpty());
529             _ASSERT(i == exons.size()-1 || exons[i+1].Limits().NotEmpty());
530         } else {
531             EEdgeType type_a = exons[i].m_fsplice ? eSplice : eBoundary;
532             EEdgeType type_b = exons[i].m_ssplice ? eSplice : eBoundary;
533             string gseq_a;
534             string gseq_b;
535             if(i > 0 && exons[i-1].Limits().Empty()) {  // prev exon is Ggap
536                 type_a = eGgap;
537                 gseq_a = exons[i-1].m_seq;
538                 estart += gseq_a.length();
539             }
540             if(i < exons.size()-1 && exons[i+1].Limits().Empty()) {  // next exon is Ggap
541                 type_b = eGgap;
542                 gseq_b = exons[i+1].m_seq;
543             }
544             if(m_orientation == eMinus) {
545                 ReverseComplement(gseq_a.begin(),gseq_a.end());
546                 ReverseComplement(gseq_b.begin(),gseq_b.end());
547             }
548             estart = InsertIndelRangesForInterval(exons[i].GetFrom(), exons[i].GetTo(), estart, indels.begin(), indels.end(), type_a, type_b, gseq_a, gseq_b);
549         }
550 
551         if(i != exons.size()-1) {
552             if(m_orientation == ePlus) {
553                 estart += transcript_exons[i+1].GetFrom()-transcript_exons[i].GetTo()-1;
554             } else {
555                 estart += transcript_exons[i].GetFrom()-transcript_exons[i+1].GetTo()-1;
556             }
557         }
558     }
559 }
560 
CAlignMap(const CGeneModel::TExons & exons,const TInDels & indels,EStrand strand,TSignedSeqRange lim,int holelen,int polyalen)561 CAlignMap::CAlignMap(const CGeneModel::TExons& exons, const TInDels& indels, EStrand strand, TSignedSeqRange lim, int holelen, int polyalen) : m_orientation(strand) {
562 
563     TInDels::const_iterator fsi_begin = indels.begin();
564     TInDels::const_iterator fsi_end = indels.end();
565 
566     m_orig_ranges.reserve(exons.size()+(fsi_end-fsi_begin));
567     m_edited_ranges.reserve(exons.size()+(fsi_end-fsi_begin));
568 
569     TSignedSeqPos estart = 0;
570     for(unsigned int i = 0; i < exons.size(); ++i) {
571         if(exons[i].Limits().Empty()) {
572             _ASSERT(i == 0 || exons[i-1].Limits().NotEmpty());
573             _ASSERT(i == exons.size()-1 || exons[i+1].Limits().NotEmpty());
574         } else {
575             TSignedSeqPos start = exons[i].GetFrom();
576             TSignedSeqPos stop = exons[i].GetTo();
577             EEdgeType type_a = exons[i].m_fsplice ? eSplice : eBoundary;
578             EEdgeType type_b = exons[i].m_ssplice ? eSplice : eBoundary;
579             string gseq_a;
580             string gseq_b;
581             if(i > 0 && exons[i-1].Limits().Empty()) {  // prev exon is Ggap
582                 type_a = eGgap;
583                 gseq_a = exons[i-1].m_seq;
584                 estart += gseq_a.length();
585             }
586             if(i < exons.size()-1 && exons[i+1].Limits().Empty()) {  // next exon is Ggap
587                 type_b = eGgap;
588                 gseq_b = exons[i+1].m_seq;
589             }
590             if(m_orientation == eMinus) {
591                 ReverseComplement(gseq_a.begin(),gseq_a.end());
592                 ReverseComplement(gseq_b.begin(),gseq_b.end());
593             }
594 
595             if(stop < lim.GetFrom()) continue;
596             if(lim.GetTo() < start) break;
597 
598             if(lim.GetFrom() >= start) {
599                 start = lim.GetFrom();
600                 type_a = eBoundary;
601             }
602             if(lim.GetTo() <= stop) {
603                 stop = lim.GetTo();
604                 type_b = eBoundary;
605             }
606 
607             estart = InsertIndelRangesForInterval(start, stop, estart, fsi_begin, fsi_end, type_a, type_b, gseq_a, gseq_b);
608             if(i != exons.size()-1 && (!exons[i+1].m_fsplice || !exons[i].m_ssplice))
609                 estart += holelen;
610         }
611     }
612 
613     if(!m_edited_ranges.empty())
614         m_target_len = m_edited_ranges.back().GetExtendedTo()+1+polyalen;
615 
616     _ASSERT(m_edited_ranges.size() == m_orig_ranges.size());
617 }
618 
619 template <class Vec>
EditedSequence(const Vec & original_sequence,Vec & edited_sequence,bool includeholes) const620 void CAlignMap::EditedSequence(const Vec& original_sequence, Vec& edited_sequence, bool includeholes) const
621 {
622     edited_sequence.clear();
623 
624     string s;
625     if(includeholes) {
626         int l = (m_orientation == ePlus) ? m_edited_ranges.front().GetFrom() : CAlignMap::TargetLen()-m_edited_ranges.back().GetTo()-1;
627         s.insert(s.end(), l, 'N');
628     } else {
629         s = m_edited_ranges.front().GetExtraSeqFrom();
630     }
631     ITERATE(string, i, s)
632         edited_sequence.push_back(res_traits<typename Vec::value_type>::_fromACGT(*i));
633 
634     for(int range = 0; range < (int)m_orig_ranges.size(); ++range) {
635         string seq = m_edited_ranges[range].GetMismatch();
636 
637         if(seq.empty()) {
638             int a = m_orig_ranges[range].GetFrom();
639             int b = m_orig_ranges[range].GetTo()+1;
640             edited_sequence.insert(edited_sequence.end(),original_sequence.begin()+a, original_sequence.begin()+b);
641         }
642 
643         if(range < (int)m_orig_ranges.size()-1) {
644             if(m_edited_ranges[range].GetTypeTo() == eBoundary) {
645                 if(includeholes) {
646                     int l = m_edited_ranges[range+1].GetFrom()-m_edited_ranges[range].GetTo()-1;                  // missed part
647                     seq.insert(seq.end(), l, 'N');
648                 }
649             } else if(m_edited_ranges[range].GetTypeTo() == eSplice) {
650                 seq += m_edited_ranges[range].GetExtraSeqTo() + m_edited_ranges[range+1].GetExtraSeqFrom();  // indels from two exon ends
651             } else {
652                 seq += m_edited_ranges[range].GetExtraSeqTo(); // indel inside exon or Ggap
653             }
654         } else {
655             if(includeholes) {
656                 int l = (m_orientation == ePlus) ? CAlignMap::TargetLen()-m_edited_ranges.back().GetTo()-1 : m_edited_ranges.front().GetFrom();
657                 seq.insert(seq.end(), l, 'N');
658             } else {
659                 seq += m_edited_ranges.back().GetExtraSeqTo();
660             }
661         }
662         ITERATE(string, i, seq)
663             edited_sequence.push_back(res_traits<typename Vec::value_type>::_fromACGT(*i));
664     }
665 
666     if(m_orientation == eMinus)
667         ReverseComplement(edited_sequence.begin(), edited_sequence.end());
668 }
669 
FindLowerRange(const vector<CAlignMap::SMapRange> & a,TSignedSeqPos p)670 int CAlignMap::FindLowerRange(const vector<CAlignMap::SMapRange>& a,  TSignedSeqPos p) {
671     int num = lower_bound(a.begin(), a.end(), CAlignMap::SMapRange(p+1, p+1, kEmptyStr))-a.begin()-1;
672     return num;
673 }
674 
675 template
676 void CAlignMap::EditedSequence<CResidueVec>(const CResidueVec& original_sequence, CResidueVec& edited_sequence, bool includeholes) const;
677 template
678 void CAlignMap::EditedSequence<CEResidueVec>(const CEResidueVec& original_sequence, CEResidueVec& edited_sequence, bool includeholes) const;
679 template
680 void CAlignMap::EditedSequence<string>(const string& original_sequence, string& edited_sequence, bool includeholes) const;
681 
ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const682 TSignedSeqRange CAlignMap::ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const {
683 
684     if(m_orientation == eMinus) {
685         int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
686         TSignedSeqPos left = edited_range.GetTo();
687         TSignedSeqPos right = edited_range.GetFrom();
688 
689         if(left == TSignedSeqRange::GetWholeTo()) {
690             left = TSignedSeqRange::GetWholeFrom();
691         } else {
692             left = offset-left;
693         }
694 
695         if(right == TSignedSeqRange::GetWholeFrom()) {
696             right = TSignedSeqRange::GetWholeTo();
697         } else {
698             right = offset-right;
699         }
700 
701         edited_range = TSignedSeqRange(left, right);
702     }
703 
704     _ASSERT(edited_range.NotEmpty() && Include(TSignedSeqRange(m_edited_ranges.front().GetExtendedFrom(),m_edited_ranges.back().GetExtendedTo()), edited_range));
705 
706     TSignedSeqPos a = edited_range.GetFrom();
707     int numa =  FindLowerRange(m_edited_ranges, a);
708 
709     if(numa < 0 || a > m_edited_ranges[numa].GetTo()) {   // a was insertion on the genome, moved to first projectable point
710         ++numa;
711         if((int)m_edited_ranges.size() == numa)
712             return TSignedSeqRange::GetEmpty();
713         a = m_edited_ranges[numa].GetFrom();
714     }
715 
716     TSignedSeqPos b = edited_range.GetTo();
717     int numb =  FindLowerRange(m_edited_ranges, b);
718 
719     if(b > m_edited_ranges[numb].GetTo()) {   // a was insertion on the genome, moved to first projectable point
720        b = m_edited_ranges[numb].GetTo();
721     }
722 
723     if(m_orientation == eMinus) {
724         int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
725         TSignedSeqPos left = b;
726         TSignedSeqPos right = a;
727 
728         if(left == TSignedSeqRange::GetWholeTo()) {
729             left = TSignedSeqRange::GetWholeFrom();
730         } else {
731             left = offset-left;
732         }
733 
734         if(right == TSignedSeqRange::GetWholeFrom()) {
735             right = TSignedSeqRange::GetWholeTo();
736         } else {
737             right = offset-right;
738         }
739 
740         a = left;
741         b = right;
742     }
743 
744     return TSignedSeqRange(a, b);
745 }
746 
747 
ShrinkToRealPoints(TSignedSeqRange orig_range,bool snap_to_codons) const748 TSignedSeqRange CAlignMap::ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons) const {
749 
750     _ASSERT(orig_range.NotEmpty() && Include(TSignedSeqRange(m_orig_ranges.front().GetExtendedFrom(),m_orig_ranges.back().GetExtendedTo()), orig_range));
751 
752     TSignedSeqPos a = orig_range.GetFrom();
753     int numa =  FindLowerRange(m_orig_ranges, a);
754 
755     if(numa < 0 || a > m_orig_ranges[numa].GetTo()) {   // a was insertion on the genome, moved to first projectable point
756         ++numa;
757         if((int)m_orig_ranges.size() == numa)
758             return TSignedSeqRange::GetEmpty();
759         a = m_orig_ranges[numa].GetFrom();
760     }
761     if(snap_to_codons) {
762         bool snapped = false;
763         while(!snapped) {
764             TSignedSeqPos tp = m_edited_ranges[numa].GetFrom()+a-m_orig_ranges[numa].GetFrom();
765             if(m_orientation == eMinus)
766                 tp = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-tp;
767             if((m_orientation == ePlus && tp%3 == 0) || (m_orientation == eMinus && tp%3 == 2)) {
768                 snapped = true;
769             } else {                                       // not a codon boundary
770                 if(a < m_orig_ranges[numa].GetTo()) {      // can move in this interval
771                     ++a;
772                 } else {                                   // moved to next interval
773                     ++numa;
774                     if((int)m_orig_ranges.size() == numa)
775                         return TSignedSeqRange::GetEmpty();
776                     a = m_orig_ranges[numa].GetFrom();
777                 }
778             }
779         }
780     }
781 
782 
783     TSignedSeqPos b = orig_range.GetTo();
784     int numb =  FindLowerRange(m_orig_ranges, b);
785 
786     if(b > m_orig_ranges[numb].GetTo()) {   // a was insertion on the genome, moved to first projectable point
787        b = m_orig_ranges[numb].GetTo();
788     }
789     if(snap_to_codons) {
790         bool snapped = false;
791         while(!snapped) {
792             TSignedSeqPos tp = m_edited_ranges[numb].GetFrom()+b-m_orig_ranges[numb].GetFrom();
793             if(m_orientation == eMinus)
794                 tp = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-tp;
795             if((m_orientation == ePlus && tp%3 == 2) || (m_orientation == eMinus && tp%3==0)) {
796                 snapped = true;
797             } else {                                       // not a codon boundary
798                 if(b > m_orig_ranges[numb].GetFrom()) {      // can move in this interval
799                     --b;
800                 } else {                                   // moved to next interval
801                     --numb;
802                     if(numb < 0)
803                         return TSignedSeqRange::GetEmpty();
804                     b = m_orig_ranges[numb].GetTo();
805                 }
806             }
807         }
808     }
809 
810     return TSignedSeqRange(a, b);
811 }
812 
FShiftedMove(TSignedSeqPos orig_pos,int len) const813 TSignedSeqPos CAlignMap::FShiftedMove(TSignedSeqPos orig_pos, int len) const {
814     orig_pos = MapOrigToEdited(orig_pos);
815     if(orig_pos < 0)
816         return orig_pos;
817     if(m_orientation == ePlus)
818         orig_pos += len;
819     else
820         orig_pos -= len;
821     orig_pos = MapEditedToOrig(orig_pos);
822     return orig_pos;
823 }
824 
825 
MapAtoB(const vector<CAlignMap::SMapRange> & a,const vector<CAlignMap::SMapRange> & b,TSignedSeqPos p,ERangeEnd move_mode)826 TSignedSeqPos CAlignMap::MapAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqPos p, ERangeEnd move_mode) {
827     if(p < a.front().GetExtendedFrom() || p > a.back().GetExtendedTo()) return -1;
828 
829     if(p < a.front().GetFrom()) {
830         if(move_mode == eLeftEnd && b.front().GetTypeFrom() != eGgap) {
831             return b.front().GetExtendedFrom();
832         } else {
833             return -1;
834         }
835     }
836 
837     if(p > a.back().GetTo()) {
838         if(move_mode == eRightEnd && b.back().GetTypeTo() != eGgap) {
839            return b.back().GetExtendedTo();
840         } else {
841             return -1;
842         }
843     }
844 
845     int num = FindLowerRange(a, p);                               // range a[num] exists and its start <= p
846                                                                   // if a[num+1] exists all points are > p
847     if(p > a[num].GetTo()) {                                      //  between ranges (insertion or intron in a), num+1 exists
848         if(a[num].GetTypeTo() == eGgap)
849             return -1;
850 
851         switch(move_mode) {
852         case eLeftEnd:
853             return b[num+1].GetExtendedFrom();
854         case eRightEnd:
855             return b[num].GetExtendedTo();
856         default:
857             return -1;
858         }
859     } else if(p == a[num].GetTo()) {
860         if(move_mode == eRightEnd && b[num].GetTypeTo() != eGgap) {
861             return b[num].GetExtendedTo();
862         } else if(p == a[num].GetFrom() && move_mode == eLeftEnd && b[num].GetTypeFrom() != eGgap) {          // one base interval
863             return b[num].GetExtendedFrom();
864         } else {
865             return b[num].GetTo();
866         }
867     } else if(p == a[num].GetFrom()) {
868         if(move_mode == eLeftEnd && b[num].GetTypeFrom() != eGgap) {
869             return b[num].GetExtendedFrom();
870         } else {
871             return b[num].GetFrom();
872         }
873     } else {
874         return b[num].GetFrom()+p-a[num].GetFrom();
875     }
876 }
877 
MapOrigToEdited(TSignedSeqPos orig_pos) const878 TSignedSeqPos CAlignMap::MapOrigToEdited(TSignedSeqPos orig_pos)  const {
879     TSignedSeqPos p = MapAtoB(m_orig_ranges, m_edited_ranges, orig_pos, eSinglePoint);
880 
881     if(m_orientation == eMinus && p >= 0) {
882         p = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-p;
883     }
884     return p;
885 }
886 
MapEditedToOrig(TSignedSeqPos edited_pos) const887 TSignedSeqPos CAlignMap::MapEditedToOrig(TSignedSeqPos edited_pos)  const {
888     if(m_orientation == eMinus) {
889         edited_pos = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-edited_pos;
890     }
891 
892     return MapAtoB(m_edited_ranges, m_orig_ranges, edited_pos, eSinglePoint);
893 }
894 
895 
MapRangeAtoB(const vector<CAlignMap::SMapRange> & a,const vector<CAlignMap::SMapRange> & b,TSignedSeqRange r,ERangeEnd lend,ERangeEnd rend)896 TSignedSeqRange CAlignMap::MapRangeAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend) {
897 
898     if(r.Empty()) return TSignedSeqRange::GetEmpty();
899 
900     TSignedSeqPos left;
901     if(r.GetFrom() == TSignedSeqRange::GetWholeFrom()) {
902         left = TSignedSeqRange::GetWholeFrom();
903     } else {
904         left = MapAtoB(a, b, r.GetFrom(), lend);
905         if(left < 0)
906             return TSignedSeqRange::GetEmpty();
907     }
908     TSignedSeqPos right;
909     if(r.GetTo() == TSignedSeqRange::GetWholeTo()) {
910         right = TSignedSeqRange::GetWholeTo();
911     } else {
912         right = MapAtoB(a, b, r.GetTo(), rend);
913         if(right < 0)
914             return TSignedSeqRange::GetEmpty();
915     }
916 
917     _ASSERT(right >= left);
918 
919     return TSignedSeqRange(left, right);
920 }
921 
MapRangeOrigToEdited(TSignedSeqRange orig_range,ERangeEnd lend,ERangeEnd rend) const922 TSignedSeqRange CAlignMap::MapRangeOrigToEdited(TSignedSeqRange orig_range, ERangeEnd lend, ERangeEnd rend) const {
923 
924     if(orig_range.Empty()) return TSignedSeqRange::GetEmpty();
925 
926     TSignedSeqRange er = MapRangeAtoB(m_orig_ranges, m_edited_ranges, orig_range, lend, rend);
927 
928     if(er.Empty() || m_orientation == ePlus)
929         return er;
930 
931     int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
932     TSignedSeqPos left = er.GetTo();
933     TSignedSeqPos right = er.GetFrom();
934 
935     if(left == TSignedSeqRange::GetWholeTo()) {
936         left = TSignedSeqRange::GetWholeFrom();
937     } else {
938         left = offset-left;
939     }
940 
941     if(right == TSignedSeqRange::GetWholeFrom()) {
942         right = TSignedSeqRange::GetWholeTo();
943     } else {
944         right = offset-right;
945     }
946 
947     return TSignedSeqRange(left, right);
948 }
949 
MapRangeEditedToOrig(TSignedSeqRange edited_range,bool withextras) const950 TSignedSeqRange CAlignMap::MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras) const {
951 
952     if(edited_range.Empty()) return TSignedSeqRange::GetEmpty();
953 
954     if(m_orientation == eMinus) {
955         int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
956         TSignedSeqPos left = edited_range.GetTo();
957         TSignedSeqPos right = edited_range.GetFrom();
958 
959         if(left == TSignedSeqRange::GetWholeTo()) {
960             left = TSignedSeqRange::GetWholeFrom();
961         } else {
962             left = offset-left;
963         }
964 
965         if(right == TSignedSeqRange::GetWholeFrom()) {
966             right = TSignedSeqRange::GetWholeTo();
967         } else {
968             right = offset-right;
969         }
970 
971         edited_range = TSignedSeqRange(left, right);
972     }
973 
974     return MapRangeAtoB(m_edited_ranges, m_orig_ranges, edited_range, withextras);
975 }
976 
FShiftedLen(TSignedSeqRange ab,ERangeEnd lend,ERangeEnd rend) const977 int CAlignMap::FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const {
978 
979     int len = MapRangeOrigToEdited(ab, lend, rend).GetLength();
980 
981     for(int i = 1; i < (int)m_edited_ranges.size(); ++i) {
982         if(m_edited_ranges[i].GetTypeFrom() == eBoundary && Include(ab,m_orig_ranges[i].GetFrom()))
983             len -= m_edited_ranges[i].GetFrom()-m_edited_ranges[i-1].GetTo()-1;
984     }
985 
986     return len;
987 }
988 
FShiftedLen(TSignedSeqRange ab,bool withextras) const989 int CAlignMap::FShiftedLen(TSignedSeqRange ab, bool withextras) const {
990 
991     int len = MapRangeOrigToEdited(ab, withextras).GetLength();
992 
993     for(int i = 1; i < (int)m_edited_ranges.size(); ++i) {
994         if(m_edited_ranges[i].GetTypeFrom() == eBoundary && Include(ab,m_orig_ranges[i-1].GetTo()) && Include(ab,m_orig_ranges[i].GetFrom()))
995             len -= m_edited_ranges[i].GetFrom()-m_edited_ranges[i-1].GetTo()-1;
996     }
997 
998     return len;
999 }
1000 
1001 
1002 
1003 END_SCOPE(gnomon)
1004 END_NCBI_SCOPE
1005