1 #ifndef ALGO_GNOMON___GNOMON_MODEL__HPP
2 #define ALGO_GNOMON___GNOMON_MODEL__HPP
3
4 /* $Id: gnomon_model.hpp 626290 2021-02-25 14:09:15Z ivanov $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Authors: Alexandre Souvorov
30 *
31 * File Description:
32 *
33 */
34
35 #include <corelib/ncbiobj.hpp>
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbi_limits.hpp>
38 #include "set.hpp"
39
40 #include <set>
41 #include <vector>
42 #include <algorithm>
43 #include <math.h>
44
45 #include <objmgr/seq_vector_ci.hpp> // CSeqVectorTypes::TResidue
46 #include <util/range.hpp> // TSignedSeqRange
47
48 BEGIN_NCBI_SCOPE
49 BEGIN_SCOPE(objects)
50 class CSeq_align;
51 class CSeq_id;
52 class CGenetic_code;
53 END_SCOPE(objects)
54 USING_SCOPE(objects);
55 BEGIN_SCOPE(gnomon)
56
57 class CGnomonEngine;
58
59 // Making this a constant declaration (kBadScore) would be preferable,
60 // but backfires on WorkShop, where it is implicitly static and hence
61 // unavailable for use in inline functions.
62 inline
BadScore()63 double BadScore() { return -numeric_limits<double>::max(); }
64
65 enum EStrand { ePlus, eMinus};
OtherStrand(EStrand s)66 inline EStrand OtherStrand(EStrand s) { return (s == ePlus ? eMinus : ePlus); }
67
68 typedef objects::CSeqVectorTypes::TResidue TResidue; // unsigned char
69
70 typedef vector<TResidue> CResidueVec;
71
72 typedef vector<int> TIVec;
73
Precede(TSignedSeqRange l,TSignedSeqRange r)74 inline bool Precede(TSignedSeqRange l, TSignedSeqRange r) { return l.GetTo() < r.GetFrom(); }
Include(TSignedSeqRange big,TSignedSeqRange small)75 inline bool Include(TSignedSeqRange big, TSignedSeqRange small) { return (big.GetFrom()<=small.GetFrom() && small.GetTo()<=big.GetTo()); }
Include(TSignedSeqRange r,TSignedSeqPos p)76 inline bool Include(TSignedSeqRange r, TSignedSeqPos p) { return (r.GetFrom()<=p && p<=r.GetTo()); }
Enclosed(TSignedSeqRange big,TSignedSeqRange small)77 inline bool Enclosed(TSignedSeqRange big, TSignedSeqRange small) { return (big != small && Include(big, small)); }
78
79 class CSupportInfo
80 {
81 public:
82 CSupportInfo(Int8 model_id, bool core=false);
83
84
85 Int8 GetId() const;
86 void SetCore(bool core);
87 bool IsCore() const;
88 bool operator==(const CSupportInfo& s) const;
89 bool operator<(const CSupportInfo& s) const;
90
91 private:
92 Int8 m_id;
93 bool m_core_align;
94 };
95
96 typedef CVectorSet<CSupportInfo> CSupportInfoSet;
97
98 class CAlignModel;
99
100 class NCBI_XALGOGNOMON_EXPORT CInDelInfo {
101 public:
102
103 struct SSource {
SSourceCInDelInfo::SSource104 SSource() : m_strand(ePlus) {}
105 string m_acc;
106 TSignedSeqRange m_range;
107 EStrand m_strand;
108 };
109
110 enum EType {eDel, eIns, eMism};
111 enum EStatus {eGenomeNotCorrect, eGenomeCorrect, eUnknown};
112
CInDelInfo(TSignedSeqPos l,int len,EType type,const string & v=kEmptyStr,const SSource & s=SSource ())113 CInDelInfo(TSignedSeqPos l, int len, EType type, const string& v = kEmptyStr, const SSource& s = SSource()) { Init(l, len, type, v, s); }
114
Loc() const115 TSignedSeqPos Loc() const { return m_loc; }
Len() const116 int Len() const { return m_len; }
InDelEnd() const117 int InDelEnd() const { return ((IsInsertion() || IsMismatch()) ? Loc()+Len() : Loc()); } // first base "after" correction
IsInsertion() const118 bool IsInsertion() const { return m_type == eIns; }
IsDeletion() const119 bool IsDeletion() const { return m_type == eDel; }
IsMismatch() const120 bool IsMismatch() const { return m_type == eMism; }
IntersectingWith(TSignedSeqPos a,TSignedSeqPos b) const121 bool IntersectingWith(TSignedSeqPos a, TSignedSeqPos b) const // insertion/mismatch at least partially inside, deletion inside or flanking
122 {
123 return (IsDeletion() && Loc() >= a && Loc() <= b+1) ||
124 ((IsInsertion() || IsMismatch()) && Loc() <= b && a <= Loc()+Len()-1);
125 }
operator <(const CInDelInfo & fsi) const126 bool operator<(const CInDelInfo& fsi) const // source is ignored!!!!!!!!!!!
127 {
128 if(m_loc != fsi.m_loc)
129 return m_loc < fsi.m_loc;
130 else if(m_type != fsi.m_type)
131 return m_type < fsi.m_type; // if location is same deletion first
132 else if(m_len != fsi.m_len)
133 return m_len < fsi.m_len;
134 else
135 return m_indelv < fsi.m_indelv;
136 }
operator !=(const CInDelInfo & fsi) const137 bool operator!=(const CInDelInfo& fsi) const { return (*this < fsi || fsi < *this); }
operator ==(const CInDelInfo & fsi) const138 bool operator==(const CInDelInfo& fsi) const { return !(*this != fsi); }
GetInDelV() const139 string GetInDelV() const { return m_indelv; }
GetSource() const140 const SSource& GetSource() const { return m_source; }
GetType() const141 EType GetType() const { return m_type; };
SetStatus(EStatus s)142 void SetStatus(EStatus s) { m_status = s; }
GetStatus() const143 EStatus GetStatus() const { return m_status; }
SetLoc(TSignedSeqPos l)144 void SetLoc(TSignedSeqPos l) { m_loc = l; }
145
146 private:
Init(TSignedSeqPos l,int len,EType type,const string & v,const SSource & s)147 void Init(TSignedSeqPos l, int len, EType type, const string& v, const SSource& s) {
148 m_loc = l;
149 m_len = len;
150 m_type = type;
151 m_status = eUnknown;
152 m_indelv = v;
153 m_source = s;
154 _ASSERT(m_indelv.empty() || (int)m_indelv.length() == len);
155 _ASSERT(m_indelv.empty() || m_type != eIns);
156 if((IsDeletion() || IsMismatch()) && GetInDelV().empty())
157 m_indelv.insert( m_indelv.end(), Len(),'N');
158 }
159
160 TSignedSeqPos m_loc; // left location for insertion, deletion is before m_loc
161 // insertion - when there are extra bases in the genome
162 int m_len;
163 EType m_type;
164 EStatus m_status;
165 string m_indelv;
166 SSource m_source;
167 };
168
169 typedef vector<CInDelInfo> TInDels;
170
171 template <class Res>
172 bool IsStartCodon(const Res * seq, int strand = ePlus); // seq points to the first base in biological order
173 template <class Res>
174 bool IsStopCodon(const Res * seq, int strand = ePlus); // seq points to the first base in biological order
175
176
177 class CRangeMapper {
178 public:
~CRangeMapper()179 virtual ~CRangeMapper() {}
180 virtual TSignedSeqRange operator()(TSignedSeqRange r, bool withextras = true) const = 0;
181 };
182
183 class NCBI_XALGOGNOMON_EXPORT CModelExon {
184 public:
CModelExon(TSignedSeqPos f=0,TSignedSeqPos s=0,bool fs=false,bool ss=false,const string & fsig="",const string & ssig="",double ident=0,const string & seq="",const CInDelInfo::SSource & src=CInDelInfo::SSource ())185 CModelExon(TSignedSeqPos f = 0, TSignedSeqPos s = 0, bool fs = false, bool ss = false, const string& fsig = "", const string& ssig = "", double ident = 0, const string& seq = "", const CInDelInfo::SSource& src = CInDelInfo::SSource()) :
186 m_fsplice(fs), m_ssplice(ss), m_fsplice_sig(fsig), m_ssplice_sig(ssig), m_ident(ident), m_seq(seq), m_source(src), m_range(f,s)
187 {
188 _ASSERT(m_seq.empty() || m_range.Empty());
189 };
190
operator ==(const CModelExon & p) const191 bool operator==(const CModelExon& p) const
192 {
193 return (m_range==p.m_range && m_fsplice == p.m_fsplice && m_ssplice == p.m_ssplice);
194 }
operator !=(const CModelExon & p) const195 bool operator!=(const CModelExon& p) const
196 {
197 return !(*this == p);
198 }
operator <(const CModelExon & p) const199 bool operator<(const CModelExon& p) const { return Precede(Limits(),p.Limits()); }
200
operator TSignedSeqRange() const201 operator TSignedSeqRange() const { return m_range; }
Limits() const202 const TSignedSeqRange& Limits() const { return m_range; }
Limits()203 TSignedSeqRange& Limits() { return m_range; }
GetFrom() const204 TSignedSeqPos GetFrom() const { return m_range.GetFrom(); }
GetTo() const205 TSignedSeqPos GetTo() const { return m_range.GetTo(); }
206 void Extend(const CModelExon& e);
AddFrom(int d)207 void AddFrom(int d) { m_range.SetFrom( m_range.GetFrom() +d ); }
AddTo(int d)208 void AddTo(int d) { m_range.SetTo( m_range.GetTo() +d ); }
209
210 bool m_fsplice, m_ssplice;
211 string m_fsplice_sig, m_ssplice_sig; // obeys strand
212 double m_ident;
213 string m_seq; // exon sequence if in gap; obeys strand
214 CInDelInfo::SSource m_source;
215
Remap(const CRangeMapper & mapper)216 void Remap(const CRangeMapper& mapper) { m_range = mapper(m_range); }
217 private:
218 TSignedSeqRange m_range;
219 };
220
221 class CAlignMap;
222
223 class CCDSInfo {
224 public:
CCDSInfo(bool gcoords=true)225 CCDSInfo(bool gcoords = true): m_confirmed_start(false), m_confirmed_stop(false), m_open(false), m_score(BadScore()), m_genomic_coordinates(gcoords) {}
226
227 bool operator== (const CCDSInfo& another) const;
228
229 //CDS mapped to transcript should be used only for for final models (not alignments)
230 //Change in indels or 5' UTR will invalidate the cooordinates (in particular conversion from CAlignModel to CGeneModel);
IsMappedToGenome() const231 bool IsMappedToGenome() const { return m_genomic_coordinates; }
232 CCDSInfo MapFromOrigToEdited(const CAlignMap& amap) const;
233 CCDSInfo MapFromEditedToOrig(const CAlignMap& amap) const; // returns 'empty' CDS if can't map
234
ReadingFrame() const235 TSignedSeqRange ReadingFrame() const { return m_reading_frame; }
ProtReadingFrame() const236 TSignedSeqRange ProtReadingFrame() const { return m_reading_frame_from_proteins; }
Cds() const237 TSignedSeqRange Cds() const { return Start()+ReadingFrame()+Stop(); }
MaxCdsLimits() const238 TSignedSeqRange MaxCdsLimits() const { return m_max_cds_limits; }
239
Start() const240 TSignedSeqRange Start() const {return m_start; }
Stop() const241 TSignedSeqRange Stop() const {return m_stop; }
HasStart() const242 bool HasStart() const { return Start().NotEmpty(); }
HasStop() const243 bool HasStop () const { return Stop().NotEmpty(); }
ConfirmedStart() const244 bool ConfirmedStart() const { return m_confirmed_start; } // start is confirmed by protein alignment
ConfirmedStop() const245 bool ConfirmedStop() const { return m_confirmed_stop; } // stop is confirmed by protein alignment
246
OpenCds() const247 bool OpenCds() const { return m_open; } // "optimal" CDS is not internal
Score() const248 double Score() const { return m_score; }
249
250 void SetReadingFrame(TSignedSeqRange r, bool protein = false);
251 void SetStart(TSignedSeqRange r, bool confirmed = false);
252 void SetStop(TSignedSeqRange r, bool confirmed = false);
253 void Set5PrimeCdsLimit(TSignedSeqPos p);
254 void Clear5PrimeCdsLimit();
255 void SetScore(double score, bool open=false);
256
257 void CombineWith(const CCDSInfo& another_cds_info);
258 void Remap(const CRangeMapper& mapper);
259 void Clip(TSignedSeqRange limits);
260 void Cut(TSignedSeqRange hole);
261 void Clear();
262
263 int Strand() const; // -1 (minus), 0 (unknown), 1 (plus)
264
265 enum EStatus {eSelenocysteine, eGenomeNotCorrect, eGenomeCorrect, eUnknown};
266 struct SPStop : public TSignedSeqRange {
SPStopCCDSInfo::SPStop267 SPStop(TSignedSeqRange r, EStatus s) : TSignedSeqRange(r), m_status(s) {};
268
269 //not overloaded == is used for uniquing and finding intervals
270 //overloaded < is used for sorting before uniquing
operator <CCDSInfo::SPStop271 bool operator<(const SPStop& stp) const {
272 if(operator==(stp)) // == is not overloaded
273 return m_status < stp.m_status;
274 else
275 return TSignedSeqRange::operator<(stp);
276 }
277
278 EStatus m_status;
279 };
280
281 typedef vector<SPStop> TPStops;
PStops() const282 const TPStops& PStops() const { return m_p_stops; }
283 bool PStop(bool includeall = true) const; // has premature stop(s)
AddPStop(SPStop stp)284 void AddPStop(SPStop stp) { m_p_stops.push_back(stp); _ASSERT( Invariant() ); }
285 void AddPStop(TSignedSeqRange r, EStatus status);
ClearPStops()286 void ClearPStops() { m_p_stops.clear(); }
287
Invariant() const288 bool Invariant() const
289 {
290 #ifdef _DEBUG
291 if (ReadingFrame().Empty()) {
292 _ASSERT( !HasStop() && !HasStart() );
293 _ASSERT( ProtReadingFrame().Empty() && MaxCdsLimits().Empty() );
294 _ASSERT( !ConfirmedStart() );
295 _ASSERT( !ConfirmedStop() );
296 // _ASSERT( !PStop() );
297 _ASSERT( !OpenCds() );
298 _ASSERT( Score()==BadScore() );
299 return true;
300 }
301
302 _ASSERT( !Start().IntersectingWith(ReadingFrame()) );
303 _ASSERT( !Stop().IntersectingWith(ReadingFrame()) );
304 _ASSERT( ProtReadingFrame().Empty() || Include(ReadingFrame(), ProtReadingFrame()) );
305 _ASSERT( Include( MaxCdsLimits(), Cds() ) );
306
307 if (!HasStop() && !HasStart()) {
308 _ASSERT(MaxCdsLimits().GetFrom()==TSignedSeqRange::GetWholeFrom() && MaxCdsLimits().GetTo()==TSignedSeqRange::GetWholeTo());
309 } else if (HasStart() && !HasStop()) {
310 if (Precede(Start(), ReadingFrame())) {
311 _ASSERT( MaxCdsLimits().GetTo()==TSignedSeqRange::GetWholeTo() );
312 } else {
313 _ASSERT( MaxCdsLimits().GetFrom()==TSignedSeqRange::GetWholeFrom() );
314 }
315 } else if (HasStart() && HasStop()) {
316 _ASSERT( Include(Start()+Stop(),ReadingFrame()) );
317 }
318 if (HasStop()) {
319 if (Precede(ReadingFrame(),Stop())) {
320 _ASSERT( MaxCdsLimits().GetTo()==Stop().GetTo() );
321 } else {
322 _ASSERT( MaxCdsLimits().GetFrom()==Stop().GetFrom() );
323 }
324 }
325
326 if (ConfirmedStart()) {
327 _ASSERT( HasStart() );
328 }
329
330 if (ConfirmedStop()) {
331 _ASSERT( HasStop() );
332 }
333
334 // ITERATE(TPStops, s, PStops())
335 // _ASSERT( Include(MaxCdsLimits(), *s) );
336 #endif
337
338 return true;
339 }
340
341 private:
342 TSignedSeqRange m_start, m_stop;
343 TSignedSeqRange m_reading_frame;
344 TSignedSeqRange m_reading_frame_from_proteins;
345 TSignedSeqRange m_max_cds_limits;
346
347 bool m_confirmed_start, m_confirmed_stop;
348 TPStops m_p_stops;
349
350 bool m_open;
351 double m_score;
352
353 bool m_genomic_coordinates;
354 };
355
356
357 class NCBI_XALGOGNOMON_EXPORT CGeneModel
358 {
359 public:
360 enum EType {
361 eWall = 1,
362 eNested = 2,
363 eSR = 4,
364 eEST = 8,
365 emRNA = 16,
366 eProt = 32,
367 eNotForChaining = 64,
368 eChain = 128,
369 eGnomon = 256
370 };
371 static string TypeToString(int type);
372
373 enum EStatus {
374 ecDNAIntrons = 1,
375 eReversed = 2,
376 eSkipped = 4,
377 eLeftTrimmed = 8,
378 eRightTrimmed = 16,
379 eFullSupCDS = 32,
380 ePseudo = 64,
381 ePolyA = 128,
382 eCap = 256,
383 eBestPlacement = 512,
384 eUnknownOrientation = 1024,
385 eConsistentCoverage = 2048,
386 eGapFiller = 4096,
387 eUnmodifiedAlign = 8192,
388 eChangedByFilter = 16384,
389 eTSA = 32768,
390 eLeftConfirmed = 65536,
391 eRightConfirmed = 131072,
392 eLeftFlexible = 262144,
393 eRightFlexible = 524288
394 };
395
CGeneModel(EStrand s=ePlus,Int8 id=0,int type=0)396 CGeneModel(EStrand s = ePlus, Int8 id = 0, int type = 0) :
397 m_type(type), m_id(id), m_status(0), m_ident(0), m_weight(1), m_expecting_hole(false), m_strand(s), m_geneid(0), m_rank_in_gene(0) {}
~CGeneModel()398 virtual ~CGeneModel() {}
399
400 void AddExon(TSignedSeqRange exon, const string& fs = "", const string& ss = "", double ident = 0, const string& seq = "", const CInDelInfo::SSource& src = CInDelInfo::SSource());
401 void AddHole(); // between model and next exons
402 void AddGgapExon(double ident, const string& seq, const CInDelInfo::SSource& src, bool infront);
403 void AddNormalExon(TSignedSeqRange exon, const string& fs, const string& ss, double ident, bool infront);
404
405 typedef vector<CModelExon> TExons;
Exons() const406 const TExons& Exons() const { return m_exons; }
ClearExons()407 void ClearExons() {
408 m_exons.clear();
409 m_fshifts.clear();
410 m_range = TSignedSeqRange::GetEmpty();
411 m_cds_info = CCDSInfo();
412 m_edge_reading_frames.clear();
413 }
SetSplices(int i,const string & f_sig,const string & s_sig)414 void SetSplices(int i, const string& f_sig, const string& s_sig) { m_exons[i].m_fsplice_sig = f_sig; m_exons[i].m_ssplice_sig = s_sig; }
415
416 void ReverseComplementModel();
417
418 void Remap(const CRangeMapper& mapper);
419 enum EClipMode { eRemoveExons, eDontRemoveExons };
420 virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant = true); // drops the score!!!!!!!!!
421 virtual void CutExons(TSignedSeqRange hole); // clip or remove exons, dangerous, should be completely in or outside the cds, should not cut an exon in two
422 void ExtendLeft(int amount);
423 void ExtendRight(int amount);
424 void Extend(const CGeneModel& a, bool ensure_cds_invariant = true);
425 void RemoveShortHolesAndRescore(const CGnomonEngine& gnomon); // removes holes shorter than min intron (may add frameshifts/stops)
426
427 TSignedSeqRange TranscriptExon(int i) const;
428
Limits() const429 TSignedSeqRange Limits() const { return m_range; }
430 TSignedSeqRange TranscriptLimits() const;
431 int AlignLen() const ;
432 void RecalculateLimits();
433
434 // ReadingFrame doesn't include start/stop. It's always on codon boundaries
ReadingFrame() const435 TSignedSeqRange ReadingFrame() const { return m_cds_info.ReadingFrame(); }
436 // CdsLimits include start/stop if any, goes to model limit if no start/stop
437 TSignedSeqRange RealCdsLimits() const;
438 int RealCdsLen() const ; // %3!=0 is possible
439 // MaxCdsLimits - longest cds. include start/stop if any,
440 // goes to 5' limit if no upstream stop, goes to 3' limit if no stop
441 TSignedSeqRange MaxCdsLimits() const;
442
GetCdsInfo() const443 const CCDSInfo& GetCdsInfo() const { return m_cds_info; }
444 void SetCdsInfo(const CCDSInfo& cds_info);
445 void SetCdsInfo(const CGeneModel& a);
446 void CombineCdsInfo(const CGeneModel& a, bool ensure_cds_invariant = true);
447 void CombineCdsInfo(const CCDSInfo& cds_info, bool ensure_cds_invariant = true);
448
IntersectingWith(const CGeneModel & a) const449 bool IntersectingWith(const CGeneModel& a) const
450 {
451 return Limits().IntersectingWith(a.Limits());
452 }
453
Ident() const454 double Ident() const { return m_ident; }
SetIdent(double i)455 void SetIdent(double i) { m_ident = i; }
456
Weight() const457 double Weight() const { return m_weight; }
SetWeight(double w)458 void SetWeight(double w) { m_weight = w; }
459
SetStrand(EStrand s)460 void SetStrand(EStrand s) { m_strand = s; }
Strand() const461 EStrand Strand() const { return m_strand; }
Orientation() const462 EStrand Orientation() const {
463 bool notreversed = (Status()&CGeneModel::eReversed) == 0;
464 bool plusstrand = Strand() == ePlus;
465 return (notreversed == plusstrand) ? ePlus : eMinus;
466 }
467
SetType(int t)468 void SetType(int t) { m_type = t; }
Type() const469 int Type() const { return m_type; }
GeneID() const470 Int8 GeneID() const { return m_geneid; }
SetGeneID(Int8 id)471 void SetGeneID(Int8 id) { m_geneid = id; }
RankInGene() const472 int RankInGene() const { return m_rank_in_gene; }
SetRankInGene(int rank)473 void SetRankInGene(int rank) { m_rank_in_gene = rank; }
ID() const474 Int8 ID() const { return m_id; }
SetID(Int8 id)475 void SetID(Int8 id) { m_id = id; }
Support() const476 const CSupportInfoSet& Support() const { return m_support; }
AddSupport(const CSupportInfo & support)477 bool AddSupport(const CSupportInfo& support) { return m_support.insert(support); }
ReplaceSupport(const CSupportInfoSet & support_set)478 void ReplaceSupport(const CSupportInfoSet& support_set) { m_support = support_set; }
ProteinHit() const479 const string& ProteinHit() const { return m_protein_hit; }
ProteinHit()480 string& ProteinHit() { return m_protein_hit; }
481
Status()482 unsigned int& Status() { return m_status; }
Status() const483 const unsigned int& Status() const { return m_status; }
ClearStatus()484 void ClearStatus() { m_status = 0; }
485
GetComment() const486 const string& GetComment() const { return m_comment; }
SetComment(const string & comment)487 void SetComment(const string& comment) { m_comment = comment; }
AddComment(const string & comment)488 void AddComment(const string& comment) { m_comment += " " + comment; }
489
operator <(const CGeneModel & a) const490 bool operator<(const CGeneModel& a) const { return Precede(Limits(),a.Limits()); }
491
Score() const492 double Score() const { return m_cds_info.Score(); }
493
Continuous() const494 bool Continuous() const // no "holes" in alignment
495 {
496 for(unsigned int i = 1; i < Exons().size(); ++i)
497 if (!Exons()[i-1].m_ssplice || !Exons()[i].m_fsplice)
498 return false;
499 return true;
500 }
HasStart() const501 bool HasStart() const { return m_cds_info.HasStart(); }
HasStop() const502 bool HasStop () const { return m_cds_info.HasStop (); }
LeftComplete() const503 bool LeftComplete() const { return Strand() == ePlus ? HasStart() : HasStop(); }
RightComplete() const504 bool RightComplete() const { return Strand() == ePlus ? HasStop() : HasStart(); }
FullCds() const505 bool FullCds() const { return HasStart() && HasStop() && Continuous(); }
CompleteCds() const506 bool CompleteCds() const { return FullCds() && (!Open5primeEnd() || ConfirmedStart()); }
507
GoodEnoughToBeAnnotation() const508 bool GoodEnoughToBeAnnotation() const
509 {
510 _ASSERT( !(OpenCds()&&ConfirmedStart()) );
511 return (ReadingFrame().Empty() || (!OpenCds() && FullCds()));
512 }
513
Open5primeEnd() const514 bool Open5primeEnd() const
515 {
516 return (Strand() == ePlus ? OpenLeftEnd() : OpenRightEnd());
517 }
OpenLeftEnd() const518 bool OpenLeftEnd() const
519 {
520 return ReadingFrame().NotEmpty() && GetCdsInfo().MaxCdsLimits().GetFrom()==TSignedSeqRange::GetWholeFrom();
521 }
OpenRightEnd() const522 bool OpenRightEnd() const
523 {
524 return ReadingFrame().NotEmpty() && GetCdsInfo().MaxCdsLimits().GetTo()==TSignedSeqRange::GetWholeTo();
525 }
526
OpenCds() const527 bool OpenCds() const { return m_cds_info.OpenCds(); } // "optimal" CDS is not internal
PStop(bool includeall=true) const528 bool PStop(bool includeall = true) const { return m_cds_info.PStop(includeall); } // has premature stop(s)
529
ConfirmedStart() const530 bool ConfirmedStart() const { return m_cds_info.ConfirmedStart(); } // start is confirmed by protein alignment
ConfirmedStop() const531 bool ConfirmedStop() const { return m_cds_info.ConfirmedStop(); } // stop is confirmed by protein alignment
532
533 bool isNMD(int limit = 50) const;
534
FrameShifts()535 TInDels& FrameShifts() { return m_fshifts; }
FrameShifts() const536 const TInDels& FrameShifts() const { return m_fshifts; }
537 TInDels FrameShifts(TSignedSeqPos a, TSignedSeqPos b) const;
538 TInDels GetInDels(bool fs_only) const;
539 TInDels GetInDels(TSignedSeqPos a, TSignedSeqPos b, bool fs_only) const;
540
541 int FShiftedLen(TSignedSeqRange ab, bool withextras = true) const; // won't work if a/b is insertion
FShiftedLen(TSignedSeqPos a,TSignedSeqPos b,bool withextras=true) const542 int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras = true) const { return FShiftedLen(TSignedSeqRange(a,b),withextras); }
543
544 // move along mrna skipping introns
545 TSignedSeqPos FShiftedMove(TSignedSeqPos pos, int len) const; // may retun <0 if hits a deletion at the end of move
546
547 virtual CAlignMap GetAlignMap() const;
548
549 string GetCdsDnaSequence (const CResidueVec& contig_sequence) const;
550 string GetProtein (const CResidueVec& contig_sequence) const;
551 string GetProtein (const CResidueVec& contig_sequence, const CGenetic_code* gencode) const;
552
553 // Below comparisons ignore CDS completely, first 3 assume that alignments are the same strand
554
555 int isCompatible(const CGeneModel& a) const; // returns 0 for notcompatible or (number of common splices)+1
556 bool IsSubAlignOf(const CGeneModel& a) const;
557 int MutualExtension(const CGeneModel& a) const; // returns 0 for notcompatible or (number of introns) + 1
558
IdenticalAlign(const CGeneModel & a) const559 bool IdenticalAlign(const CGeneModel& a) const
560 { return Strand()==a.Strand() && Limits()==a.Limits() && Exons() == a.Exons() && FrameShifts()==a.FrameShifts() &&
561 GetCdsInfo().PStops() == a.GetCdsInfo().PStops() && Type() == a.Type() && Status() == a.Status(); }
operator ==(const CGeneModel & a) const562 bool operator==(const CGeneModel& a) const
563 {
564 return IdenticalAlign(a) && Type()==a.Type() && m_id==a.m_id && m_support==a.m_support;
565 }
566
TrustedmRNA() const567 const list< CRef<CSeq_id> >& TrustedmRNA() const { return m_trusted_mrna; }
InsertTrustedmRNA(CRef<CSeq_id> g)568 void InsertTrustedmRNA(CRef<CSeq_id> g) { m_trusted_mrna.push_back(g); };
ClearTrustedmRNA()569 void ClearTrustedmRNA() { m_trusted_mrna.clear(); };
570
TrustedProt() const571 const list< CRef<CSeq_id> >& TrustedProt() const { return m_trusted_prot; }
InsertTrustedProt(CRef<CSeq_id> g)572 void InsertTrustedProt(CRef<CSeq_id> g) { m_trusted_prot.push_back(g); };
ClearTrustedProt()573 void ClearTrustedProt() { m_trusted_prot.clear(); };
574
GetEdgeReadingFrames() const575 const vector<CCDSInfo>* GetEdgeReadingFrames() const { return &m_edge_reading_frames; }
SetEdgeReadingFrames()576 vector<CCDSInfo>* SetEdgeReadingFrames() { return &m_edge_reading_frames; }
577
578
579 #ifdef _DEBUG
580 int oid;
581 #endif
582
583 private:
584 void RemoveExtraFShifts(int left, int right);
585 void TrimEdgesToFrameInOtherAlignGaps(const TExons& exons_with_gaps);
586
587 int m_type;
588 Int8 m_id;
589 unsigned int m_status;
590
591 double m_ident;
592 double m_weight;
593
594 TExons m_exons;
MyExons()595 TExons& MyExons() { return m_exons; }
596 bool m_expecting_hole;
597
598 TSignedSeqRange m_range;
599 EStrand m_strand;
600 TInDels m_fshifts;
601
602 CCDSInfo m_cds_info;
603 bool CdsInvariant(bool check_start_stop = true) const;
604
605 Int8 m_geneid;
606 int m_rank_in_gene;
607 CSupportInfoSet m_support;
608 string m_protein_hit;
609 string m_comment;
610 list< CRef<CSeq_id> > m_trusted_prot;
611 list< CRef<CSeq_id> > m_trusted_mrna;
612
613
614 vector<CCDSInfo> m_edge_reading_frames;
615
616 friend class CChain;
617 };
618
619
620 class CAlignMap {
621 public:
622 enum EEdgeType { eBoundary, eSplice, eInDel, eGgap };
623 enum ERangeEnd{ eLeftEnd, eRightEnd, eSinglePoint };
624
CAlignMap()625 CAlignMap() {};
CAlignMap(TSignedSeqPos orig_a,TSignedSeqPos orig_b)626 CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b) : m_orientation(ePlus) {
627 m_orig_ranges.push_back(SMapRange(SMapRangeEdge(orig_a), SMapRangeEdge(orig_b), kEmptyStr));
628 m_edited_ranges = m_orig_ranges;
629 m_target_len = FShiftedLen(orig_a, orig_b);
630 }
CAlignMap(TSignedSeqPos orig_a,TSignedSeqPos orig_b,TInDels::const_iterator fsi_begin,const TInDels::const_iterator fsi_end)631 CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TInDels::const_iterator fsi_begin, const TInDels::const_iterator fsi_end) : m_orientation(ePlus) {
632 EEdgeType atype = eBoundary;
633 EEdgeType btype = eBoundary;
634 if(fsi_begin != fsi_end) {
635 if(fsi_begin->Loc() == orig_a && !fsi_begin->IsMismatch()) {
636 _ASSERT(!fsi_begin->IsInsertion()); // no reason to have insertion
637 atype = eInDel;
638 }
639 TInDels::const_iterator fs = fsi_end-1;
640 if(fs->Loc() == orig_b+1 && fs->IsDeletion())
641 btype = eInDel;
642 }
643 InsertIndelRangesForInterval(orig_a, orig_b, 0, fsi_begin, fsi_end, atype, btype, "", "");
644 m_target_len = FShiftedLen(orig_a, orig_b);
645 }
646 CAlignMap(const CGeneModel::TExons& exons, const vector<TSignedSeqRange>& transcript_exons, const TInDels& indels, EStrand orientation, int targetlen ); //orientation == strand if not Reversed
647 CAlignMap(const CGeneModel::TExons& exons, const TInDels& frameshifts, EStrand strand, TSignedSeqRange lim = TSignedSeqRange::GetWhole(), int holelen = 0, int polyalen = 0);
648 TSignedSeqPos MapOrigToEdited(TSignedSeqPos orig_pos) const;
649 TSignedSeqPos MapEditedToOrig(TSignedSeqPos edited_pos) const;
650 TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, ERangeEnd lend, ERangeEnd rend) const;
MapRangeOrigToEdited(TSignedSeqRange orig_range,bool withextras=true) const651 TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, bool withextras = true) const { return MapRangeOrigToEdited(orig_range, withextras?eLeftEnd:eSinglePoint, withextras?eRightEnd:eSinglePoint); }
652 TSignedSeqRange MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras = true) const;
653 template <class Vec>
654 void EditedSequence(const Vec& original_sequence, Vec& edited_sequence, bool includeholes = false) const;
655 int FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const;
656 int FShiftedLen(TSignedSeqRange ab, bool withextras = true) const;
FShiftedLen(TSignedSeqPos a,TSignedSeqPos b,bool withextras=true) const657 int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras = true) const { return FShiftedLen(TSignedSeqRange(a,b), withextras); }
658 //snap to codons works by analising transcript coordinates (MUST be a protein or reading frame cutout)
659 TSignedSeqRange ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons = false) const;
660 TSignedSeqRange ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const;
661 TSignedSeqPos FShiftedMove(TSignedSeqPos orig_pos, int len) const; // may reurn < 0 if hits a gap
662 // TInDels GetInDels(bool fs_only) const;
663 // TInDels GetAllCorrections() const;
TargetLen() const664 int TargetLen() const { return m_target_len; }
Orientation() const665 EStrand Orientation() const { return m_orientation; }
MoveOrigin(TSignedSeqPos shift)666 void MoveOrigin(TSignedSeqPos shift) {
667 for(auto& mrange : m_orig_ranges)
668 mrange.MoveOrigin(shift);
669 }
670
671 // private: // breaks SMapRange on WorkShop. :-/
672 struct SMapRangeEdge {
SMapRangeEdgeCAlignMap::SMapRangeEdge673 SMapRangeEdge(TSignedSeqPos p, TSignedSeqPos e = 0, EEdgeType t = eBoundary, const string& seq = kEmptyStr) : m_pos(p), m_extra(e), m_edge_type(t), m_extra_seq(seq) {}
operator <CAlignMap::SMapRangeEdge674 bool operator<(const SMapRangeEdge& mre) const { return m_pos < mre.m_pos; }
operator ==CAlignMap::SMapRangeEdge675 bool operator==(const SMapRangeEdge& mre) const { return m_pos == mre.m_pos; }
676
677 TSignedSeqPos m_pos, m_extra;
678 EEdgeType m_edge_type;
679 string m_extra_seq;
680 };
681
682 class SMapRange {
683 public:
SMapRange(SMapRangeEdge from,SMapRangeEdge to,const string & mism)684 SMapRange(SMapRangeEdge from, SMapRangeEdge to, const string& mism) : m_from(from), m_to(to), m_mism_seq(mism) {}
GetEdgeFrom() const685 SMapRangeEdge GetEdgeFrom() const { return m_from; }
GetEdgeTo() const686 SMapRangeEdge GetEdgeTo() const { return m_to; }
SetEdgeFrom(SMapRangeEdge from)687 void SetEdgeFrom(SMapRangeEdge from) { m_from = from; }
SetEdgeTo(SMapRangeEdge to)688 void SetEdgeTo(SMapRangeEdge to) { m_to = to; }
MoveOrigin(TSignedSeqPos shift)689 void MoveOrigin(TSignedSeqPos shift) {
690 m_from.m_pos -= shift;
691 m_to.m_pos -= shift;
692 }
GetFrom() const693 TSignedSeqPos GetFrom() const { return m_from.m_pos; }
GetTo() const694 TSignedSeqPos GetTo() const { return m_to.m_pos; }
GetExtendedFrom() const695 TSignedSeqPos GetExtendedFrom() const { return m_from.m_pos - m_from.m_extra; }
GetExtendedTo() const696 TSignedSeqPos GetExtendedTo() const { return m_to.m_pos+m_to.m_extra; }
GetExtraFrom() const697 TSignedSeqPos GetExtraFrom() const { return m_from.m_extra; }
GetExtraSeqFrom() const698 string GetExtraSeqFrom() const { return m_from.m_extra_seq; }
GetExtraTo() const699 TSignedSeqPos GetExtraTo() const { return m_to.m_extra; }
GetExtraSeqTo() const700 string GetExtraSeqTo() const { return m_to.m_extra_seq; }
GetTypeFrom() const701 EEdgeType GetTypeFrom() const { return m_from.m_edge_type; }
GetTypeTo() const702 EEdgeType GetTypeTo() const { return m_to.m_edge_type; }
GetMismatch() const703 const string& GetMismatch() const { return m_mism_seq; }
operator <(const SMapRange & mr) const704 bool operator<(const SMapRange& mr) const {
705 if(m_from.m_pos == mr.m_from.m_pos) return m_to.m_pos < mr.m_to.m_pos;
706 else return m_from.m_pos < mr.m_from.m_pos;
707 }
708
709 private:
710 SMapRangeEdge m_from, m_to;
711 string m_mism_seq;
712 };
713
714 // static TInDels RemoveExtraIndels(const TInDels& indels, const CGeneModel::TExons& exons);
715
716 private:
717 static TSignedSeqPos MapAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqPos p, ERangeEnd move_mode);
718 static TSignedSeqRange MapRangeAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend);
MapRangeAtoB(const vector<CAlignMap::SMapRange> & a,const vector<CAlignMap::SMapRange> & b,TSignedSeqRange r,bool withextras)719 static TSignedSeqRange MapRangeAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqRange r, bool withextras ) {
720 return MapRangeAtoB(a, b, r, withextras?eLeftEnd:eSinglePoint, withextras?eRightEnd:eSinglePoint);
721 };
722 static int FindLowerRange(const vector<CAlignMap::SMapRange>& a, TSignedSeqPos p);
723
724 void InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, int len, const string& mism, int left_orige, int left_edite, int right_orige, int right_edite, EEdgeType left_type, EEdgeType right_type, const string& left_edit_extra_seq, const string& right_edit_extra_seq);
725 TSignedSeqPos InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin, TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string& gseq_a, const string& gseq_b);
726
727 vector<SMapRange> m_orig_ranges, m_edited_ranges;
728 EStrand m_orientation;
729 int m_target_len;
730 };
731
732
733
734
735 class NCBI_XALGOGNOMON_EXPORT CAlignModel : public CGeneModel {
736 public:
CAlignModel()737 CAlignModel() {}
738 CAlignModel(const objects::CSeq_align& seq_align);
739 CAlignModel(const CGeneModel& g, const CAlignMap& a);
GetAlignMap() const740 virtual CAlignMap GetAlignMap() const { return m_alignmap; }
741 void ResetAlignMap();
742
Clip(TSignedSeqRange limits,EClipMode mode,bool ensure_cds_invariant=true)743 virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant = true) { // drops the score!!!!!!!!!
744 CGeneModel::Clip(limits,mode,ensure_cds_invariant);
745 RecalculateAlignMap(limits.GetFrom(), limits.GetTo());
746 }
CutExons(TSignedSeqRange hole)747 virtual void CutExons(TSignedSeqRange hole) { // clip or remove exons, dangerous, should be completely in or outside the cds, should not cut an exon in two
748 CGeneModel::CutExons(hole);
749 RecalculateAlignMap(hole.GetTo()+1, hole.GetFrom()-1);
750 }
751
752 string TargetAccession() const;
SetTargetId(const objects::CSeq_id & id)753 void SetTargetId(const objects::CSeq_id& id) { m_target_id.Reset(&id); }
GetTargetId() const754 CConstRef<objects::CSeq_id> GetTargetId() const { return m_target_id; }
TargetLen() const755 int TargetLen() const { return m_alignmap.TargetLen(); }
756 int PolyALen() const;
757 CRef<objects::CSeq_align> MakeSeqAlign(const string& contig) const; // should be used for alignments only; for chains and models will produce a Splign alignment of mRNA
758
759 private:
760 void RecalculateAlignMap(int left, int right);
761 CAlignMap m_alignmap;
762 CConstRef<objects::CSeq_id> m_target_id;
763 };
764
765
766
767
768 struct NCBI_XALGOGNOMON_EXPORT setcontig {
769 const string& m_contig;
setcontigsetcontig770 explicit setcontig(const string& cntg) : m_contig(cntg) {}
771 };
772 struct NCBI_XALGOGNOMON_EXPORT getcontig {
773 string& m_contig;
getcontiggetcontig774 explicit getcontig(string& cntg) : m_contig(cntg) {}
775 };
776 NCBI_XALGOGNOMON_EXPORT CNcbiOstream& operator<<(CNcbiOstream& s, const setcontig& c);
777 NCBI_XALGOGNOMON_EXPORT CNcbiIstream& operator>>(CNcbiIstream& s, const getcontig& c);
778
779 NCBI_XALGOGNOMON_EXPORT CNcbiIstream& operator>>(CNcbiIstream& s, CAlignModel& a);
780 NCBI_XALGOGNOMON_EXPORT CNcbiOstream& operator<<(CNcbiOstream& s, const CAlignModel& a);
781 NCBI_XALGOGNOMON_EXPORT CNcbiOstream& operator<<(CNcbiOstream& s, const CGeneModel& a);
782
783
784 template<class Model>
785 class NCBI_XALGOGNOMON_EXPORT CModelCluster : public list<Model> {
786 public:
787 typedef Model TModel;
CModelCluster(int f=numeric_limits<int>::max (),int s=0)788 CModelCluster(int f = numeric_limits<int>::max(), int s = 0) : m_limits(f,s) {}
CModelCluster(TSignedSeqRange limits)789 CModelCluster(TSignedSeqRange limits) : m_limits(limits) {}
Insert(const Model & a)790 void Insert(const Model& a) {
791 m_limits.CombineWith(a.Limits());
792 this->push_back(a);
793 }
Splice(CModelCluster & c)794 void Splice(CModelCluster& c) { // elements removed from c and inserted into *this
795 m_limits.CombineWith(c.Limits());
796 this->splice(list<Model>::end(),c);
797 }
Limits() const798 TSignedSeqRange Limits() const { return m_limits; }
operator <(const CModelCluster & c) const799 bool operator<(const CModelCluster& c) const { return Precede(m_limits, c.m_limits); }
Init(TSignedSeqPos first,TSignedSeqPos second)800 void Init(TSignedSeqPos first, TSignedSeqPos second) {
801 list<Model>::clear();
802 m_limits.SetFrom( first );
803 m_limits.SetTo( second );
804 }
805
806 private:
807 TSignedSeqRange m_limits;
808 };
809
810 typedef CModelCluster<CGeneModel> TGeneModelCluster;
811 typedef CModelCluster<CAlignModel> TAlignModelCluster;
812
813 typedef list<CGeneModel> TGeneModelList;
814 typedef list<CAlignModel> TAlignModelList;
815
816
817 template<class Cluster>
818 class NCBI_XALGOGNOMON_EXPORT CModelClusterSet : public set<Cluster> {
819 public:
820 typedef typename set<Cluster>::iterator Titerator;
CModelClusterSet()821 CModelClusterSet() {}
Insert(const typename Cluster::TModel & a)822 void Insert(const typename Cluster::TModel& a) {
823 Cluster clust;
824 clust.Insert(a);
825 Titerator first = set<Cluster>::lower_bound(Cluster(a.Limits().GetFrom(),a.Limits().GetFrom()));
826 Titerator second = set<Cluster>::upper_bound(Cluster(a.Limits().GetTo(),a.Limits().GetTo()));
827 for(Titerator it = first; it != second;) {
828 clust.Splice(const_cast<Cluster&>(*it));
829 this->erase(it++);
830 }
831 const_cast<Cluster&>(*this->insert(second,Cluster(clust.Limits()))).Splice(clust);
832 }
833 };
834
835 typedef CModelClusterSet<TGeneModelCluster> TGeneModelClusterSet;
836 typedef CModelClusterSet<TAlignModelCluster> TAlignModelClusterSet;
837
838
839 enum EResidueNames { enA, enC, enG, enT, enN };
840
841 class EResidue {
842 public :
EResidue()843 EResidue() : data(enN) {}
EResidue(EResidueNames e)844 EResidue(EResidueNames e) : data(e) {}
845
operator int() const846 operator int() const { return int(data); }
847
848 private:
849 unsigned char data;
850 };
851
Complement(TResidue c)852 inline TResidue Complement(TResidue c)
853 {
854 switch(c)
855 {
856 case 'A':
857 return 'T';
858 case 'a':
859 return 't';
860 case 'C':
861 return 'G';
862 case 'c':
863 return 'g';
864 case 'G':
865 return 'C';
866 case 'g':
867 return 'c';
868 case 'T':
869 return 'A';
870 case 't':
871 return 'a';
872 default:
873 return 'N';
874 }
875 }
876
877 extern const EResidue k_toMinus[5];
878 extern const char *const k_aa_table;
879
Complement(EResidue c)880 inline EResidue Complement(EResidue c)
881 {
882 return k_toMinus[c];
883 }
884
885 template <class BidirectionalIterator>
ReverseComplement(const BidirectionalIterator & first,const BidirectionalIterator & last)886 void ReverseComplement(const BidirectionalIterator& first, const BidirectionalIterator& last)
887 {
888 for (BidirectionalIterator i( first ); i != last; ++i)
889 *i = Complement(*i);
890 reverse(first, last);
891 }
892
893 template<class Model>
GetAlignParts(const Model & algn,bool settrimflags)894 list<Model> GetAlignParts(const Model& algn, bool settrimflags) {
895 list<Model> parts;
896 int left = algn.Limits().GetFrom();
897 for(unsigned int i = 1; i < algn.Exons().size(); ++i) {
898 if (!algn.Exons()[i-1].m_ssplice || !algn.Exons()[i].m_fsplice) {
899 Model m = algn;
900 m.Status() &= ~CGeneModel::ePolyA;
901 m.Status() &= ~CGeneModel::eCap;
902 m.Clip(TSignedSeqRange(left,algn.Exons()[i-1].GetTo()),CGeneModel::eRemoveExons);
903 if(!parts.empty() && settrimflags) {
904 parts.back().Status() &= ~CGeneModel::eRightTrimmed;
905 m.Status() &= ~CGeneModel::eLeftTrimmed;
906 }
907 parts.push_back(m);
908 left = algn.Exons()[i].GetFrom();
909 }
910 }
911 if(!parts.empty()) {
912 Model m = algn;
913 m.Clip(TSignedSeqRange(left,algn.Limits().GetTo()),CGeneModel::eRemoveExons);
914 m.Status() &= ~CGeneModel::ePolyA;
915 m.Status() &= ~CGeneModel::eCap;
916 if(settrimflags) {
917 parts.back().Status() &= ~CGeneModel::eRightTrimmed;
918 m.Status() &= ~CGeneModel::eLeftTrimmed;
919 }
920 parts.push_back(m);
921
922 if(algn.Status()&CGeneModel::ePolyA) {
923 if(algn.Strand() == ePlus)
924 parts.back().Status() |= CGeneModel::ePolyA;
925 else
926 parts.front().Status() |= CGeneModel::ePolyA;
927 }
928 if(algn.Status()&CGeneModel::eCap) {
929 if(algn.Strand() == ePlus)
930 parts.front().Status() |= CGeneModel::eCap;
931 else
932 parts.back().Status() |= CGeneModel::eCap;
933 }
934 }
935
936 return parts;
937 }
938
939 void MapAlignsToOrigContig(TAlignModelList& aligns, const TInDels& corrections, int contig_size);
940
941
942
943 END_SCOPE(gnomon)
944 END_NCBI_SCOPE
945
946 #endif // ALGO_GNOMON___GNOMON_MODEL__HPP
947