1 #ifndef ALGO_GNOMON___SCORE__HPP
2 #define ALGO_GNOMON___SCORE__HPP
3 
4 /*  $Id: score.hpp 620759 2020-11-30 16:00:57Z souvorov $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors:  Alexandre Souvorov
30  *
31  * File Description:
32  *
33  */
34 
35 #include <corelib/ncbistd.hpp>
36 
37 #include <algo/gnomon/chainer.hpp>
38 #include "gnomon_seq.hpp"
39 
40 BEGIN_NCBI_SCOPE
41 BEGIN_SCOPE(gnomon)
42 
// Forward declarations of model/parameter types. In the visible part of this
// header CTerminal, CCodingRegion, CNonCodingRegion and CIntronParameters are
// used only through const references, so their full definitions are not needed
// here. CIntergenicParameters is declared but not referenced in the visible
// part of this header.
class CTerminal;
class CCodingRegion;
class CNonCodingRegion;
class CIntronParameters;
class CIntergenicParameters;
48 
49 class CSeqScores {
50 public:
51     CSeqScores (const CTerminal& a, const CTerminal& d,const  CTerminal& stt, const CTerminal& stp,
52                 const CCodingRegion& cr, const CNonCodingRegion& ncr, const CNonCodingRegion& ing,
53                 const CIntronParameters& intron_params,
54                 TSignedSeqPos from, TSignedSeqPos to, const TGeneModelList& cls,
55                 const TInDels& initial_fshifts, double mpp, const CGnomonEngine& gnomon);
56     void Init(CResidueVec& original_sequence, bool leftwall,
57               bool rightwall, double consensuspenalty,
58               const CGnomonAnnotator_Base::TIntMap& notbridgeable_gaps_len,
59               const CGnomonAnnotator_Base::TGgapInfo& ggapinfo
60               );
61 
From() const62     TSignedSeqPos From() const { return m_chunk_start; }
To() const63     TSignedSeqPos To() const { return m_chunk_stop; }
AcceptorNumber(int strand) const64     int AcceptorNumber(int strand) const { return m_anum[strand]; }
DonorNumber(int strand) const65     int DonorNumber(int strand) const { return m_dnum[strand]; }
StartNumber(int strand) const66     int StartNumber(int strand) const { return m_sttnum[strand]; }
StopNumber(int strand) const67     int StopNumber(int strand) const { return m_stpnum[strand]; }
AcceptorScore(int i,int strand) const68     double AcceptorScore(int i, int strand) const { return m_ascr[strand][i]; }
DonorScore(int i,int strand) const69     double DonorScore(int i, int strand) const { return m_dscr[strand][i]; }
StartScore(int i,int strand) const70     double StartScore(int i, int strand) const { return m_sttscr[strand][i]; }
StopScore(int i,int strand) const71     double StopScore(int i, int strand) const { return m_stpscr[strand][i]; }
Acceptor() const72     const CTerminal& Acceptor() const { return m_acceptor; }
Donor() const73     const CTerminal& Donor() const { return m_donor; }
Start() const74     const CTerminal& Start() const { return m_start; }
Stop() const75     const CTerminal& Stop() const { return m_stop; }
Alignments() const76     const TGeneModelList& Alignments() const { return m_align_list; }
SeqTInDels() const77     const TInDels& SeqTInDels() const { return m_fshifts; }
FrameShiftedSeqMap() const78     const CAlignMap& FrameShiftedSeqMap() const { return m_map; }
79     bool StopInside(int a, int b, int strand, int frame) const;
80     bool OpenCodingRegion(int a, int b, int strand, int frame) const;
81     double CodingScore(int a, int b, int strand, int frame) const;
ProtNumber(int a,int b) const82     int ProtNumber(int a, int b) const { return (m_protnum[b]-m_protnum[a]); }
MultiProtPenalty() const83     double MultiProtPenalty() const { return m_mpp; }
84     bool OpenNonCodingRegion(int a, int b, int strand) const;
85     double NonCodingScore(int a, int b, int strand) const;
86     bool OpenIntergenicRegion(int a, int b) const;
LeftAlignmentBoundary(int b) const87     int LeftAlignmentBoundary(int b) const { return m_inalign[b]; }
88     double IntergenicScore(int a, int b, int strand) const;
SeqLen() const89     int SeqLen() const { return (int)m_seq[0].size(); }
SplittedStop(int id,int ia,int strand,int ph) const90     bool SplittedStop(int id, int ia, int strand, int ph) const
91     { return (m_dsplit[strand][ph][id]&m_asplit[strand][ph][ia]) != 0; }
92     bool isStart(int i, int strand) const;
93     bool isStop(int i, int strand) const;
94     bool isReadingFrameLeftEnd(int i, int strand) const;
95     bool isReadingFrameRightEnd(int i, int strand) const;
96     bool isAG(int i, int strand) const;
97     bool isGT(int i, int strand) const;
98     bool isConsensusIntron(int i, int j, int strand) const;
99     const EResidue* SeqPtr(int i, int strand) const;
100 
101 private:
102     CSeqScores& operator=(const CSeqScores&);
103     const CTerminal &m_acceptor, &m_donor, &m_start, &m_stop;
104     const CCodingRegion &m_cdr;
105     const CNonCodingRegion &m_ncdr, &m_intrg;
106     TGeneModelList m_align_list;
107     TInDels m_fshifts;
108     CEResidueVec m_seq[2];
109     TIVec m_laststop[2][3], m_notinexon[2][3], m_notinintron[2], m_notining;
110     CAlignMap m_map;
111     TDVec m_ascr[2], m_dscr[2], m_sttscr[2], m_stpscr[2], m_ncdrscr[2], m_ingscr[2], m_cdrscr[2][3];
112     TIVec m_asplit[2][2], m_dsplit[2][2];
113     TIVec m_inalign;
114     TIVec m_protnum;
115     int m_anum[2], m_dnum[2], m_sttnum[2], m_stpnum[2];
116     TSignedSeqPos m_chunk_start, m_chunk_stop;
117     double m_mpp;
118     CResidueVec ConstructSequenceAndMaps(const TGeneModelList& aligns, const CResidueVec& original_sequence);
119 };
120 
121 END_SCOPE(gnomon)
122 END_NCBI_SCOPE
123 
124 #endif  // ALGO_GNOMON___SCORE__HPP
125