1 #ifndef ALGO_GNOMON___HMM_INLINES__HPP
2 #define ALGO_GNOMON___HMM_INLINES__HPP
3
4 /* $Id: hmm_inlines.hpp 119814 2008-02-14 16:37:06Z chetvern $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Authors: Alexandre Souvorov
30 *
31 * File Description:
32 *
33 */
34
35 #include <corelib/ncbistd.hpp>
36 #include <algo/gnomon/gnomon_exception.hpp>
37 #include "score.hpp"
38
39 BEGIN_NCBI_SCOPE
40 BEGIN_SCOPE(gnomon)
41
42 USING_SCOPE(objects);
43
InitScore(const CMarkov_chain_params & from)44 template<int order> void CMarkovChain<order>::InitScore(const CMarkov_chain_params& from)
45 {
46 Init(from);
47 toScore();
48 }
49
Init(const CMarkov_chain_params & from)50 template<int order> void CMarkovChain<order>::Init(const CMarkov_chain_params& from)
51 {
52 if (from.GetOrder() != order)
53 CInputModel::Error("Wrong Markov Chain order");
54 CMarkov_chain_params::TProbabilities::const_iterator i = from.GetProbabilities().begin();
55 m_next[enA].Init((*i++)->GetPrev_order());
56 m_next[enC].Init((*i++)->GetPrev_order());
57 m_next[enG].Init((*i++)->GetPrev_order());
58 m_next[enT].Init((*i++)->GetPrev_order());
59 if (i != from.GetProbabilities().end())
60 CInputModel::Error("Too many values in Markov Chain");
61 m_next[enN].Average(m_next[enA],m_next[enC],m_next[enG],m_next[enT]);
62 }
63
Average(Type & mc0,Type & mc1,Type & mc2,Type & mc3)64 template<int order> void CMarkovChain<order>::Average(Type& mc0, Type& mc1, Type& mc2, Type& mc3)
65 {
66 m_next[enA].Average(mc0.m_next[enA],mc1.m_next[enA],mc2.m_next[enA],mc3.m_next[enA]);
67 m_next[enC].Average(mc0.m_next[enC],mc1.m_next[enC],mc2.m_next[enC],mc3.m_next[enC]);
68 m_next[enG].Average(mc0.m_next[enG],mc1.m_next[enG],mc2.m_next[enG],mc3.m_next[enG]);
69 m_next[enT].Average(mc0.m_next[enT],mc1.m_next[enT],mc2.m_next[enT],mc3.m_next[enT]);
70 m_next[enN].Average(m_next[enA],m_next[enC],m_next[enG],m_next[enT]);
71 }
72
toScore()73 template<int order> void CMarkovChain<order>::toScore()
74 {
75 m_next[enA].toScore();
76 m_next[enC].toScore();
77 m_next[enG].toScore();
78 m_next[enT].toScore();
79 m_next[enN].toScore();
80 }
81
Score(const EResidue * seq) const82 template<int order> double CMarkovChainArray<order>::Score(const EResidue* seq) const
83 {
84 double score = 0;
85 for(int i = 0; i < m_length; ++i)
86 {
87 double s = m_mc[i].Score(seq+i);
88 if(s == BadScore()) return BadScore();
89 score += s;
90 }
91 return score;
92 }
93
InitScore(int l,const CMarkov_chain_array & from)94 template<int order> void CMarkovChainArray<order>::InitScore(int l, const CMarkov_chain_array& from)
95 {
96 m_length = l;
97 m_mc.resize(m_length);
98 CMarkov_chain_array::TMatrix::const_iterator mc = from.GetMatrix().begin();
99 for(int i = 0; i < m_length; ++i) m_mc[i].InitScore(**mc++);
100 if (mc != from.GetMatrix().end())
101 CInputModel::Error("Too many elements in Markov Chain array");
102 }
103
104 template<int order>
Score(const CEResidueVec & seq,int i) const105 double CWAM_Donor<order>::Score(const CEResidueVec& seq, int i) const
106 {
107 int first = i-m_left+1;
108 int last = i+m_right;
109 if(first-order < 0 || last >= (int)seq.size()) return BadScore(); // out of range
110 if(seq[i+1] != enG || (seq[i+2] != enT && seq[i+2] != enC)) return BadScore(); // no GT/GC
111 // if(seq[i+1] != enG || seq[i+2] != enT) return BadScore(); // no GT
112
113 return m_matrix.Score(&seq[first]);
114 }
115
116 template<int order>
CWAM_Donor(const CGnomon_param::C_Param & from)117 CWAM_Donor<order>::CWAM_Donor(const CGnomon_param::C_Param& from)
118 {
119 m_inexon = from.GetDonor().GetIn_exon();
120 m_inintron = from.GetDonor().GetIn_intron();
121
122 m_left = m_inexon;
123 m_right = m_inintron;
124
125 m_matrix.InitScore(m_inexon+m_inintron,from.GetDonor());
126 }
127
128
129 template<int order>
Score(const CEResidueVec & seq,int i) const130 double CWAM_Acceptor<order>::Score(const CEResidueVec& seq, int i) const
131 {
132 int first = i-m_left+1;
133 int last = i+m_right;
134 if(first-order < 0 || last >= (int)seq.size()) return BadScore(); // out of range
135 if(seq[i-1] != enA || seq[i] != enG) return BadScore(); // no AG
136
137 return m_matrix.Score(&seq[first]);
138 }
139
140 template<int order>
CWAM_Acceptor(const CGnomon_param::C_Param & from)141 CWAM_Acceptor<order>::CWAM_Acceptor(const CGnomon_param::C_Param& from)
142 {
143 m_inexon = from.GetAcceptor().GetIn_exon();
144 m_inintron = from.GetAcceptor().GetIn_intron();
145
146 m_left = m_inintron;
147 m_right = m_inexon;
148
149 m_matrix.InitScore(m_inexon+m_inintron,from.GetAcceptor());
150 }
151
152
153 template<int order>
CMC3_CodingRegion(const CGnomon_param::C_Param & from)154 CMC3_CodingRegion<order>::CMC3_CodingRegion(const CGnomon_param::C_Param& from)
155 {
156 int i=0;
157 ITERATE(CGnomon_param::C_Param::TCoding_region, p, from.GetCoding_region()) {
158 if (i < 3)
159 m_matrix[i++].InitScore(**p);
160 else
161 Error(class_id());
162 }
163 }
164
165 template<int order>
Score(const CEResidueVec & seq,int i,int codonshift) const166 double CMC3_CodingRegion<order>::Score(const CEResidueVec& seq, int i, int codonshift) const
167 {
168 if(i-order < 0) return BadScore(); // out of range
169 // else if(seq[i] == enN) return -2.; // discourage making exons of Ns
170 else return m_matrix[codonshift].Score(&seq[i]);
171 }
172
173 template<int order>
CMC_NonCodingRegion(const CGnomon_param::C_Param & from)174 CMC_NonCodingRegion<order>::CMC_NonCodingRegion(const CGnomon_param::C_Param& from)
175 {
176 m_matrix.InitScore(from.GetNon_coding_region());
177 }
178
179 template<int order>
Score(const CEResidueVec & seq,int i) const180 double CMC_NonCodingRegion<order>::Score(const CEResidueVec& seq, int i) const
181 {
182 if(i-order < 0) return BadScore(); // out of range
183 return m_matrix.Score(&seq[i]);
184 }
185
EvaluateInitialScore(State & r)186 template<class State> void EvaluateInitialScore(State& r)
187 {
188 int len = r.Stop()-r.Start()+1;
189 if(len >= r.MaxLen() || r.StopInside()) return; // >= makes sence here
190
191 int minlen = 1;
192 /*
193 if(!r.NoRightEnd())
194 {
195 if(r.isPlus()) minlen += r.TerminalPtr()->Left();
196 else minlen += r.TerminalPtr()->Right();
197 }
198 */
199 if(len < minlen) return; // states can go beyon the start
200 // so we shouldn't enforce MinLen
201
202 double score;
203 if(r.NoRightEnd())
204 {
205 score = r.ThroughLengthScore();
206 }
207 else
208 {
209 score = r.InitialLengthScore();
210 }
211 if(score == BadScore()) return;
212
213 double scr;
214 scr = r.RgnScore();
215 if(scr == BadScore()) return;
216 score += scr;
217
218 if(!r.NoRightEnd())
219 {
220 scr = r.TermScore();
221 if(scr == BadScore()) return;
222 score += scr;
223 }
224
225 if(r.OpenRgn()) r.UpdateScore(score);
226 }
227
228 END_SCOPE(gnomon)
229 END_NCBI_SCOPE
230
231 #endif // ALGO_GNOMON___HMM_INLINES__HPP
232