1 /*  $Id: hmm.cpp 620759 2020-11-30 16:00:57Z souvorov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software / database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software / database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Alexandre Souvorov
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <algo/gnomon/gnomon_exception.hpp>
34 #include <algo/gnomon/gnomon.hpp>
35 #include "hmm.hpp"
36 #include "hmm_inlines.hpp"
37 #include <serial/serial.hpp>
38 #include <serial/objistr.hpp>
39 
40 BEGIN_NCBI_SCOPE
41 BEGIN_SCOPE(gnomon)
42 
43 USING_SCOPE(objects);
44 
45 const double kLnHalf = log(0.5);
46 const double kLnThree = log(3.);
47 
ClosingScore(int l) const48 double CLorentz::ClosingScore(int l) const
49 {
50     if(l >= MaxLen()) return BadScore();
51     int i = (l-1)/m_step;
52     int delx = min((i+1)*m_step,MaxLen())-l;
53     double dely = (i == 0 ? 1 : m_clscore[i-1])-m_clscore[i];
54     return log(dely/m_step*delx+m_clscore[i]);
55 }
56 
CHMM_State(EStrand strn,int point,const CSeqScores & seqscr)57 CHMM_State::CHMM_State(EStrand strn, int point, const CSeqScores& seqscr)
58     : m_stop(point), m_strand(strn),
59       m_score(BadScore()), m_leftstate(0), m_terminal(0),
60       m_seqscr(&seqscr)
61 {
62 }
63 
64 /*
65 int CHMM_State::MinLen() const
66 {
67     int minlen = 1;
68 
69     if(!NoLeftEnd())
70     {
71         if(isPlus()) minlen += m_leftstate->m_terminal->Right();
72         else minlen += m_leftstate->m_terminal->Left();
73     }
74 
75     if(!NoRightEnd())
76     {
77         if(isPlus()) minlen += m_terminal->Left();
78         else  minlen += m_terminal->Right();
79     }
80 
81     return minlen;
82 }
83 */
84 
RegionStart() const85 int CHMM_State::RegionStart() const
86 {
87     if(NoLeftEnd())  return 0;
88     else
89     {
90         int a = m_leftstate->m_stop+1;
91         if(isPlus()) a += m_leftstate->m_terminal->Right();
92         else a += m_leftstate->m_terminal->Left();
93         return min(a,m_seqscr->SeqLen()-1);
94     }
95 }
96 
RegionStop() const97 int CHMM_State::RegionStop() const
98 {
99     if(NoRightEnd()) return m_seqscr->SeqLen()-1;
100     else
101     {
102         int b = m_stop;
103         if(isPlus()) b -= m_terminal->Left();
104         else  b -= m_terminal->Right();
105         return max(0,b);
106     }
107 }
108 
CExon(EStrand strn,int point,int ph,const CSeqScores & seqscr,const CExonParameters & exon_params)109 CExon::CExon(EStrand strn, int point, int ph, const CSeqScores& seqscr, const CExonParameters& exon_params)
110     : CHMM_State(strn,point,seqscr), m_phase(ph),
111       m_prevexon(0), m_mscore(BadScore()),
112       m_param(&exon_params)
113 {
114     if(!m_param || !m_param->m_initialised)
115         CInputModel::Error("CExon is not initialised\n");
116 }
117 
StopInside() const118 bool CExon::StopInside() const
119 {
120     int frame;
121     if(isPlus())
122     {
123         frame = (Phase()-Stop())%3;    // Stop()-Phase() is left codon end
124         if(frame < 0) frame += 3;
125     }
126     else
127     {
128         frame = (Phase()+Stop())%3;    // Stop()+Phase() is right codon end
129     }
130 
131     return m_seqscr->StopInside(Start(),Stop(),Strand(),frame);
132 }
133 
OpenRgn() const134 bool CExon::OpenRgn() const
135 {
136     int frame;
137     if(isPlus())
138     {
139         frame = (Phase()-Stop())%3;    // Stop()-Phase() is left codon end
140         if(frame < 0) frame += 3;
141     }
142     else
143     {
144         frame = (Phase()+Stop())%3;    // Stop()+Phase() is right codon end
145     }
146 
147     return m_seqscr->OpenCodingRegion(Start(),Stop(),Strand(),frame);
148 }
149 
RgnScore() const150 double CExon::RgnScore() const
151 {
152     int frame;
153     if(isPlus())
154     {
155         frame = (Phase()-Stop())%3;
156         if(frame < 0) frame += 3;
157     }
158     else
159     {
160         frame = (Phase()+Stop())%3;
161     }
162 
163     double score = m_seqscr->CodingScore(RegionStart(),RegionStop(),Strand(),frame);
164 
165     return score;
166 }
167 
UpdatePrevExon(const CExon & e)168 void CExon::UpdatePrevExon(const CExon& e)
169 {
170     m_mscore = max(e.Score(),e.MScore());
171     m_prevexon = &e;
172     while(m_prevexon != 0 && m_prevexon->Score() <= Score()) m_prevexon = m_prevexon->m_prevexon;
173 }
174 
CSingleExon(EStrand strn,int point,const CSeqScores & seqscr,const CExonParameters & exon_params)175 CSingleExon::CSingleExon(EStrand strn, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
176     : CExon(strn,point,2,seqscr,exon_params)
177 {
178     m_terminal = isPlus() ? &m_seqscr->Stop() : &m_seqscr->Start();
179     if(isMinus()) m_phase = 0;
180 
181     EvaluateInitialScore(*this);
182 }
183 
LengthScore() const184 double CSingleExon::LengthScore() const
185 {
186     return m_param->m_singlelen.Score(Stop()-Start()+1)+kLnThree;
187 }
188 
TermScore() const189 double CSingleExon::TermScore() const
190 {
191     if(isPlus()) return m_seqscr->StopScore(Stop(),Strand());
192     else return m_seqscr->StartScore(Stop(),Strand());
193 }
194 
BranchScore(const CIntergenic &) const195 double CSingleExon::BranchScore(const CIntergenic&) const
196 {
197     if(isPlus() || (Stop()-Start())%3 == 2) return kLnHalf;
198     else return BadScore();
199 }
200 
CFirstExon(EStrand strn,int ph,int point,const CSeqScores & seqscr,const CExonParameters & exon_params)201 CFirstExon::CFirstExon(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
202     : CExon(strn,point,ph,seqscr,exon_params)
203 {
204     if(isPlus())
205     {
206         m_terminal = &m_seqscr->Donor();
207     }
208     else
209     {
210         m_phase = 0;
211         m_terminal = &m_seqscr->Start();
212     }
213 
214     EvaluateInitialScore(*this);
215 }
216 
LengthScore() const217 double CFirstExon::LengthScore() const
218 {
219     int last = Stop()-Start();
220     return m_param->m_firstlen.Score(last+1)+kLnThree+m_param->m_firstphase[last%3];
221 }
222 
TermScore() const223 double CFirstExon::TermScore() const
224 {
225     if(isPlus()) return m_seqscr->DonorScore(Stop(),Strand());
226     else return m_seqscr->StartScore(Stop(),Strand());
227 }
228 
BranchScore(const CIntron & next) const229 double CFirstExon::BranchScore(const CIntron& next) const
230 {
231     if(Strand() != next.Strand()) return BadScore();
232 
233     int ph = isPlus() ? Phase() : Phase()+Stop()-Start();
234 
235     if((ph+1)%3 == next.Phase()) return 0;
236     else return BadScore();
237 }
238 
CInternalExon(EStrand strn,int ph,int point,const CSeqScores & seqscr,const CExonParameters & exon_params)239 CInternalExon::CInternalExon(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
240     : CExon(strn,point,ph,seqscr,exon_params)
241 {
242     m_terminal = isPlus() ? &m_seqscr->Donor() : &m_seqscr->Acceptor();
243 
244     EvaluateInitialScore(*this);
245 }
246 
LengthScore() const247 double CInternalExon::LengthScore() const
248 {
249     int ph0,ph1;
250     int last = Stop()-Start();
251     if(isPlus())
252     {
253         ph1 = Phase();
254         ph0 = (ph1-last)%3;
255         if(ph0 < 0) ph0 += 3;
256     }
257     else
258     {
259         ph0 = Phase();
260         ph1 = (ph0+last)%3;
261     }
262 
263     return m_param->m_internallen.Score(last+1)+kLnThree+m_param->m_internalphase[ph0][ph1];
264 }
265 
TermScore() const266 double CInternalExon::TermScore() const
267 {
268     if(isPlus()) return m_seqscr->DonorScore(Stop(),Strand());
269     else return m_seqscr->AcceptorScore(Stop(),Strand());
270 }
271 
CLastExon(EStrand strn,int ph,int point,const CSeqScores & seqscr,const CExonParameters & exon_params)272 CLastExon::CLastExon(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
273     : CExon(strn,point,ph,seqscr,exon_params)
274 {
275     if(isPlus())
276     {
277         m_phase = 2;
278         m_terminal = &m_seqscr->Stop();
279     }
280     else
281     {
282         m_terminal = &m_seqscr->Acceptor();
283     }
284 
285     EvaluateInitialScore(*this);
286 }
287 
LengthScore() const288 double CLastExon::LengthScore() const
289 {
290     int last = Stop()-Start();
291     return m_param->m_lastlen.Score(last+1)+kLnThree;
292 }
293 
TermScore() const294 double CLastExon::TermScore() const
295 {
296     if(isPlus()) return m_seqscr->StopScore(Stop(),Strand());
297     else return m_seqscr->AcceptorScore(Stop(),Strand());
298 }
299 
BranchScore(const CIntergenic &) const300 double CLastExon::BranchScore(const CIntergenic&) const
301 {
302     if(isPlus() || (Phase()+Stop()-Start())%3 == 2) return kLnHalf;
303     else return BadScore();
304 }
305 
CIntron(EStrand strn,int ph,int point,const CSeqScores & seqscr,const CIntronParameters & intron_params)306 CIntron::CIntron(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CIntronParameters& intron_params)
307     : CHMM_State(strn,point,seqscr), m_phase(ph),
308       m_param(&intron_params)
309 {
310     if(!m_param || !m_param->m_initialised) CInputModel::Error("Intron is not initialised\n");
311     m_terminal = isPlus() ? &m_seqscr->Acceptor() : &m_seqscr->Donor();
312 
313     EvaluateInitialScore(*this);
314 }
315 
ClosingLengthScore() const316 double CIntron::ClosingLengthScore() const
317 {
318     return m_param->m_intronlen.ClosingScore(Stop()-Start()+1);
319 }
320 
BranchScore(const CLastExon & next) const321 double CIntron::BranchScore(const CLastExon& next) const
322 {
323     if(Strand() != next.Strand()) return BadScore();
324 
325     if(isPlus())
326     {
327         int shift = next.Stop()-next.Start();
328         if((Phase()+shift)%3 == next.Phase()) return m_param->m_lnTerminal;
329     }
330     else if(Phase() == next.Phase()) return m_param->m_lnTerminal;
331 
332     return BadScore();
333 }
334 
RgnScore() const335 double CIntron::RgnScore() const {
336     return 0;    // Intron scores are substructed from all others
337 }
338 
339 
CIntergenic(EStrand strn,int point,const CSeqScores & seqscr,const CIntergenicParameters & intergenic_params)340 CIntergenic::CIntergenic(EStrand strn, int point, const CSeqScores& seqscr, const CIntergenicParameters& intergenic_params)
341     : CHMM_State(strn,point,seqscr),
342       m_param(&intergenic_params)
343 {
344     if(!m_param || !m_param->m_initialised) CInputModel::Error("Intergenic is not initialised\n");
345     m_terminal = isPlus() ? &m_seqscr->Start() : &m_seqscr->Stop();
346 
347     EvaluateInitialScore(*this);
348 }
349 
OpenRgn() const350 bool CIntergenic::OpenRgn() const
351 {
352     return m_seqscr->OpenIntergenicRegion(Start(),Stop());
353 }
354 
RgnScore() const355 double CIntergenic::RgnScore() const
356 {
357     return m_seqscr->IntergenicScore(RegionStart(),RegionStop(),Strand());
358 }
359 
TermScore() const360 double CIntergenic::TermScore() const
361 {
362     if(isPlus()) return m_seqscr->StartScore(Stop(),Strand());
363     else return m_seqscr->StopScore(Stop(),Strand());
364 }
365 
BranchScore(const CFirstExon & next) const366 double CIntergenic::BranchScore(const CFirstExon& next) const
367 {
368     if(&next == m_leftstate)
369     {
370         if(next.isMinus()) return m_param->m_lnMulti;
371         else return BadScore();
372     }
373     else if(isPlus() && next.isPlus())
374     {
375         if((next.Stop()-next.Start())%3 == next.Phase()) return m_param->m_lnMulti;
376         else return BadScore();
377     }
378     else return BadScore();
379 }
380 
BranchScore(const CSingleExon & next) const381 double CIntergenic::BranchScore(const CSingleExon& next) const
382 {
383     if(&next == m_leftstate)
384     {
385         if(next.isMinus()) return m_param->m_lnSingle;
386         else return BadScore();
387     }
388     else if(isPlus() && next.isPlus() &&
389               (next.Stop()-next.Start())%3 == 2) return m_param->m_lnSingle;
390     else return BadScore();
391 }
392 
InitScore(const CMarkov_chain_params & from)393 void CMarkovChain<0>::InitScore(const CMarkov_chain_params& from)
394 {
395     Init(from);
396     toScore();
397 }
398 
Init(const CMarkov_chain_params & from)399 void CMarkovChain<0>::Init(const CMarkov_chain_params& from)
400 {
401     if (from.GetOrder() != 0)
402         CInputModel::Error("Wrong Markov Chain order");
403     CMarkov_chain_params::TProbabilities::const_iterator i = from.GetProbabilities().begin();
404     m_score[enA] = (*i++)->GetValue();
405     m_score[enC] = (*i++)->GetValue();
406     m_score[enG] = (*i++)->GetValue();
407     m_score[enT] = (*i++)->GetValue();
408     if (i != from.GetProbabilities().end())
409         CInputModel::Error("Too many values in Markov Chain");
410     m_score[enN] = 0.25*(m_score[enA]+m_score[enC]+m_score[enG]+m_score[enT]);
411 }
412 
Average(Type & mc0,Type & mc1,Type & mc2,Type & mc3)413 void CMarkovChain<0>::Average(Type& mc0, Type& mc1, Type& mc2, Type& mc3)
414 {
415     m_score[enA] = 0.25*(mc0.m_score[enA]+mc1.m_score[enA]+mc2.m_score[enA]+mc3.m_score[enA]);
416     m_score[enC] = 0.25*(mc0.m_score[enC]+mc1.m_score[enC]+mc2.m_score[enC]+mc3.m_score[enC]);
417     m_score[enG] = 0.25*(mc0.m_score[enG]+mc1.m_score[enG]+mc2.m_score[enG]+mc3.m_score[enG]);
418     m_score[enT] = 0.25*(mc0.m_score[enT]+mc1.m_score[enT]+mc2.m_score[enT]+mc3.m_score[enT]);
419     m_score[enN] = 0.25*(m_score[enA]+m_score[enC]+m_score[enG]+m_score[enT]);
420 }
421 
toScore()422 void CMarkovChain<0>::toScore()
423 {
424     m_score[enA] = (m_score[enA] <= 0) ? BadScore() : log(4*m_score[enA]);
425     m_score[enC] = (m_score[enC] <= 0) ? BadScore() : log(4*m_score[enC]);
426     m_score[enG] = (m_score[enG] <= 0) ? BadScore() : log(4*m_score[enG]);
427     m_score[enT] = (m_score[enT] <= 0) ? BadScore() : log(4*m_score[enT]);
428     m_score[enN] = (m_score[enN] <= 0) ? BadScore() : log(4*m_score[enN]);
429 }
430 
~CInputModel()431 CInputModel::~CInputModel() {}
432 
CWMM_Start(const CGnomon_param::C_Param & from)433 CWMM_Start::CWMM_Start(const CGnomon_param::C_Param& from)
434 {
435     m_inexon = from.GetStart().GetIn_exon();
436     m_inintron = from.GetStart().GetIn_intron();
437 
438     m_left = m_inintron;
439     m_right = m_inexon;
440 
441     m_matrix.InitScore(m_inexon+m_inintron,from.GetStart());
442 }
443 
Score(const CEResidueVec & seq,int i) const444 double CWMM_Start::Score(const CEResidueVec& seq, int i) const
445 {
446     int first = i-m_left+1;
447     int last = i+m_right;
448     if(first < 0 || last >= (int)seq.size()) return BadScore();  // out of range
449     if(seq[i-2] != enA || seq[i-1] != enT || seq[i] != enG) return BadScore(); // no ATG
450 
451     return m_matrix.Score(&seq[first]);
452 }
453 
CWAM_Stop(const CGnomon_param::C_Param & from)454 CWAM_Stop::CWAM_Stop(const CGnomon_param::C_Param& from)
455 {
456     m_inexon = from.GetStop().GetIn_exon();
457     m_inintron = from.GetStop().GetIn_intron();
458 
459     m_left = m_inexon;
460     m_right = m_inintron;
461 
462     m_matrix.InitScore(m_inexon+m_inintron,from.GetStop());
463 }
464 
Score(const CEResidueVec & seq,int i) const465 double CWAM_Stop::Score(const CEResidueVec& seq, int i) const
466 {
467     int first = i-m_left+1;
468     int last = i+m_right;
469     if(first-1 < 0 || last >= (int)seq.size()) return BadScore();  // out of range
470     if((seq[i+1] != enT || seq[i+2] != enA || seq[i+3] != enA) &&
471         (seq[i+1] != enT || seq[i+2] != enA || seq[i+3] != enG) &&
472         (seq[i+1] != enT || seq[i+2] != enG || seq[i+3] != enA)) return BadScore(); // no stop-codon
473 
474     return m_matrix.Score(&seq[first]);
475 }
476 
477 
Init(const CLength_distribution_params & from)478 void CLorentz::Init(const CLength_distribution_params& from)
479 {
480     m_A = from.GetA();
481     m_L = from.GetL();
482     m_minl = from.GetRange().GetMin();
483     m_maxl = from.GetRange().GetMax();
484     m_step = from.GetStep();
485 
486     int num = (m_maxl-1)/m_step+1;
487     m_maxl = (num-1)*m_step+1;
488 
489     try
490     {
491         m_score.resize(num,0);
492         m_clscore.resize(num,0);
493     }
494     catch(bad_alloc)
495     {
496         NCBI_THROW(CGnomonException, eMemoryLimit, "Not enough memory for CLorentz");
497     }
498 
499     int i = 0;
500     ITERATE(CLength_distribution_params::TP, s, from.GetP()) {
501         m_score[i] = *s;
502         if((i+1)*m_step < m_minl)
503             m_score[i] = 0;
504         ++i;
505     }
506     while(i < num)
507     {
508         m_score[i] = m_A/(m_L*m_L+pow((i+0.5)*m_step,2));
509         ++i;
510     }
511 
512     double sum = 0;
513     m_avlen = 0;
514     for(int l = m_minl; l <= m_maxl; ++l)
515     {
516         double scr = m_score[(l-1)/m_step];
517         sum += scr;
518         m_avlen += l*scr;
519     }
520 
521     m_avlen /= sum;
522     for(int i = 0; i < num; ++i) m_score[i] /= sum;
523 
524     m_clscore[num-1] = 0;
525     for(int i = num-2; i >= 0; --i)
526         m_clscore[i] = m_clscore[i+1]+m_score[i+1]*m_step;
527 
528     for(int i = 0; i < num; ++i)
529         m_score[i] = (m_score[i] == 0) ? -25 : log(m_score[i]);
530 }
531 
Through(int seqlen) const532 double CLorentz::Through(int seqlen) const
533 {
534     if(seqlen >= MaxLen()) return BadScore();
535 
536     double through = 0;
537 
538     if(seqlen >= MinLen()) {
539         int ifirst = (MinLen()-1)/m_step;
540         if(m_score[ifirst] != BadScore()) {
541             int w = (ifirst+1)*m_step-MinLen()+1;
542             int l2 = (MinLen()+(ifirst+1)*m_step);
543             through = (l2-2*seqlen)*w/2*exp(m_score[ifirst]);
544         }
545 
546         int ilast = (seqlen-1)/m_step;
547         for(int i = 0; i < ilast; ++i) {
548             if(m_score[i] == BadScore()) continue;
549             int l2 = (i+1)*m_step+i*m_step+1;
550             through += (l2-2*seqlen)*m_step/2*exp(m_score[i]);
551         }
552 
553         if(m_score[ilast] != BadScore()) {
554             int w = seqlen-ilast*m_step;
555             int l2 = ilast*m_step+1+seqlen;
556             through += (l2-2*seqlen)*w/2*exp(m_score[ilast]);
557         }
558     }
559 
560     through = (AvLen()-seqlen-through)/AvLen();
561     through = (through <= 0) ? BadScore() : log(through);
562     return through;
563 }
564 
CExonParameters(const CGnomon_param::C_Param & from)565 CExonParameters::CExonParameters(const CGnomon_param::C_Param& from)
566 {
567     string label = class_id();
568 
569     {
570         int i = 0;
571         ITERATE(CExon_params::TFirst_exon_phase_probabilities, p, from.GetExon().GetFirst_exon_phase_probabilities()) {
572             if (i<3)
573                 m_firstphase[i] = log(*p);
574             else
575                 Error(label+" Too long First_exon_phase_probabilities");
576             ++i;
577         }
578     }
579 
580     {
581         CExon_params::TInternal_exon_phase_probabilities::const_iterator p = from.GetExon().GetInternal_exon_phase_probabilities().begin();
582         for(int i = 0; i < 3; ++i) {
583             for(int j = 0; j < 3; ++j)  {
584                 m_internalphase[i][j] = log(*p++);
585             }
586         }
587         if (p != from.GetExon().GetInternal_exon_phase_probabilities().end())
588             Error(label+" Too long Internal_exon_phase_probabilities");
589     }
590 
591     m_firstlen.Init(from.GetExon().GetFirst_exon_length());
592     m_internallen.Init(from.GetExon().GetInternal_exon_length());
593     m_lastlen.Init(from.GetExon().GetLast_exon_length());
594     m_singlelen.Init(from.GetExon().GetSingle_exon_length());
595 
596     m_initialised = true;
597 }
598 
599 
CIntronParameters(const CGnomon_param::C_Param & from)600 CIntronParameters::CIntronParameters(const CGnomon_param::C_Param& from)
601 {
602     m_initp = from.GetIntron().GetInitp();
603     m_initp = m_initp/2;      // two strands
604 
605     int i = 0;
606     ITERATE(CIntron_params::TPhase_probabilities, p, from.GetIntron().GetPhase_probabilities()) {
607         if (i<3)
608             m_phasep[i] = *p;
609         else
610             Error(class_id()+" Too long Phase_probabilities");
611         ++i;
612     }
613 
614     double toterm =  from.GetIntron().GetTo_term();
615     m_lnTerminal = log(toterm);
616     m_lnInternal = log(1-toterm);
617 
618     m_intronlen.Init(from.GetIntron().GetLength());
619 }
620 
SetSeqLen(int seqlen) const621 void CIntronParameters::SetSeqLen(int seqlen) const
622 {
623     double lnthrough = m_intronlen.Through(seqlen);
624     for(int i = 0; i < 3; ++i)
625     {
626         m_lnDen[i] = log(m_initp*m_phasep[i]);
627         m_lnThrough[i] = (lnthrough == BadScore()) ? BadScore() : m_lnDen[i]+lnthrough;
628     }
629 
630     m_initialised = true;
631 }
632 
633 
CIntergenicParameters(const CGnomon_param::C_Param & from)634 CIntergenicParameters::CIntergenicParameters(const CGnomon_param::C_Param& from)
635 {
636     m_initp = from.GetIntergenic().GetInitp();
637     m_initp = m_initp/2;      // two strands
638 
639     double tosingle =  from.GetIntergenic().GetTo_single();
640 
641     m_lnSingle = log(tosingle);
642     m_lnMulti = log(1-tosingle);
643 
644     m_intergeniclen.Init(from.GetIntergenic().GetLength());
645 }
646 
SetSeqLen(int seqlen) const647 void CIntergenicParameters::SetSeqLen(int seqlen) const
648 {
649     double lnthrough = m_intergeniclen.Through(seqlen);
650     m_lnDen = log(m_initp);
651     m_lnThrough = (lnthrough == BadScore()) ? BadScore() : m_lnDen+lnthrough;
652 
653     m_initialised = true;
654 }
655 
~SDetails()656 CHMMParameters::SDetails::~SDetails()
657 {
658     DeleteAllCreatedModels();
659 }
DeleteAllCreatedModels()660 void CHMMParameters::SDetails::DeleteAllCreatedModels()
661 {
662     ITERATE(vector<CInputModel*>, i, all_created_models)
663         delete *i;
664     all_created_models.clear();
665     params.clear();
666 }
667 
GetCGList(const string & type)668 CHMMParameters::SDetails::TCGContentList& CHMMParameters::SDetails::GetCGList(const string& type)
669 {
670     TParamMap::iterator i = params.insert(make_pair(type,TCGContentList())).first;
671     if (i->second.empty())
672         i->second.push_back(make_pair<int,CInputModel*>(101,NULL));
673 
674     return i->second;
675 }
676 
StoreParam(const string & type,CInputModel * input_model,int low,int high)677 void CHMMParameters::SDetails::StoreParam( const string& type, CInputModel* input_model, int low, int high )
678 {
679     TCGContentList& list = GetCGList(type);
680 
681     int lowest = 0;
682     TCGContentList::iterator i = list.begin();
683     while( i->first <= low ) {
684         lowest = i->first;
685         ++i;
686     }
687 
688     if (lowest < low) {
689         i = list.insert(i, *i );
690         i->first = low;
691         ++i;
692     }
693 
694     if (high < i->first) {
695         i = list.insert(i, *i );
696         i->first = high;
697     } else if (high > i->first) {
698         CInputModel::Error(type);
699     }
700 
701     i->second = input_model;
702 }
703 
CHMMParameters(const CGnomon_params & hmm_params_asn)704 CHMMParameters::CHMMParameters(const CGnomon_params& hmm_params_asn) : m_details( new SDetails(hmm_params_asn) )
705 {
706 }
707 
CHMMParameters(CNcbiIstream & hmm_params_istr,ESerialDataFormat format)708 CHMMParameters::CHMMParameters(CNcbiIstream& hmm_params_istr, ESerialDataFormat format)
709 {
710     auto_ptr<CObjectIStream> inp(CObjectIStream::Open(format,hmm_params_istr));
711     CRef<CGnomon_params> params_asn(new CGnomon_params);
712     *inp >> *params_asn;
713     m_details.Reset( new SDetails(*params_asn) );
714 }
715 
~CHMMParameters()716 CHMMParameters::~CHMMParameters()
717 {
718 }
719 
720 template <class CParam>
ReadParameters(const CGnomon_params & hmm_params_asn,CGnomon_param::C_Param::E_Choice choice)721 void CHMMParameters::SDetails::ReadParameters(const CGnomon_params& hmm_params_asn, CGnomon_param::C_Param::E_Choice choice)
722 {
723     ITERATE(CGnomon_params::Tdata, p, hmm_params_asn.Get()) {
724         if ((*p)->GetParam().Which() != choice)
725             continue;
726 
727         int low = (*p)->GetGc_content_range().GetFrom();
728         int high = (*p)->GetGc_content_range().GetTo();
729         if (!( 0<=low && low < high && high <= 100) )
730             CInputModel::Error(CParam::class_id());
731 
732         CInputModel* input_model( new CParam((*p)->GetParam()) );
733         if (input_model==NULL)
734             continue;
735 
736         all_created_models.push_back(input_model);
737 
738         StoreParam(CParam::class_id(), input_model, low, high);
739     }
740 }
741 
SDetails(const CGnomon_params & hmm_params_asn)742 CHMMParameters::SDetails::SDetails(const CGnomon_params& hmm_params_asn)
743 {
744     DeleteAllCreatedModels();
745 
746     ReadParameters<CWAM_Donor<2> >(hmm_params_asn, CGnomon_param::C_Param::e_Donor);
747     ReadParameters<CWAM_Acceptor<2> >(hmm_params_asn, CGnomon_param::C_Param::e_Acceptor);
748     ReadParameters<CWMM_Start>(hmm_params_asn, CGnomon_param::C_Param::e_Start);
749     ReadParameters<CWAM_Stop>(hmm_params_asn, CGnomon_param::C_Param::e_Stop);
750     ReadParameters<CMC3_CodingRegion<5> >(hmm_params_asn, CGnomon_param::C_Param::e_Coding_region);
751     ReadParameters<CMC_NonCodingRegion<5> >(hmm_params_asn, CGnomon_param::C_Param::e_Non_coding_region);
752     ReadParameters<CIntronParameters>(hmm_params_asn, CGnomon_param::C_Param::e_Intron);
753     ReadParameters<CIntergenicParameters>(hmm_params_asn, CGnomon_param::C_Param::e_Intergenic);
754     ReadParameters<CExonParameters>(hmm_params_asn, CGnomon_param::C_Param::e_Exon);
755 
756 }
757 
GetParameter(const string & type,int cgcontent) const758 const CInputModel& CHMMParameters::GetParameter(const string& type, int cgcontent) const
759 {
760     return m_details->GetParameter(type, cgcontent);
761 }
762 
GetParameter(const string & type,int cgcontent) const763 const CInputModel& CHMMParameters::SDetails::GetParameter(const string& type, int cgcontent) const
764 {
765     if (cgcontent < 0)
766         cgcontent = 0;
767     else if (cgcontent >= 100)
768         cgcontent = 99;
769 
770     TParamMap::const_iterator i_param = params.find(type);
771     if (i_param == params.end())
772         CInputModel::Error( type );
773 
774     ITERATE( TCGContentList, i, i_param->second) {
775         if (cgcontent < i->first) {
776             if (i->second == NULL) {
777                 CInputModel::Error( type );
778             }
779             return *i->second;
780         }
781     }
782 
783     _ASSERT( false);
784     CInputModel::Error( type );
785     return *params.begin()->second.front().second;
786 }
787 
788 
789 END_SCOPE(gnomon)
790 END_NCBI_SCOPE
791