1 /* $Id: hmm.cpp 620759 2020-11-30 16:00:57Z souvorov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software / database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software / database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Alexandre Souvorov
27 *
28 * File Description:
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33 #include <algo/gnomon/gnomon_exception.hpp>
34 #include <algo/gnomon/gnomon.hpp>
35 #include "hmm.hpp"
36 #include "hmm_inlines.hpp"
37 #include <serial/serial.hpp>
38 #include <serial/objistr.hpp>
39
40 BEGIN_NCBI_SCOPE
41 BEGIN_SCOPE(gnomon)
42
43 USING_SCOPE(objects);
44
45 const double kLnHalf = log(0.5);
46 const double kLnThree = log(3.);
47
ClosingScore(int l) const48 double CLorentz::ClosingScore(int l) const
49 {
50 if(l >= MaxLen()) return BadScore();
51 int i = (l-1)/m_step;
52 int delx = min((i+1)*m_step,MaxLen())-l;
53 double dely = (i == 0 ? 1 : m_clscore[i-1])-m_clscore[i];
54 return log(dely/m_step*delx+m_clscore[i]);
55 }
56
CHMM_State(EStrand strn,int point,const CSeqScores & seqscr)57 CHMM_State::CHMM_State(EStrand strn, int point, const CSeqScores& seqscr)
58 : m_stop(point), m_strand(strn),
59 m_score(BadScore()), m_leftstate(0), m_terminal(0),
60 m_seqscr(&seqscr)
61 {
62 }
63
64 /*
65 int CHMM_State::MinLen() const
66 {
67 int minlen = 1;
68
69 if(!NoLeftEnd())
70 {
71 if(isPlus()) minlen += m_leftstate->m_terminal->Right();
72 else minlen += m_leftstate->m_terminal->Left();
73 }
74
75 if(!NoRightEnd())
76 {
77 if(isPlus()) minlen += m_terminal->Left();
78 else minlen += m_terminal->Right();
79 }
80
81 return minlen;
82 }
83 */
84
RegionStart() const85 int CHMM_State::RegionStart() const
86 {
87 if(NoLeftEnd()) return 0;
88 else
89 {
90 int a = m_leftstate->m_stop+1;
91 if(isPlus()) a += m_leftstate->m_terminal->Right();
92 else a += m_leftstate->m_terminal->Left();
93 return min(a,m_seqscr->SeqLen()-1);
94 }
95 }
96
RegionStop() const97 int CHMM_State::RegionStop() const
98 {
99 if(NoRightEnd()) return m_seqscr->SeqLen()-1;
100 else
101 {
102 int b = m_stop;
103 if(isPlus()) b -= m_terminal->Left();
104 else b -= m_terminal->Right();
105 return max(0,b);
106 }
107 }
108
CExon(EStrand strn,int point,int ph,const CSeqScores & seqscr,const CExonParameters & exon_params)109 CExon::CExon(EStrand strn, int point, int ph, const CSeqScores& seqscr, const CExonParameters& exon_params)
110 : CHMM_State(strn,point,seqscr), m_phase(ph),
111 m_prevexon(0), m_mscore(BadScore()),
112 m_param(&exon_params)
113 {
114 if(!m_param || !m_param->m_initialised)
115 CInputModel::Error("CExon is not initialised\n");
116 }
117
StopInside() const118 bool CExon::StopInside() const
119 {
120 int frame;
121 if(isPlus())
122 {
123 frame = (Phase()-Stop())%3; // Stop()-Phase() is left codon end
124 if(frame < 0) frame += 3;
125 }
126 else
127 {
128 frame = (Phase()+Stop())%3; // Stop()+Phase() is right codon end
129 }
130
131 return m_seqscr->StopInside(Start(),Stop(),Strand(),frame);
132 }
133
OpenRgn() const134 bool CExon::OpenRgn() const
135 {
136 int frame;
137 if(isPlus())
138 {
139 frame = (Phase()-Stop())%3; // Stop()-Phase() is left codon end
140 if(frame < 0) frame += 3;
141 }
142 else
143 {
144 frame = (Phase()+Stop())%3; // Stop()+Phase() is right codon end
145 }
146
147 return m_seqscr->OpenCodingRegion(Start(),Stop(),Strand(),frame);
148 }
149
RgnScore() const150 double CExon::RgnScore() const
151 {
152 int frame;
153 if(isPlus())
154 {
155 frame = (Phase()-Stop())%3;
156 if(frame < 0) frame += 3;
157 }
158 else
159 {
160 frame = (Phase()+Stop())%3;
161 }
162
163 double score = m_seqscr->CodingScore(RegionStart(),RegionStop(),Strand(),frame);
164
165 return score;
166 }
167
UpdatePrevExon(const CExon & e)168 void CExon::UpdatePrevExon(const CExon& e)
169 {
170 m_mscore = max(e.Score(),e.MScore());
171 m_prevexon = &e;
172 while(m_prevexon != 0 && m_prevexon->Score() <= Score()) m_prevexon = m_prevexon->m_prevexon;
173 }
174
CSingleExon(EStrand strn,int point,const CSeqScores & seqscr,const CExonParameters & exon_params)175 CSingleExon::CSingleExon(EStrand strn, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
176 : CExon(strn,point,2,seqscr,exon_params)
177 {
178 m_terminal = isPlus() ? &m_seqscr->Stop() : &m_seqscr->Start();
179 if(isMinus()) m_phase = 0;
180
181 EvaluateInitialScore(*this);
182 }
183
LengthScore() const184 double CSingleExon::LengthScore() const
185 {
186 return m_param->m_singlelen.Score(Stop()-Start()+1)+kLnThree;
187 }
188
TermScore() const189 double CSingleExon::TermScore() const
190 {
191 if(isPlus()) return m_seqscr->StopScore(Stop(),Strand());
192 else return m_seqscr->StartScore(Stop(),Strand());
193 }
194
BranchScore(const CIntergenic &) const195 double CSingleExon::BranchScore(const CIntergenic&) const
196 {
197 if(isPlus() || (Stop()-Start())%3 == 2) return kLnHalf;
198 else return BadScore();
199 }
200
CFirstExon(EStrand strn,int ph,int point,const CSeqScores & seqscr,const CExonParameters & exon_params)201 CFirstExon::CFirstExon(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
202 : CExon(strn,point,ph,seqscr,exon_params)
203 {
204 if(isPlus())
205 {
206 m_terminal = &m_seqscr->Donor();
207 }
208 else
209 {
210 m_phase = 0;
211 m_terminal = &m_seqscr->Start();
212 }
213
214 EvaluateInitialScore(*this);
215 }
216
LengthScore() const217 double CFirstExon::LengthScore() const
218 {
219 int last = Stop()-Start();
220 return m_param->m_firstlen.Score(last+1)+kLnThree+m_param->m_firstphase[last%3];
221 }
222
TermScore() const223 double CFirstExon::TermScore() const
224 {
225 if(isPlus()) return m_seqscr->DonorScore(Stop(),Strand());
226 else return m_seqscr->StartScore(Stop(),Strand());
227 }
228
BranchScore(const CIntron & next) const229 double CFirstExon::BranchScore(const CIntron& next) const
230 {
231 if(Strand() != next.Strand()) return BadScore();
232
233 int ph = isPlus() ? Phase() : Phase()+Stop()-Start();
234
235 if((ph+1)%3 == next.Phase()) return 0;
236 else return BadScore();
237 }
238
CInternalExon(EStrand strn,int ph,int point,const CSeqScores & seqscr,const CExonParameters & exon_params)239 CInternalExon::CInternalExon(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
240 : CExon(strn,point,ph,seqscr,exon_params)
241 {
242 m_terminal = isPlus() ? &m_seqscr->Donor() : &m_seqscr->Acceptor();
243
244 EvaluateInitialScore(*this);
245 }
246
LengthScore() const247 double CInternalExon::LengthScore() const
248 {
249 int ph0,ph1;
250 int last = Stop()-Start();
251 if(isPlus())
252 {
253 ph1 = Phase();
254 ph0 = (ph1-last)%3;
255 if(ph0 < 0) ph0 += 3;
256 }
257 else
258 {
259 ph0 = Phase();
260 ph1 = (ph0+last)%3;
261 }
262
263 return m_param->m_internallen.Score(last+1)+kLnThree+m_param->m_internalphase[ph0][ph1];
264 }
265
TermScore() const266 double CInternalExon::TermScore() const
267 {
268 if(isPlus()) return m_seqscr->DonorScore(Stop(),Strand());
269 else return m_seqscr->AcceptorScore(Stop(),Strand());
270 }
271
CLastExon(EStrand strn,int ph,int point,const CSeqScores & seqscr,const CExonParameters & exon_params)272 CLastExon::CLastExon(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
273 : CExon(strn,point,ph,seqscr,exon_params)
274 {
275 if(isPlus())
276 {
277 m_phase = 2;
278 m_terminal = &m_seqscr->Stop();
279 }
280 else
281 {
282 m_terminal = &m_seqscr->Acceptor();
283 }
284
285 EvaluateInitialScore(*this);
286 }
287
LengthScore() const288 double CLastExon::LengthScore() const
289 {
290 int last = Stop()-Start();
291 return m_param->m_lastlen.Score(last+1)+kLnThree;
292 }
293
TermScore() const294 double CLastExon::TermScore() const
295 {
296 if(isPlus()) return m_seqscr->StopScore(Stop(),Strand());
297 else return m_seqscr->AcceptorScore(Stop(),Strand());
298 }
299
BranchScore(const CIntergenic &) const300 double CLastExon::BranchScore(const CIntergenic&) const
301 {
302 if(isPlus() || (Phase()+Stop()-Start())%3 == 2) return kLnHalf;
303 else return BadScore();
304 }
305
CIntron(EStrand strn,int ph,int point,const CSeqScores & seqscr,const CIntronParameters & intron_params)306 CIntron::CIntron(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CIntronParameters& intron_params)
307 : CHMM_State(strn,point,seqscr), m_phase(ph),
308 m_param(&intron_params)
309 {
310 if(!m_param || !m_param->m_initialised) CInputModel::Error("Intron is not initialised\n");
311 m_terminal = isPlus() ? &m_seqscr->Acceptor() : &m_seqscr->Donor();
312
313 EvaluateInitialScore(*this);
314 }
315
ClosingLengthScore() const316 double CIntron::ClosingLengthScore() const
317 {
318 return m_param->m_intronlen.ClosingScore(Stop()-Start()+1);
319 }
320
BranchScore(const CLastExon & next) const321 double CIntron::BranchScore(const CLastExon& next) const
322 {
323 if(Strand() != next.Strand()) return BadScore();
324
325 if(isPlus())
326 {
327 int shift = next.Stop()-next.Start();
328 if((Phase()+shift)%3 == next.Phase()) return m_param->m_lnTerminal;
329 }
330 else if(Phase() == next.Phase()) return m_param->m_lnTerminal;
331
332 return BadScore();
333 }
334
RgnScore() const335 double CIntron::RgnScore() const {
336 return 0; // Intron scores are substructed from all others
337 }
338
339
CIntergenic(EStrand strn,int point,const CSeqScores & seqscr,const CIntergenicParameters & intergenic_params)340 CIntergenic::CIntergenic(EStrand strn, int point, const CSeqScores& seqscr, const CIntergenicParameters& intergenic_params)
341 : CHMM_State(strn,point,seqscr),
342 m_param(&intergenic_params)
343 {
344 if(!m_param || !m_param->m_initialised) CInputModel::Error("Intergenic is not initialised\n");
345 m_terminal = isPlus() ? &m_seqscr->Start() : &m_seqscr->Stop();
346
347 EvaluateInitialScore(*this);
348 }
349
OpenRgn() const350 bool CIntergenic::OpenRgn() const
351 {
352 return m_seqscr->OpenIntergenicRegion(Start(),Stop());
353 }
354
RgnScore() const355 double CIntergenic::RgnScore() const
356 {
357 return m_seqscr->IntergenicScore(RegionStart(),RegionStop(),Strand());
358 }
359
TermScore() const360 double CIntergenic::TermScore() const
361 {
362 if(isPlus()) return m_seqscr->StartScore(Stop(),Strand());
363 else return m_seqscr->StopScore(Stop(),Strand());
364 }
365
BranchScore(const CFirstExon & next) const366 double CIntergenic::BranchScore(const CFirstExon& next) const
367 {
368 if(&next == m_leftstate)
369 {
370 if(next.isMinus()) return m_param->m_lnMulti;
371 else return BadScore();
372 }
373 else if(isPlus() && next.isPlus())
374 {
375 if((next.Stop()-next.Start())%3 == next.Phase()) return m_param->m_lnMulti;
376 else return BadScore();
377 }
378 else return BadScore();
379 }
380
BranchScore(const CSingleExon & next) const381 double CIntergenic::BranchScore(const CSingleExon& next) const
382 {
383 if(&next == m_leftstate)
384 {
385 if(next.isMinus()) return m_param->m_lnSingle;
386 else return BadScore();
387 }
388 else if(isPlus() && next.isPlus() &&
389 (next.Stop()-next.Start())%3 == 2) return m_param->m_lnSingle;
390 else return BadScore();
391 }
392
InitScore(const CMarkov_chain_params & from)393 void CMarkovChain<0>::InitScore(const CMarkov_chain_params& from)
394 {
395 Init(from);
396 toScore();
397 }
398
Init(const CMarkov_chain_params & from)399 void CMarkovChain<0>::Init(const CMarkov_chain_params& from)
400 {
401 if (from.GetOrder() != 0)
402 CInputModel::Error("Wrong Markov Chain order");
403 CMarkov_chain_params::TProbabilities::const_iterator i = from.GetProbabilities().begin();
404 m_score[enA] = (*i++)->GetValue();
405 m_score[enC] = (*i++)->GetValue();
406 m_score[enG] = (*i++)->GetValue();
407 m_score[enT] = (*i++)->GetValue();
408 if (i != from.GetProbabilities().end())
409 CInputModel::Error("Too many values in Markov Chain");
410 m_score[enN] = 0.25*(m_score[enA]+m_score[enC]+m_score[enG]+m_score[enT]);
411 }
412
Average(Type & mc0,Type & mc1,Type & mc2,Type & mc3)413 void CMarkovChain<0>::Average(Type& mc0, Type& mc1, Type& mc2, Type& mc3)
414 {
415 m_score[enA] = 0.25*(mc0.m_score[enA]+mc1.m_score[enA]+mc2.m_score[enA]+mc3.m_score[enA]);
416 m_score[enC] = 0.25*(mc0.m_score[enC]+mc1.m_score[enC]+mc2.m_score[enC]+mc3.m_score[enC]);
417 m_score[enG] = 0.25*(mc0.m_score[enG]+mc1.m_score[enG]+mc2.m_score[enG]+mc3.m_score[enG]);
418 m_score[enT] = 0.25*(mc0.m_score[enT]+mc1.m_score[enT]+mc2.m_score[enT]+mc3.m_score[enT]);
419 m_score[enN] = 0.25*(m_score[enA]+m_score[enC]+m_score[enG]+m_score[enT]);
420 }
421
toScore()422 void CMarkovChain<0>::toScore()
423 {
424 m_score[enA] = (m_score[enA] <= 0) ? BadScore() : log(4*m_score[enA]);
425 m_score[enC] = (m_score[enC] <= 0) ? BadScore() : log(4*m_score[enC]);
426 m_score[enG] = (m_score[enG] <= 0) ? BadScore() : log(4*m_score[enG]);
427 m_score[enT] = (m_score[enT] <= 0) ? BadScore() : log(4*m_score[enT]);
428 m_score[enN] = (m_score[enN] <= 0) ? BadScore() : log(4*m_score[enN]);
429 }
430
~CInputModel()431 CInputModel::~CInputModel() {}
432
CWMM_Start(const CGnomon_param::C_Param & from)433 CWMM_Start::CWMM_Start(const CGnomon_param::C_Param& from)
434 {
435 m_inexon = from.GetStart().GetIn_exon();
436 m_inintron = from.GetStart().GetIn_intron();
437
438 m_left = m_inintron;
439 m_right = m_inexon;
440
441 m_matrix.InitScore(m_inexon+m_inintron,from.GetStart());
442 }
443
Score(const CEResidueVec & seq,int i) const444 double CWMM_Start::Score(const CEResidueVec& seq, int i) const
445 {
446 int first = i-m_left+1;
447 int last = i+m_right;
448 if(first < 0 || last >= (int)seq.size()) return BadScore(); // out of range
449 if(seq[i-2] != enA || seq[i-1] != enT || seq[i] != enG) return BadScore(); // no ATG
450
451 return m_matrix.Score(&seq[first]);
452 }
453
CWAM_Stop(const CGnomon_param::C_Param & from)454 CWAM_Stop::CWAM_Stop(const CGnomon_param::C_Param& from)
455 {
456 m_inexon = from.GetStop().GetIn_exon();
457 m_inintron = from.GetStop().GetIn_intron();
458
459 m_left = m_inexon;
460 m_right = m_inintron;
461
462 m_matrix.InitScore(m_inexon+m_inintron,from.GetStop());
463 }
464
Score(const CEResidueVec & seq,int i) const465 double CWAM_Stop::Score(const CEResidueVec& seq, int i) const
466 {
467 int first = i-m_left+1;
468 int last = i+m_right;
469 if(first-1 < 0 || last >= (int)seq.size()) return BadScore(); // out of range
470 if((seq[i+1] != enT || seq[i+2] != enA || seq[i+3] != enA) &&
471 (seq[i+1] != enT || seq[i+2] != enA || seq[i+3] != enG) &&
472 (seq[i+1] != enT || seq[i+2] != enG || seq[i+3] != enA)) return BadScore(); // no stop-codon
473
474 return m_matrix.Score(&seq[first]);
475 }
476
477
Init(const CLength_distribution_params & from)478 void CLorentz::Init(const CLength_distribution_params& from)
479 {
480 m_A = from.GetA();
481 m_L = from.GetL();
482 m_minl = from.GetRange().GetMin();
483 m_maxl = from.GetRange().GetMax();
484 m_step = from.GetStep();
485
486 int num = (m_maxl-1)/m_step+1;
487 m_maxl = (num-1)*m_step+1;
488
489 try
490 {
491 m_score.resize(num,0);
492 m_clscore.resize(num,0);
493 }
494 catch(bad_alloc)
495 {
496 NCBI_THROW(CGnomonException, eMemoryLimit, "Not enough memory for CLorentz");
497 }
498
499 int i = 0;
500 ITERATE(CLength_distribution_params::TP, s, from.GetP()) {
501 m_score[i] = *s;
502 if((i+1)*m_step < m_minl)
503 m_score[i] = 0;
504 ++i;
505 }
506 while(i < num)
507 {
508 m_score[i] = m_A/(m_L*m_L+pow((i+0.5)*m_step,2));
509 ++i;
510 }
511
512 double sum = 0;
513 m_avlen = 0;
514 for(int l = m_minl; l <= m_maxl; ++l)
515 {
516 double scr = m_score[(l-1)/m_step];
517 sum += scr;
518 m_avlen += l*scr;
519 }
520
521 m_avlen /= sum;
522 for(int i = 0; i < num; ++i) m_score[i] /= sum;
523
524 m_clscore[num-1] = 0;
525 for(int i = num-2; i >= 0; --i)
526 m_clscore[i] = m_clscore[i+1]+m_score[i+1]*m_step;
527
528 for(int i = 0; i < num; ++i)
529 m_score[i] = (m_score[i] == 0) ? -25 : log(m_score[i]);
530 }
531
Through(int seqlen) const532 double CLorentz::Through(int seqlen) const
533 {
534 if(seqlen >= MaxLen()) return BadScore();
535
536 double through = 0;
537
538 if(seqlen >= MinLen()) {
539 int ifirst = (MinLen()-1)/m_step;
540 if(m_score[ifirst] != BadScore()) {
541 int w = (ifirst+1)*m_step-MinLen()+1;
542 int l2 = (MinLen()+(ifirst+1)*m_step);
543 through = (l2-2*seqlen)*w/2*exp(m_score[ifirst]);
544 }
545
546 int ilast = (seqlen-1)/m_step;
547 for(int i = 0; i < ilast; ++i) {
548 if(m_score[i] == BadScore()) continue;
549 int l2 = (i+1)*m_step+i*m_step+1;
550 through += (l2-2*seqlen)*m_step/2*exp(m_score[i]);
551 }
552
553 if(m_score[ilast] != BadScore()) {
554 int w = seqlen-ilast*m_step;
555 int l2 = ilast*m_step+1+seqlen;
556 through += (l2-2*seqlen)*w/2*exp(m_score[ilast]);
557 }
558 }
559
560 through = (AvLen()-seqlen-through)/AvLen();
561 through = (through <= 0) ? BadScore() : log(through);
562 return through;
563 }
564
CExonParameters(const CGnomon_param::C_Param & from)565 CExonParameters::CExonParameters(const CGnomon_param::C_Param& from)
566 {
567 string label = class_id();
568
569 {
570 int i = 0;
571 ITERATE(CExon_params::TFirst_exon_phase_probabilities, p, from.GetExon().GetFirst_exon_phase_probabilities()) {
572 if (i<3)
573 m_firstphase[i] = log(*p);
574 else
575 Error(label+" Too long First_exon_phase_probabilities");
576 ++i;
577 }
578 }
579
580 {
581 CExon_params::TInternal_exon_phase_probabilities::const_iterator p = from.GetExon().GetInternal_exon_phase_probabilities().begin();
582 for(int i = 0; i < 3; ++i) {
583 for(int j = 0; j < 3; ++j) {
584 m_internalphase[i][j] = log(*p++);
585 }
586 }
587 if (p != from.GetExon().GetInternal_exon_phase_probabilities().end())
588 Error(label+" Too long Internal_exon_phase_probabilities");
589 }
590
591 m_firstlen.Init(from.GetExon().GetFirst_exon_length());
592 m_internallen.Init(from.GetExon().GetInternal_exon_length());
593 m_lastlen.Init(from.GetExon().GetLast_exon_length());
594 m_singlelen.Init(from.GetExon().GetSingle_exon_length());
595
596 m_initialised = true;
597 }
598
599
CIntronParameters(const CGnomon_param::C_Param & from)600 CIntronParameters::CIntronParameters(const CGnomon_param::C_Param& from)
601 {
602 m_initp = from.GetIntron().GetInitp();
603 m_initp = m_initp/2; // two strands
604
605 int i = 0;
606 ITERATE(CIntron_params::TPhase_probabilities, p, from.GetIntron().GetPhase_probabilities()) {
607 if (i<3)
608 m_phasep[i] = *p;
609 else
610 Error(class_id()+" Too long Phase_probabilities");
611 ++i;
612 }
613
614 double toterm = from.GetIntron().GetTo_term();
615 m_lnTerminal = log(toterm);
616 m_lnInternal = log(1-toterm);
617
618 m_intronlen.Init(from.GetIntron().GetLength());
619 }
620
SetSeqLen(int seqlen) const621 void CIntronParameters::SetSeqLen(int seqlen) const
622 {
623 double lnthrough = m_intronlen.Through(seqlen);
624 for(int i = 0; i < 3; ++i)
625 {
626 m_lnDen[i] = log(m_initp*m_phasep[i]);
627 m_lnThrough[i] = (lnthrough == BadScore()) ? BadScore() : m_lnDen[i]+lnthrough;
628 }
629
630 m_initialised = true;
631 }
632
633
CIntergenicParameters(const CGnomon_param::C_Param & from)634 CIntergenicParameters::CIntergenicParameters(const CGnomon_param::C_Param& from)
635 {
636 m_initp = from.GetIntergenic().GetInitp();
637 m_initp = m_initp/2; // two strands
638
639 double tosingle = from.GetIntergenic().GetTo_single();
640
641 m_lnSingle = log(tosingle);
642 m_lnMulti = log(1-tosingle);
643
644 m_intergeniclen.Init(from.GetIntergenic().GetLength());
645 }
646
SetSeqLen(int seqlen) const647 void CIntergenicParameters::SetSeqLen(int seqlen) const
648 {
649 double lnthrough = m_intergeniclen.Through(seqlen);
650 m_lnDen = log(m_initp);
651 m_lnThrough = (lnthrough == BadScore()) ? BadScore() : m_lnDen+lnthrough;
652
653 m_initialised = true;
654 }
655
~SDetails()656 CHMMParameters::SDetails::~SDetails()
657 {
658 DeleteAllCreatedModels();
659 }
DeleteAllCreatedModels()660 void CHMMParameters::SDetails::DeleteAllCreatedModels()
661 {
662 ITERATE(vector<CInputModel*>, i, all_created_models)
663 delete *i;
664 all_created_models.clear();
665 params.clear();
666 }
667
GetCGList(const string & type)668 CHMMParameters::SDetails::TCGContentList& CHMMParameters::SDetails::GetCGList(const string& type)
669 {
670 TParamMap::iterator i = params.insert(make_pair(type,TCGContentList())).first;
671 if (i->second.empty())
672 i->second.push_back(make_pair<int,CInputModel*>(101,NULL));
673
674 return i->second;
675 }
676
StoreParam(const string & type,CInputModel * input_model,int low,int high)677 void CHMMParameters::SDetails::StoreParam( const string& type, CInputModel* input_model, int low, int high )
678 {
679 TCGContentList& list = GetCGList(type);
680
681 int lowest = 0;
682 TCGContentList::iterator i = list.begin();
683 while( i->first <= low ) {
684 lowest = i->first;
685 ++i;
686 }
687
688 if (lowest < low) {
689 i = list.insert(i, *i );
690 i->first = low;
691 ++i;
692 }
693
694 if (high < i->first) {
695 i = list.insert(i, *i );
696 i->first = high;
697 } else if (high > i->first) {
698 CInputModel::Error(type);
699 }
700
701 i->second = input_model;
702 }
703
CHMMParameters(const CGnomon_params & hmm_params_asn)704 CHMMParameters::CHMMParameters(const CGnomon_params& hmm_params_asn) : m_details( new SDetails(hmm_params_asn) )
705 {
706 }
707
CHMMParameters(CNcbiIstream & hmm_params_istr,ESerialDataFormat format)708 CHMMParameters::CHMMParameters(CNcbiIstream& hmm_params_istr, ESerialDataFormat format)
709 {
710 auto_ptr<CObjectIStream> inp(CObjectIStream::Open(format,hmm_params_istr));
711 CRef<CGnomon_params> params_asn(new CGnomon_params);
712 *inp >> *params_asn;
713 m_details.Reset( new SDetails(*params_asn) );
714 }
715
~CHMMParameters()716 CHMMParameters::~CHMMParameters()
717 {
718 }
719
720 template <class CParam>
ReadParameters(const CGnomon_params & hmm_params_asn,CGnomon_param::C_Param::E_Choice choice)721 void CHMMParameters::SDetails::ReadParameters(const CGnomon_params& hmm_params_asn, CGnomon_param::C_Param::E_Choice choice)
722 {
723 ITERATE(CGnomon_params::Tdata, p, hmm_params_asn.Get()) {
724 if ((*p)->GetParam().Which() != choice)
725 continue;
726
727 int low = (*p)->GetGc_content_range().GetFrom();
728 int high = (*p)->GetGc_content_range().GetTo();
729 if (!( 0<=low && low < high && high <= 100) )
730 CInputModel::Error(CParam::class_id());
731
732 CInputModel* input_model( new CParam((*p)->GetParam()) );
733 if (input_model==NULL)
734 continue;
735
736 all_created_models.push_back(input_model);
737
738 StoreParam(CParam::class_id(), input_model, low, high);
739 }
740 }
741
SDetails(const CGnomon_params & hmm_params_asn)742 CHMMParameters::SDetails::SDetails(const CGnomon_params& hmm_params_asn)
743 {
744 DeleteAllCreatedModels();
745
746 ReadParameters<CWAM_Donor<2> >(hmm_params_asn, CGnomon_param::C_Param::e_Donor);
747 ReadParameters<CWAM_Acceptor<2> >(hmm_params_asn, CGnomon_param::C_Param::e_Acceptor);
748 ReadParameters<CWMM_Start>(hmm_params_asn, CGnomon_param::C_Param::e_Start);
749 ReadParameters<CWAM_Stop>(hmm_params_asn, CGnomon_param::C_Param::e_Stop);
750 ReadParameters<CMC3_CodingRegion<5> >(hmm_params_asn, CGnomon_param::C_Param::e_Coding_region);
751 ReadParameters<CMC_NonCodingRegion<5> >(hmm_params_asn, CGnomon_param::C_Param::e_Non_coding_region);
752 ReadParameters<CIntronParameters>(hmm_params_asn, CGnomon_param::C_Param::e_Intron);
753 ReadParameters<CIntergenicParameters>(hmm_params_asn, CGnomon_param::C_Param::e_Intergenic);
754 ReadParameters<CExonParameters>(hmm_params_asn, CGnomon_param::C_Param::e_Exon);
755
756 }
757
GetParameter(const string & type,int cgcontent) const758 const CInputModel& CHMMParameters::GetParameter(const string& type, int cgcontent) const
759 {
760 return m_details->GetParameter(type, cgcontent);
761 }
762
GetParameter(const string & type,int cgcontent) const763 const CInputModel& CHMMParameters::SDetails::GetParameter(const string& type, int cgcontent) const
764 {
765 if (cgcontent < 0)
766 cgcontent = 0;
767 else if (cgcontent >= 100)
768 cgcontent = 99;
769
770 TParamMap::const_iterator i_param = params.find(type);
771 if (i_param == params.end())
772 CInputModel::Error( type );
773
774 ITERATE( TCGContentList, i, i_param->second) {
775 if (cgcontent < i->first) {
776 if (i->second == NULL) {
777 CInputModel::Error( type );
778 }
779 return *i->second;
780 }
781 }
782
783 _ASSERT( false);
784 CInputModel::Error( type );
785 return *params.begin()->second.front().second;
786 }
787
788
789 END_SCOPE(gnomon)
790 END_NCBI_SCOPE
791