1 /* $Id: njn_localmaxstat.cpp 319891 2011-07-26 15:10:40Z boratyng $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's offical duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25
26 /*****************************************************************************
27
28 File name: njn_localmaxstat.cpp
29
30 Author: John Spouge
31
32 Contents:
33
34 ******************************************************************************/
35
36 #include <ncbi_pch.hpp>
37
38 #include <corelib/ncbistl.hpp>
39
40 #include "sls_alp_data.hpp"
41 #include "njn_localmaxstat.hpp"
42 #include "njn_memutil.hpp"
43 #include "njn_dynprogproblim.hpp"
44 #include "njn_function.hpp"
45 #include "njn_integer.hpp"
46 #include "njn_localmaxstatutil.hpp"
47
48 USING_NCBI_SCOPE;
49 USING_SCOPE(blast);
50
51 USING_SCOPE(Njn);
52
53 double LocalMaxStat::s_time = 0.0;
54
init(size_t dimension_)55 void LocalMaxStat::init (size_t dimension_)
56 {
57 if (dimension_ > 0)
58 {
59 d_score_p = new Int4 [dimension_];
60 d_prob_p = new double [dimension_];
61 }
62
63 d_dimension = dimension_;
64 }
65
free()66 void LocalMaxStat::free ()
67 {
68 if (getDimension () > 0)
69 {
70 delete [] d_score_p; d_score_p = 0;
71 delete [] d_prob_p; d_prob_p = 0;
72 }
73
74 d_dimension = 0;
75 }
76
clear()77 void LocalMaxStat::clear ()
78 {
79 free ();
80 init (0);
81
82 d_lambda = 0.0;
83 d_k = 0.0;
84 d_c = 0.0;
85 d_thetaMin = 0.0;
86 d_rMin = 0.0;
87 d_delta = 0;
88 d_thetaMinusDelta = 0.0;
89 d_mu = 0.0;
90 d_sigma = 0.0;
91 d_muAssoc = 0.0;
92 d_sigmaAssoc = 0.0;
93 d_meanWDLE = 0.0;
94 d_terminated = false;
95 }
96
copy(size_t dimension_,const Int4 * score_,const double * prob_,double lambda_,double k_,double c_,double thetaMin_,double rMin_,Int4 delta_,double thetaMinusDelta_,double mu_,double sigma_,double muAssoc_,double sigmaAssoc_,double meanLength_,bool terminated_)97 void LocalMaxStat::copy (
98 size_t dimension_, // #(distinct values) of scores & probabilities (which are paired)
99 const Int4 *score_, // scores
100 const double *prob_, // probabilities
101 double lambda_, // lambda for associated random walk
102 double k_, // k for random walk : exponential prefactor
103 double c_, // c for random walk : exponential prefactor (global alignment)
104 double thetaMin_, // theta for minimum expectation (exp (theta * score))
105 double rMin_, // minimum expectation (exp (theta * score))
106 Int4 delta_, // span
107 double thetaMinusDelta_, // renewal span parameter
108 double mu_, // n_step mean for random walk
109 double sigma_, // n_step standard deviation for random walk
110 double muAssoc_, // n_step mean for associated random walk (relative entropy)
111 double sigmaAssoc_, // n_step standard deviation for associated random walk
112 double meanLength_, // expected renewal length
113 bool terminated_) // ? Was the dynamic programming computation terminated prematurely ?
114 {
115 free ();
116 init (dimension_);
117
118 memcpy (d_score_p, score_, sizeof (Int4) * getDimension ());
119 memcpy (d_prob_p, prob_, sizeof (double) * getDimension ());
120
121 d_lambda = lambda_;
122 d_k = k_;
123 d_c = c_;
124 d_thetaMin = thetaMin_;
125 d_rMin = rMin_;
126 d_delta = delta_;
127 d_thetaMinusDelta = thetaMinusDelta_;
128 d_mu = mu_;
129 d_sigma = sigma_;
130 d_muAssoc = muAssoc_;
131 d_sigmaAssoc = sigmaAssoc_;
132 d_meanWDLE = meanLength_;
133 d_terminated = terminated_;
134 }
135
copy(size_t dimension_,const Int4 * score_,const double * prob_)136 void LocalMaxStat::copy (
137 size_t dimension_, // #(distinct values) of scores & probabilities (which are paired)
138 const Int4 *score_, // scores in increasing order
139 const double *prob_) // corresponding probabilities
140 {
141 if (dimension_ == 0)
142 {
143 clear ();
144 return;
145 }
146
147 if (! LocalMaxStatUtil::isLogarithmic (dimension_, score_, prob_))
148 {
149 //IoUtil::abort ("LocalMaxStat::copy : ! isLogarithmic");
150 throw Sls::error("The regime is not logarithmic\n",3);
151 }
152
153 size_t i = 0;
154
155 free ();
156 init (dimension_);
157
158 memcpy (d_score_p, score_, sizeof (Int4) * getDimension ());
159 memcpy (d_prob_p, prob_, sizeof (double) * getDimension ());
160
161 d_mu = LocalMaxStatUtil::mu (getDimension (), getScore (), getProb ());
162 d_sigma = 0.0;
163
164 for (i = 0; i < dimension_; i++)
165 {
166 d_sigma += static_cast <double> (score_ [i]) * static_cast <double> (score_ [i]) * prob_ [i];
167 }
168
169 d_sigma -= getMu () * getMu ();
170 d_sigma = Function::psqrt (getSigma ());
171
172 // calculate lambda
173
174 d_lambda = LocalMaxStatUtil::lambda (getDimension (), getScore (), getProb ());
175 d_muAssoc = LocalMaxStatUtil::muAssoc (getDimension (), getScore (), getProb (), getLambda ());
176 d_sigmaAssoc = 0.0;
177
178 for (i = 0; i < getDimension (); i++)
179 {
180 d_sigmaAssoc += static_cast <double> (getScore () [i]) * static_cast <double> (getScore () [i]) *
181 getProb () [i] * exp (getLambda () * static_cast <double> (getScore () [i]));
182 }
183
184 d_sigmaAssoc -= getMuAssoc () * getMuAssoc ();
185 d_sigmaAssoc = Function::psqrt (d_sigmaAssoc);
186
187 d_thetaMin = LocalMaxStatUtil::thetaMin (getDimension (), getScore (), getProb (), getLambda ());
188 d_rMin = LocalMaxStatUtil::rMin (getDimension (), getScore (), getProb (), getLambda (), getThetaMin ());
189
190 d_delta = LocalMaxStatUtil::delta (getDimension (), getScore ());
191 d_thetaMinusDelta = LocalMaxStatUtil::thetaMinusDelta (getLambda (), getDimension (), getScore ());
192
193 dynProgCalc ();
194 }
195
getR(double theta_) const196 double LocalMaxStat::getR (double theta_) const
197 {
198 return LocalMaxStatUtil::r (d_dimension, d_score_p, d_prob_p, theta_);
199 }
200
dynProgCalc()201 void LocalMaxStat::dynProgCalc ()
202 // k for random walk : exponential prefactor
203 // expected renewal length for weak ladder epochs
204 {
205 double eSumAlpha_ = 0.0;
206 double eOneMinusExpSumAlpha_ = 0.0;
207 LocalMaxStatUtil::descendingLadderEpoch (getDimension (), getScore (), getProb (),
208 &eSumAlpha_, &eOneMinusExpSumAlpha_, false,
209 getLambda (), getMu (), getMuAssoc (), getThetaMin (), getRMin (), getTime (), &d_terminated);
210
211 if (getTerminated ()) return;
212
213 // fluctuation sum quantities
214 double ratio = eOneMinusExpSumAlpha_ / eSumAlpha_;
215 d_k = getMu () * getMu () / getThetaMinusDelta () / getMuAssoc () * ratio * ratio;
216 d_meanWDLE = eSumAlpha_ / getMu ();
217 d_c = getK () * getMeanWDLE () / eOneMinusExpSumAlpha_;
218 }
219
220