1 #ifndef INCLUDED_NJN_LOCALMAXSTAT 2 #define INCLUDED_NJN_LOCALMAXSTAT 3 4 /* $Id: $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's offical duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * ===========================================================================*/ 28 29 /***************************************************************************** 30 31 File name: njn_localmaxstat.hpp 32 33 Author: John Spouge 34 35 Contents: Random walk parameters 36 37 ******************************************************************************/ 38 39 #include <math.h> 40 #include <vector> 41 #include <assert.h> 42 #include <ostream> 43 44 45 46 namespace Njn { 47 48 49 class LocalMaxStat { 50 51 // calculates the statistical parameters for the local maximum in a random walk 52 // 53 // The scores are uniqued and 54 // with the correspondence to probabilities maintained, placed in ascending order. 55 56 public: 57 58 // The following subroutines control the time for the dynamic programming computation. 59 // The default time_ = 0.0 permits the computation to run forever. setTime(double time_=0.0)60 static void setTime (double time_ = 0.0) {assert (time_ >= 0.0); s_time = time_;} // set time for the dynamic programming computation getTime()61 static double getTime () {return s_time;} // get time for the dynamic programming computation 62 // For an object o, 63 // if the computation is terminated before it finishes, 64 // o.getTerminated () == true. 65 LocalMaxStat(size_t dimension_=0,const long int * score_=0,const double * prob_=0)66 inline LocalMaxStat ( 67 size_t dimension_ = 0, // #(distinct values) 68 const long int *score_ = 0, // scores in increasing order 69 const double *prob_ = 0) // probability of corresponding value 70 : d_dimension (0), d_score_p (0), d_prob_p (0), 71 d_lambda (0.0), d_k (0.0), d_c (0.0), d_thetaMin (0.0), d_rMin (0.0), 72 d_delta (0), d_thetaMinusDelta (0.0), 73 d_mu (0.0), d_sigma (0.0), d_muAssoc (0.0), d_sigmaAssoc (0.0), 74 d_meanWDLE (0.0), d_terminated (false) 75 { 76 copy (dimension_, score_, prob_); 77 } 78 LocalMaxStat(const LocalMaxStat & localMaxStat_)79 inline LocalMaxStat (const LocalMaxStat &localMaxStat_) // random walk parameters 80 : d_dimension (0), d_score_p (0), d_prob_p (0), 81 d_lambda (0.0), d_k (0.0), d_c (0.0), d_thetaMin (0.0), d_rMin (0.0), 82 d_delta (0), d_thetaMinusDelta (0.0), 83 d_mu (0.0), d_sigma (0.0), d_muAssoc (0.0), d_sigmaAssoc (0.0), 84 d_meanWDLE (0.0), d_terminated (false) 85 { 86 copy (localMaxStat_); 87 } 88 ~LocalMaxStat()89 inline ~LocalMaxStat () {free2 ();} 90 operator bool() const91 inline operator bool () // ? is the object ready for computation ? 92 const { 93 return d_dimension != 0; 94 } 95 operator =(const LocalMaxStat & localMaxStat_)96 inline LocalMaxStat &operator= (const LocalMaxStat &localMaxStat_) // random walk parameters 97 { 98 if (this != &localMaxStat_) copy (localMaxStat_); 99 return *this; 100 } 101 102 void copy ( 103 size_t dimension_, // #(distinct values) of scores & probabilities (which are paired) 104 const long int *score_, // scores in increasing order 105 const double *prob_); // probabilities 106 copy(const LocalMaxStat & localMaxStat_)107 inline void copy (const LocalMaxStat &localMaxStat_) 108 { 109 copy (localMaxStat_.getDimension (), localMaxStat_.getScore (), localMaxStat_.getProb (), 110 localMaxStat_.getLambda (), localMaxStat_.getK (), localMaxStat_.getC (), 111 localMaxStat_.getThetaMin (), localMaxStat_.getRMin (), 112 localMaxStat_.getDelta (), localMaxStat_.getThetaMinusDelta (), 113 localMaxStat_.getMu (), localMaxStat_.getSigma (), localMaxStat_.getMuAssoc (), localMaxStat_.getSigmaAssoc (), 114 localMaxStat_.getMeanWDLE (), localMaxStat_.getTerminated ()); 115 } 116 117 void copy ( 118 size_t dimension_, // #(distinct values) of scores & probabilities (which are paired) 119 const long int *score_, // scores in increasing order 120 const double *prob_, // probabilities 121 double lambda_, // lambda for associated random walk 122 double k_, // k for random walk : exponential prefactor 123 double c_, // c for random walk : exponential prefactor (global alignment) 124 double thetaMin_, // theta for minimum expectation (exp (theta * score)) 125 double rMin_, // minimum expectation (exp (theta * score)) 126 long int delta_, // span 127 double thetaMinusDelta_, // renewal span parameter 128 double mu_, // step mean for random walk 129 double sigma_, // step standard deviation for random walk 130 double muAssoc_, // step mean for associated random walk (relative entropy) 131 double sigmaAssoc_, // step standard deviation for associated random walk 132 double meanDLE_, // expected renewal length 133 bool terminated_ = false); // ? Was the dynamic programming computation terminated prematurely ? 134 out(std::ostream & ostr_) const135 inline std::ostream &out (std::ostream &ostr_) const {return ostr_;} // output 136 137 double getR (double theta_) const; // r (theta_) : dominant eigenvalue for theta_ 138 getA() const139 double getA () const {return getMuAssoc () == 0 ? HUGE_VAL : 1.0 / getMuAssoc ();} // expected [length / y] for achieving y 140 getAlpha() const141 double getAlpha () const {return getSigmaAssoc () * getSigmaAssoc () * getA () * getA () * getA ();} // var [length] / y for achieving y 142 getDimension() const143 inline size_t getDimension () const {return d_dimension;} // #(distinct values) of scores & probabilities (which are paired) getScore() const144 inline const long int *getScore () const {return d_score_p;} // scores in increasing order getProb() const145 inline const double *getProb () const {return d_prob_p;} // probabilities getLambda() const146 inline double getLambda () const {return d_lambda;} // lambda for associated random walk getK() const147 inline double getK () const {return d_k;} // k for random walk : exponential prefactor getC() const148 inline double getC () const {return d_c;} // c for random walk : exponential prefactor (global alignment) getThetaMin() const149 inline double getThetaMin () const {return d_thetaMin;} // theta for minimum expectation (exp (theta * score)) getRMin() const150 inline double getRMin () const {return d_rMin;} // minimum expectation (exp (theta * score)) getDelta() const151 inline long int getDelta () const {return d_delta;} // span getThetaMinusDelta() const152 inline double getThetaMinusDelta () const {return d_thetaMinusDelta;} // renewal span parameter getMu() const153 inline double getMu () const {return d_mu;} // step mean for random walk getSigma() const154 inline double getSigma () const {return d_sigma;} // step standard deviation for random walk getMuAssoc() const155 inline double getMuAssoc () const {return d_muAssoc;} // step mean for associated random walk (relative entropy) getSigmaAssoc() const156 inline double getSigmaAssoc () const {return d_sigmaAssoc;} // step standard deviation for associated random walk getMeanWDLE() const157 inline double getMeanWDLE () const {return d_meanWDLE;} // expected renewal length for weak ladder epochs getTerminated() const158 inline bool getTerminated () const {return d_terminated;} // ? Was the dynamic programming computation terminated prematurely ? 159 160 private: 161 162 // random walk distribution 163 size_t d_dimension; // #(distinct values) of scores & probabilities (which are paired) 164 long int *d_score_p; // scores in increasing order 165 double *d_prob_p; // probabilities 166 167 // Karlin-Altschul parameters 168 double d_lambda; // lambda for associated random walk 169 double d_k; // k for random walk : exponential prefactor 170 double d_c; // c for random walk : exponential prefactor (global alignment) 171 double d_thetaMin; // theta for minimum expectation (exp (theta * score)) 172 double d_rMin; // minimum expectation (exp (theta * score)) 173 long int d_delta; // span 174 double d_thetaMinusDelta; // renewal span parameter 175 double d_mu; // step mean for random walk 176 double d_sigma; // step standard deviation for random walk 177 double d_muAssoc; // step mean for associated random walk (relative entropy) 178 double d_sigmaAssoc; // step standard deviation for associated random walk 179 double d_meanWDLE; // expected renewal length for weak ladder epochs 180 bool d_terminated; // ? Was the dynamic programming computation terminated prematurely ? 181 182 void init (size_t dimension_); 183 void free2 (); 184 185 void clear (); 186 187 void dynProgCalc (); 188 // k for random walk : exponential prefactor 189 // expected renewal length for weak ladder epochs 190 191 static double s_time; 192 193 }; 194 195 } 196 197 #endif 198 199