1 /*************************************************************************** 2 * Copyright (C) 2009 by BUI Quang Minh * 3 * minh.bui@univie.ac.at * 4 * * 5 * This program is free software; you can redistribute it and/or modify * 6 * it under the terms of the GNU General Public License as published by * 7 * the Free Software Foundation; either version 2 of the License, or * 8 * (at your option) any later version. * 9 * * 10 * This program is distributed in the hope that it will be useful, * 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 * GNU General Public License for more details. * 14 * * 15 * You should have received a copy of the GNU General Public License * 16 * along with this program; if not, write to the * 17 * Free Software Foundation, Inc., * 18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * 19 ***************************************************************************/ 20 #ifndef RATEGAMMA_H 21 #define RATEGAMMA_H 22 23 #include "rateheterogeneity.h" 24 25 const int GAMMA_CUT_MEDIAN = 1; // 2 discrete Gamma approximations (mean or median) of Yang 1994 26 const int GAMMA_CUT_MEAN = 2; 27 28 class PhyloTree; 29 /** 30 Discrete gamma distributed site-rate model from Yang 1994 31 32 @author BUI Quang Minh <minh.bui@univie.ac.at> 33 */ 34 class RateGamma: virtual public RateHeterogeneity 35 { 36 37 friend class RateGammaInvar; 38 39 public: 40 /** 41 constructor 42 @param ncat number of rate categories 43 @param shape Gamma shape parameter 44 @param tree associated phylogenetic tree 45 */ 46 RateGamma(int ncat, double shape, bool median, PhyloTree *tree); 47 48 /** 49 destructor 50 */ 51 virtual ~RateGamma(); 52 53 /** 54 start structure for checkpointing 55 */ 56 virtual void startCheckpoint(); 57 58 /** 59 save object into the checkpoint 60 */ 61 virtual void saveCheckpoint(); 62 63 /** 64 restore object from the checkpoint 65 */ 66 virtual void restoreCheckpoint(); 67 68 /** 69 @return true if this is a Gamma model (default: false) 70 */ isGammaRate()71 virtual int isGammaRate() { 72 if (cut_median) return GAMMA_CUT_MEDIAN; 73 return GAMMA_CUT_MEAN; 74 } 75 getGammaShape()76 virtual double getGammaShape() { return gamma_shape; } 77 78 virtual void setGammaShape(double gs); 79 80 /** 81 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f} 82 */ 83 virtual string getNameParams(); 84 85 /** 86 @return TRUE to use median rate for discrete categories, FALSE to use mean rate instead 87 OBSOLETE, see isGammaRate() 88 */ 89 // bool isCutMedian() { return cut_median; } 90 91 /** 92 @return the number of rate categories 93 */ getNRate()94 virtual int getNRate() { return ncategory; } 95 96 /** 97 get the number of rate categories for site-specific category model 98 @return the number of rate categories 99 */ getNDiscreteRate()100 virtual int getNDiscreteRate() { return ncategory; } 101 102 /** 103 @param category category ID from 0 to #category-1 104 @return the rate of the specified category 105 */ getRate(int category)106 virtual double getRate(int category) { return rates[category]; } 107 108 /** 109 set the rate of a specified category. 110 @param category category ID from 0 to #category-1 111 @param value the rate of the specified category 112 */ setRate(int category,double value)113 virtual void setRate(int category, double value) { rates[category] = value; } 114 115 /** 116 get the proportion of sites under a specified category. 117 @param category category ID from 0 to #category-1 118 @return the proportion of the specified category 119 */ getProp(int category)120 virtual double getProp(int category) { return 1.0/ncategory; } 121 122 /** 123 * return pointer to the rate array 124 */ getRates()125 virtual double* getRates() { return rates; } 126 127 /** discrete Gamma according to Yang 1994 (JME 39:306-314) and using median cutting point 128 It takes 'ncategory' and 'gamma_shape' variables as input. On output, it write to 'rates' variable. 129 */ 130 void computeRates(); 131 132 /** discrete Gamma according to Yang 1994 (JME 39:306-314) and using mean of the portion of gamma distribution 133 It takes 'ncategory' and 'gamma_shape' variables as input. On output, it write to 'rates' variable. 134 */ 135 void computeRatesMean (); 136 137 /** 138 Compute site-specific rates. Override this for Gamma model 139 @param pattern_rates (OUT) pattern rates. Resizing if necesary 140 @return total number of categories 141 */ 142 virtual int computePatternRates(DoubleVector &pattern_rates, IntVector &pattern_cat); 143 144 /** 145 * setup the bounds for joint optimization with BFGS 146 */ 147 virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check); 148 149 /** 150 the target function which needs to be optimized 151 @param x the input vector x 152 @return the function value at x 153 */ 154 virtual double targetFunk(double x[]); 155 156 /** 157 optimize parameters. Default is to optimize gamma shape 158 @return the best likelihood 159 */ 160 virtual double optimizeParameters(double gradient_epsilon); 161 162 /** 163 * Same as above but add parameters to control gamma bounds 164 */ 165 virtual double optimizeParameters(double gradient_epsilon, double min_gamma, double max_gamma); 166 167 /** 168 override function from Optimization class, used by the minimizeOneDimen() to optimize 169 gamma shape parameter 170 */ 171 virtual double computeFunction(double shape); 172 173 174 /** 175 return the number of dimensions 176 */ getNDim()177 virtual int getNDim() { return !fix_gamma_shape; } 178 179 /** 180 write information 181 @param out output stream 182 */ 183 virtual void writeInfo(ostream &out); 184 185 /** 186 write parameters, used with modeltest 187 @param out output stream 188 */ 189 virtual void writeParameters(ostream &out); 190 isFixGammaShape()191 virtual bool isFixGammaShape() const { 192 return fix_gamma_shape; 193 } 194 setFixGammaShape(bool fixGammaShape)195 virtual void setFixGammaShape(bool fixGammaShape) { 196 fix_gamma_shape = fixGammaShape; 197 } 198 199 /** 200 set number of rate categories 201 @param ncat #categories 202 */ 203 virtual void setNCategory(int ncat); 204 205 protected: 206 207 /** 208 this function is served for the multi-dimension optimization. It should pack the model parameters 209 into a vector that is index from 1 (NOTE: not from 0) 210 @param variables (OUT) vector of variables, indexed from 1 211 */ 212 virtual void setVariables(double *variables); 213 214 /** 215 this function is served for the multi-dimension optimization. It should assign the model parameters 216 from a vector of variables that is index from 1 (NOTE: not from 0) 217 @param variables vector of variables, indexed from 1 218 @return TRUE if parameters are changed, FALSE otherwise (2015-10-20) 219 */ 220 virtual bool getVariables(double *variables); 221 222 /** 223 number of rate categories 224 */ 225 int ncategory; 226 227 /** 228 rates, containing ncategory elements 229 */ 230 double *rates; 231 232 233 /** 234 the gamma shape parameter 'alpha' 235 */ 236 double gamma_shape; 237 238 /** 239 TRUE to fix the gamma shape parameter 240 */ 241 bool fix_gamma_shape; 242 243 /** 244 TRUE to use median rate for discrete categories, FALSE to use mean rate instead 245 */ 246 bool cut_median; 247 248 public: 249 250 //Normally, beta is assigned equal to alpha 251 //double cmpPerPointGamma (const double prob, const double shape); 252 253 /*********************************************************** 254 NUMERICAL SUBROUTINES 255 THE FOLLOWING CODE COMES FROM tools.c in Yang's PAML package 256 ***********************************************************/ 257 /** returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places. 258 Stirling's formula is used for the central polynomial part of the procedure. 259 Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function. 260 Communications of the Association for Computing Machinery, 9:684 261 262 */ 263 static double cmpLnGamma (double alpha); 264 265 /** returns the incomplete gamma ratio I(x,alpha) where x is the upper 266 limit of the integration and alpha is the shape parameter. 267 returns (-1) if in error 268 (1) series expansion if (alpha>x || x<=1) 269 (2) continued fraction otherwise 270 RATNEST FORTRAN by 271 Bhattacharjee GP (1970) The incomplete gamma integral. Applied Statistics, 272 19: 285-287 (AS32) 273 */ 274 static double cmpIncompleteGamma (double x, double alpha, double ln_gamma_alpha); 275 276 /** functions concerning the CDF and percentage points of the gamma and 277 Chi2 distribution 278 returns z so that Prob{x<z}=prob where x ~ N(0,1) and (1e-12)<prob<1-(1e-12) 279 returns (-9999) if in error 280 Odeh RE & Evans JO (1974) The percentage points of the normal distribution. 281 Applied Statistics 22: 96-97 (AS70) 282 283 Newer methods: 284 Wichura MJ (1988) Algorithm AS 241: the percentage points of the 285 normal distribution. 37: 477-484. 286 Beasley JD & Springer SG (1977). Algorithm AS 111: the percentage 287 points of the normal distribution. 26: 118-121. 288 */ 289 static double cmpPointNormal (double prob); 290 291 292 /** returns z so that Prob{x<z}=prob where x is Chi2 distributed with df=v 293 returns -1 if in error. 0.000002<prob<0.999998 294 RATNEST FORTRAN by 295 Best DJ & Roberts DE (1975) The percentage points of the 296 Chi2 distribution. Applied Statistics 24: 385-388. (AS91) 297 Converted into C by Ziheng Yang, Oct. 1993. 298 */ 299 static double cmpPointChi2 (double prob, double v); 300 301 /* THE END OF THE CODES COMMING FROM tools.c in Yang's PAML package */ 302 303 }; 304 305 306 307 308 #endif 309