1 /***************************************************************************
2  *   Copyright (C) 2009 by BUI Quang Minh   *
3  *   minh.bui@univie.ac.at   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #ifndef RATEGAMMA_H
21 #define RATEGAMMA_H
22 
23 #include "rateheterogeneity.h"
24 
25 const int GAMMA_CUT_MEDIAN = 1; // 2 discrete Gamma approximations (mean or median) of Yang 1994
26 const int GAMMA_CUT_MEAN   = 2;
27 
28 class PhyloTree;
29 /**
30 Discrete gamma distributed site-rate model from Yang 1994
31 
32 	@author BUI Quang Minh <minh.bui@univie.ac.at>
33 */
34 class RateGamma: virtual public RateHeterogeneity
35 {
36 
37 	friend class RateGammaInvar;
38 
39 public:
40 	/**
41 		constructor
42 		@param ncat number of rate categories
43 		@param shape Gamma shape parameter
44 		@param tree associated phylogenetic tree
45 	*/
46     RateGamma(int ncat, double shape, bool median, PhyloTree *tree);
47 
48 	/**
49 		destructor
50 	*/
51     virtual ~RateGamma();
52 
53     /**
54         start structure for checkpointing
55     */
56     virtual void startCheckpoint();
57 
58     /**
59         save object into the checkpoint
60     */
61     virtual void saveCheckpoint();
62 
63     /**
64         restore object from the checkpoint
65     */
66     virtual void restoreCheckpoint();
67 
68 	/**
69 		@return true if this is a Gamma model (default: false)
70 	*/
isGammaRate()71     virtual int isGammaRate() {
72         if (cut_median) return GAMMA_CUT_MEDIAN;
73         return GAMMA_CUT_MEAN;
74     }
75 
getGammaShape()76 	virtual double getGammaShape() { return gamma_shape; }
77 
78 	virtual void setGammaShape(double gs);
79 
80 	/**
81 	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
82 	 */
83 	virtual string getNameParams();
84 
85 	/**
86 		@return TRUE to use median rate for discrete categories, FALSE to use mean rate instead
87         OBSOLETE, see isGammaRate()
88 	*/
89 //	bool isCutMedian() { return cut_median; }
90 
91 	/**
92 		@return the number of rate categories
93 	*/
getNRate()94 	virtual int getNRate() { return ncategory; }
95 
96 	/**
97 		get the number of rate categories for site-specific category model
98 		@return the number of rate categories
99 	*/
getNDiscreteRate()100 	virtual int getNDiscreteRate() { return ncategory; }
101 
102 	/**
103 		@param category category ID from 0 to #category-1
104 		@return the rate of the specified category
105 	*/
getRate(int category)106 	virtual double getRate(int category) { return rates[category]; }
107 
108 	/**
109 		set the rate of a specified category.
110 		@param category category ID from 0 to #category-1
111 		@param value the rate of the specified category
112 	*/
setRate(int category,double value)113 	virtual void setRate(int category, double value) { rates[category] = value; }
114 
115 	/**
116 		get the proportion of sites under a specified category.
117 		@param category category ID from 0 to #category-1
118 		@return the proportion of the specified category
119 	*/
getProp(int category)120 	virtual double getProp(int category) { return 1.0/ncategory; }
121 
122 	/**
123 	 * 	return pointer to the rate array
124 	 */
getRates()125 	virtual double* getRates() { return rates; }
126 
127 	/** discrete Gamma according to Yang 1994 (JME 39:306-314) and using median cutting point
128 		It takes 'ncategory' and 'gamma_shape' variables as input. On output, it write to 'rates' variable.
129 	*/
130 	void computeRates();
131 
132 	/** discrete Gamma according to Yang 1994 (JME 39:306-314) and using mean of the portion of gamma distribution
133 		It takes 'ncategory' and 'gamma_shape' variables as input. On output, it write to 'rates' variable.
134 	*/
135 	void computeRatesMean ();
136 
137 	/**
138 		Compute site-specific rates. Override this for Gamma model
139 		@param pattern_rates (OUT) pattern rates. Resizing if necesary
140         @return total number of categories
141 	*/
142 	virtual int computePatternRates(DoubleVector &pattern_rates, IntVector &pattern_cat);
143 
144 	/**
145 	 * setup the bounds for joint optimization with BFGS
146 	 */
147 	virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check);
148 
149 	/**
150 		the target function which needs to be optimized
151 		@param x the input vector x
152 		@return the function value at x
153 	*/
154 	virtual double targetFunk(double x[]);
155 
156 	/**
157 		optimize parameters. Default is to optimize gamma shape
158 		@return the best likelihood
159 	*/
160 	virtual double optimizeParameters(double gradient_epsilon);
161 
162     /**
163      *  Same as above but add parameters to control gamma bounds
164      */
165 	virtual double optimizeParameters(double gradient_epsilon, double min_gamma, double max_gamma);
166 
167 	/**
168 		override function from Optimization class, used by the minimizeOneDimen() to optimize
169 		gamma shape parameter
170 	*/
171 	virtual double computeFunction(double shape);
172 
173 
174 	/**
175 		return the number of dimensions
176 	*/
getNDim()177 	virtual int getNDim() { return !fix_gamma_shape; }
178 
179 	/**
180 		write information
181 		@param out output stream
182 	*/
183 	virtual void writeInfo(ostream &out);
184 
185 	/**
186 		write parameters, used with modeltest
187 		@param out output stream
188 	*/
189 	virtual void writeParameters(ostream &out);
190 
isFixGammaShape()191 	virtual bool isFixGammaShape() const {
192 		return fix_gamma_shape;
193 	}
194 
setFixGammaShape(bool fixGammaShape)195 	virtual void setFixGammaShape(bool fixGammaShape) {
196 		fix_gamma_shape = fixGammaShape;
197 	}
198 
199     /**
200         set number of rate categories
201         @param ncat #categories
202     */
203 	virtual void setNCategory(int ncat);
204 
205 protected:
206 
207 	/**
208 		this function is served for the multi-dimension optimization. It should pack the model parameters
209 		into a vector that is index from 1 (NOTE: not from 0)
210 		@param variables (OUT) vector of variables, indexed from 1
211 	*/
212 	virtual void setVariables(double *variables);
213 
214 	/**
215 		this function is served for the multi-dimension optimization. It should assign the model parameters
216 		from a vector of variables that is index from 1 (NOTE: not from 0)
217 		@param variables vector of variables, indexed from 1
218 		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
219 	*/
220 	virtual bool getVariables(double *variables);
221 
222 	/**
223 		number of rate categories
224 	*/
225 	int ncategory;
226 
227 	/**
228 		rates, containing ncategory elements
229 	*/
230 	double *rates;
231 
232 
233 	/**
234 		the gamma shape parameter 'alpha'
235 	*/
236 	double gamma_shape;
237 
238 	/**
239 		TRUE to fix the gamma shape parameter
240 	*/
241 	bool fix_gamma_shape;
242 
243 	/**
244 		TRUE to use median rate for discrete categories, FALSE to use mean rate instead
245 	*/
246 	bool cut_median;
247 
248 public:
249 
250 	//Normally, beta is assigned equal to alpha
251 	//double cmpPerPointGamma (const double prob, const double shape);
252 
253 	/***********************************************************
254 	NUMERICAL SUBROUTINES
255 	THE FOLLOWING CODE COMES FROM tools.c in Yang's PAML package
256 	***********************************************************/
257 	/** returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places.
258 	   Stirling's formula is used for the central polynomial part of the procedure.
259 	   Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.
260 	   Communications of the Association for Computing Machinery, 9:684
261 
262 	*/
263 	static double cmpLnGamma (double alpha);
264 
265 	/** returns the incomplete gamma ratio I(x,alpha) where x is the upper
266 	   limit of the integration and alpha is the shape parameter.
267 	   returns (-1) if in error
268 	   (1) series expansion     if (alpha>x || x<=1)
269 	   (2) continued fraction   otherwise
270 	   RATNEST FORTRAN by
271 	   Bhattacharjee GP (1970) The incomplete gamma integral.  Applied Statistics,
272 	   19: 285-287 (AS32)
273 	*/
274 	static double cmpIncompleteGamma (double x, double alpha, double ln_gamma_alpha);
275 
276 	/** functions concerning the CDF and percentage points of the gamma and
277 	   Chi2 distribution
278 	   returns z so that Prob{x<z}=prob where x ~ N(0,1) and (1e-12)<prob<1-(1e-12)
279 	   returns (-9999) if in error
280 	   Odeh RE & Evans JO (1974) The percentage points of the normal distribution.
281 	   Applied Statistics 22: 96-97 (AS70)
282 
283 	   Newer methods:
284 	     Wichura MJ (1988) Algorithm AS 241: the percentage points of the
285 	       normal distribution.  37: 477-484.
286 	     Beasley JD & Springer SG  (1977).  Algorithm AS 111: the percentage
287 	       points of the normal distribution.  26: 118-121.
288 	*/
289 	static double cmpPointNormal (double prob);
290 
291 
292 	/** returns z so that Prob{x<z}=prob where x is Chi2 distributed with df=v
293 	   returns -1 if in error.   0.000002<prob<0.999998
294 	   RATNEST FORTRAN by
295 	       Best DJ & Roberts DE (1975) The percentage points of the
296 	       Chi2 distribution.  Applied Statistics 24: 385-388.  (AS91)
297 	   Converted into C by Ziheng Yang, Oct. 1993.
298 	*/
299 	static double cmpPointChi2 (double prob, double v);
300 
301 	/* THE END OF THE CODES COMMING FROM tools.c in Yang's PAML package */
302 
303 };
304 
305 
306 
307 
308 #endif
309