1 /***************************************************************************
2  *   Copyright (C) 2009 by BUI Quang Minh   *
3  *   minh.bui@univie.ac.at   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #ifndef RATEHETEROGENEITY_H
21 #define RATEHETEROGENEITY_H
22 
23 
24 #include "utils/optimization.h"
25 #include <string>
26 #include "utils/tools.h"
27 #include "utils/checkpoint.h"
28 
29 using namespace std;
30 
31 class PhyloTree;
32 
33 const double MIN_SITE_RATE = 1e-6;
34 const double MAX_SITE_RATE = 100.0;
35 const double TOL_SITE_RATE = 1e-6;
36 
37 
38 /**
39 class for among-site rate heterogeneity, the default is homogeneous (equal) rate model
40 
41 	@author BUI Quang Minh <minh.bui@univie.ac.at>
42 */
43 
44 class RateHeterogeneity : public Optimization, public CheckpointFactory
45 {
46 	friend class ModelFactory;
47 	friend class ModelPoMoMixture;
48 
49 public:
50 	/**
51 		constructor
52 	*/
53     RateHeterogeneity();
54 
55 	/**
56 		destructor
57 	*/
58     virtual ~RateHeterogeneity();
59 
60     /**
61         start structure for checkpointing
62     */
63     virtual void startCheckpoint();
64 
65     /**
66         save object into the checkpoint
67     */
68     virtual void saveCheckpoint();
69 
70     /**
71         restore object from the checkpoint
72     */
73     virtual void restoreCheckpoint();
74 
75 	/**
76 		set phylogenetic tree
77 		@param tree associated phyogenetic tree
78 	*/
79 	void setTree(PhyloTree *tree);
80 
81 	/**
82 		set phylogenetic tree
83 		@param tree associated phyogenetic tree
84 	*/
getTree()85 	PhyloTree *getTree() { return phylo_tree; }
86 
87 	/**
88 	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
89 	 */
getNameParams()90 	virtual string getNameParams() { return name; }
91 
92 	/**
93 		@return false by default. True if rates are site-specific (Meyer and von Haeseler (2003) model)
94 	*/
isSiteSpecificRate()95 	virtual bool isSiteSpecificRate() { return false; }
96 
97     /**
98         @return TRUE if this is a heterotachy model, default: FALSE
99     */
isHeterotachy()100     virtual bool isHeterotachy() { return false; }
101 
102 	/**
103 		get the number of rate categories. The default returns 1 category since it is homogeneous model
104 		@return the number of rate categories
105 	*/
getNRate()106 	virtual int getNRate() { return 1; }
107 
108 	/**
109 		set the number of rate categories. The default raises assertion since it is homogeneous model
110 	*/
setNCategory(int ncat)111 	virtual void setNCategory(int ncat) { ASSERT(0); }
112 
113     /**
114         initialize from checkpoint rates and prop from rate model with #category-1
115     */
initFromCatMinusOne()116     virtual void initFromCatMinusOne() {}
117 
118 	/**
119 		get the number of rate categories for site-specific category model
120 		The default returns 1 category since it is homogeneous model
121 		@return the number of rate categories
122 	*/
getNDiscreteRate()123 	virtual int getNDiscreteRate() { return 1; }
124 
125 	/**
126 		get the rate of a specified category. Default returns 1.0 since it is homogeneous model
127 		@param category category ID from 0 to #category-1
128 		@return the rate of the specified category
129 	*/
getRate(int category)130 	virtual double getRate(int category) { return 1.0; }
131 
132 	/**
133 		set the rate of a specified category.
134 		@param category category ID from 0 to #category-1
135 		@param value the rate of the specified category
136 	*/
setRate(int category,double value)137 	virtual void setRate(int category, double value) { }
138 
139 	/**
140 		get the proportion of a specified category. Default returns 1.0 since it is homogeneous model
141 		@param category category ID from 0 to #category-1
142 		@return the proportion of the specified category
143 	*/
getProp(int category)144 	virtual double getProp(int category) { return 1.0; }
145 
146 	/**
147 		set the proportion of a specified category.
148 		@param category category ID from 0 to #category-1
149 		@return the proportion of the specified category
150 	*/
setProp(int category,double value)151 	virtual void setProp(int category, double value) {}
152 
153 	/**
154 		get the rate of a specified site-pattern. Default returns 1.0 since it is homogeneous model
155 		@param ptn pattern ID
156 		@return the rate of the specified site-pattern
157 	*/
getPtnRate(int ptn)158 	virtual double getPtnRate(int ptn) { return 1.0; }
159 
160 	/**
161 		get rate category of a specified site-pattern. Default returns -1 since it is homogeneous model
162 		@param ptn pattern ID
163 		@return the rate category of the specified site-pattern
164 	*/
getPtnCat(int ptn)165 	virtual int getPtnCat(int ptn) { return -1; }
166 
167 	/**
168 		get the proportion of invariable sites. Default returns 0.0 since it is homogeneous model
169 		@return the proportion of invariable sites
170 	*/
getPInvar()171 	virtual double getPInvar() { return 0.0; }
172 
173 	/**
174 		set the proportion of invariable sites. Default: do nothing
175 		@param pinv the proportion of invariable sites
176 	*/
setPInvar(double pinv)177 	virtual void setPInvar(double pinv) { }
178 
isFixPInvar()179 	virtual bool isFixPInvar() const {
180 		return true;
181 	}
182 
183 	/**
184         set whether to fix p_invar
185     */
setFixPInvar(bool fixPInvar)186 	virtual void setFixPInvar(bool fixPInvar) {}
187 
188 	/**
189 		Set whether or not to optimize p_invar
190 		@param opt TRUE to optimize p_invar, FALSE otherwise
191 	*/
192 //	virtual void setOptimizePInvar(bool opt) { }
193 
194 	/**
195 		get the Gamma shape. Default returns 0.0 since it is homogeneous model
196 		@return Gamma shape
197 	*/
getGammaShape()198 	virtual double getGammaShape() { return 0.0; }
199 
200 	/**
201 		set the Gamma shape. Default: nothing
202 		@param gs Gamma shape
203 	*/
setGammaShape(double gs)204 	virtual void setGammaShape(double gs) {}
205 
isFixGammaShape()206 	virtual bool isFixGammaShape() const {
207 		return true;
208 	}
209 
210 	/**
211         set whether to fix gamma shape
212     */
setFixGammaShape(bool fixGammaShape)213 	virtual void setFixGammaShape(bool fixGammaShape) {}
214 
215 	/**
216 		@return >0 if this is a Gamma model (default: 0)
217 	*/
isGammaRate()218     virtual int isGammaRate() { return 0; }
219 
220     /**
221         fix parameters, so that no optimization done
222         @param mode some input mode
223     */
getFixParams()224     virtual int getFixParams() { return 0; }
225 
226     /**
227         fix parameters, so that no optimization done
228         @param mode some input mode
229     */
setFixParams(int mode)230     virtual void setFixParams(int mode) {}
231 
232     /**
233      *  check whether +I+G is used
234      */
isGammai()235     virtual bool isGammai() const {
236         return false;
237     }
238 
239 	/**
240 		the target function which needs to be optimized
241 		@param x the input vector x
242 		@return the function value at x
243 	*/
244 	virtual double targetFunk(double x[]);
245 
246 	/**
247 	 * setup the bounds for joint optimization with BFGS
248 	 */
setBounds(double * lower_bound,double * upper_bound,bool * bound_check)249 	virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check) {}
250 
251     /**
252         set number of optimization steps
253         @param opt_steps number of optimization steps
254     */
setOptimizeSteps(int optimize_steps)255     virtual void setOptimizeSteps(int optimize_steps) { }
256 
257 	/**
258 		optimize parameters. Default does nothing
259 		@return the best likelihood
260 	*/
optimizeParameters(double gradient_epsilon)261 	virtual double optimizeParameters(double gradient_epsilon) { return 0.0; }
262 
263 	/**
264 		classify rates into categories, this is meant for the discrete MH model.
265 		The default just return tree_lh
266 		@param tree_lh the current tree log-likelihood
267 	*/
classifyRates(double tree_lh)268 	virtual double classifyRates(double tree_lh) { return tree_lh; }
269 
270 	/**
271 	 * used to normal branch lengths if mean rate is not equal to 1 (e.g. FreeRate model)
272 	 * @return mean rate, default = 1
273 	 */
meanRates()274 	virtual double meanRates() { return 1.0; }
275 
276 	/**
277 	 * rescale rates s.t. mean rate is equal to 1, useful for FreeRate model
278 	 * @return rescaling factor
279 	 */
rescaleRates()280 	virtual double rescaleRates() { return 1.0; }
281 
282 	/**
283 		write information
284 		@param out output stream
285 	*/
writeInfo(ostream & out)286 	virtual void writeInfo(ostream &out) {}
287 
288 	/**
289 		write parameters, used with modeltest
290 		@param out output stream
291 	*/
writeParameters(ostream & out)292 	virtual void writeParameters(ostream &out) {}
293 
294 	/**
295 		Compute site-specific rates. Override this for Gamma model
296 		@param pattern_rates (OUT) pattern rates. Resizing if necesary
297         @return total number of categories
298 	*/
computePatternRates(DoubleVector & pattern_rates,IntVector & pattern_cat)299 	virtual int computePatternRates(DoubleVector &pattern_rates, IntVector &pattern_cat) { return 1; }
300 
301 	/**
302 		name of the rate heterogeneity type
303 	*/
304 	string name;
305 
306 
307     /**
308      *  Specify whether the initial starting value of the gamma shape and p_inv
309      *  has already been tested.
310      */
311     bool testParamDone;
312 
313 	/**
314 		full name of the rate heterogeneity type
315 	*/
316 	string full_name;
317 
318 	/**
319 		phylogenetic tree associated
320 	*/
321 	PhyloTree *phylo_tree;
322 
323 protected:
324 
325 	/**
326 		this function is served for the multi-dimension optimization. It should pack the model parameters
327 		into a vector that is index from 1 (NOTE: not from 0)
328 		@param variables (OUT) vector of variables, indexed from 1
329 	*/
setVariables(double * variables)330 	virtual void setVariables(double *variables) {}
331 
332 	/**
333 		this function is served for the multi-dimension optimization. It should assign the model parameters
334 		from a vector of variables that is index from 1 (NOTE: not from 0)
335 		@param variables vector of variables, indexed from 1
336 		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
337 	*/
getVariables(double * variables)338 	virtual bool getVariables(double *variables) { return false; }
339 
340 
341 };
342 #endif
343