1 /*
2  * modelmixture.h
3  *
4  *  Created on: Nov 29, 2014
5  *      Author: minh
6  */
7 
8 #ifndef MODELMIXTURE_H_
9 #define MODELMIXTURE_H_
10 
11 #include "tree/phylotree.h"
12 #include "modelsubst.h"
13 #include "modelmarkov.h"
14 #include "nclextra/modelsblock.h"
15 
16 
17 const char OPEN_BRACKET = '{';
18 const char CLOSE_BRACKET = '}';
19 
20 extern const char* builtin_mixmodels_definition;
21 
22 /**
23  * create a substitution model
24  * @param model_str model nme
25  * @param freq_type state frequency type
26  * @param freq_params frequency parameters
27  * @param tree associated phylo tree
28  * @param count_rates TRUE to assign rates counted from alignment, FALSE to not initialize rates
29  * @return substitution model created
30  */
31 ModelSubst *createModel(string model_str, ModelsBlock *models_block, StateFreqType freq_type, string freq_params,
32                         PhyloTree *tree);
33 
34 
35 /**
36  * mixture model
37  */
38 class ModelMixture: virtual public ModelMarkov, public vector<ModelMarkov*> {
39 public:
40 
41 	/**
42 		constructor
43 		@param model_name model name, e.g., JC, HKY.
44 		@param freq state frequency type
45 		@param tree associated phylogenetic tree
46 	*/
47     ModelMixture(string orig_model_name, string model_name, string model_list, ModelsBlock *models_block,
48     		StateFreqType freq, string freq_params, PhyloTree *tree, bool optimize_weights);
49 
50     void initMixture(string orig_model_name, string model_name, string model_list, ModelsBlock *models_block,
51     		StateFreqType freq, string freq_params, PhyloTree *tree, bool optimize_weights);
52 
53     void initMem();
54 
55     /**
56 		constructor
57 		@param tree associated tree for the model
58 	*/
59     ModelMixture(PhyloTree *tree);
60 
61 
62     virtual ~ModelMixture();
63 
64     /**
65         set checkpoint object
66         @param checkpoint
67     */
68     virtual void setCheckpoint(Checkpoint *checkpoint);
69 
70     /**
71         start structure for checkpointing
72     */
73     virtual void startCheckpoint();
74 
75     /**
76         save object into the checkpoint
77     */
78     virtual void saveCheckpoint();
79 
80     /**
81         restore object from the checkpoint
82     */
83     virtual void restoreCheckpoint();
84 
85 
86 	/**
87 	 * @return TRUE if this is a mixture model, FALSE otherwise
88 	 */
isMixture()89 	virtual bool isMixture() { return true; }
90 
91 
92 	/**
93 	 * @return the number of mixture model components
94 	 */
getNMixtures()95 	virtual int getNMixtures() {return size(); }
96 
97  	/**
98 	 * @param cat mixture class
99 	 * @return weight of a mixture model component
100 	 */
getMixtureWeight(int cat)101 	virtual double getMixtureWeight(int cat) { return prop[cat]; }
102 
103 	/**
104 	 * @param cat mixture class
105 	 * @return weight of a mixture model component
106 	 */
setMixtureWeight(int cat,double weight)107 	virtual void setMixtureWeight(int cat, double weight) { prop[cat] = weight; }
108 
109 	/**
110 	 * @param cat mixture class
111 	 * @return weight of a mixture model component
112 	 */
setFixMixtureWeight(bool fix_prop)113 	virtual void setFixMixtureWeight(bool fix_prop) { this->fix_prop = fix_prop; }
114 
115 	/**
116 	 * @param cat mixture class ID
117 	 * @return corresponding mixture model component
118 	 */
getMixtureClass(int cat)119     virtual ModelSubst* getMixtureClass(int cat) { return at(cat); }
120 
121 	/**
122 	 * @param cat mixture class ID
123 	 * @param m mixture model class to set
124 	 */
setMixtureClass(int cat,ModelSubst * m)125     virtual void setMixtureClass(int cat, ModelSubst* m) { at(cat) = (ModelMarkov*)m; }
126 
127 	/**
128 		compute the state frequency vector
129         @param mixture (optional) class for mixture model.
130             -1 to get weighted sum of class state frequency vector
131 		@param state_freq (OUT) state frequency vector. Assume state_freq has size of num_states
132 	*/
133 	virtual void getStateFrequency(double *state_freq, int mixture = 0);
134 
135 	/**
136 		compute the transition probability matrix. One should override this function when defining new model.
137 		The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc)
138 		@param time time between two events
139         @param mixture (optional) class for mixture model
140 		@param trans_matrix (OUT) the transition matrix between all pairs of states.
141 			Assume trans_matrix has size of num_states * num_states.
142 	*/
143 	virtual void computeTransMatrix(double time, double *trans_matrix, int mixture = 0);
144 
145 
146 	/**
147 		compute the transition probability matrix.and the derivative 1 and 2
148 		@param time time between two events
149         @param mixture (optional) class for mixture model
150 		@param trans_matrix (OUT) the transition matrix between all pairs of states.
151 			Assume trans_matrix has size of num_states * num_states.
152 		@param trans_derv1 (OUT) the 1st derivative matrix between all pairs of states.
153 		@param trans_derv2 (OUT) the 2nd derivative matrix between all pairs of states.
154 	*/
155 	virtual void computeTransDerv(double time, double *trans_matrix,
156 		double *trans_derv1, double *trans_derv2, int mixture = 0);
157 
158 	/**
159 		@return the number of dimensions
160 	*/
161 	virtual int getNDim();
162 
163 	/**
164 		@return the number of dimensions corresponding to state frequencies
165 	*/
166 	virtual int getNDimFreq();
167 
168 	/**
169 		the target function which needs to be optimized
170 		@param x the input vector x
171 		@return the function value at x
172 	*/
173 	virtual double targetFunk(double x[]);
174 
175     /**
176         optimize mixture weights using EM algorithm
177         @return log-likelihood of optimized weights
178     */
179     double optimizeWeights();
180 
181     /**
182         optimize rate parameters using EM algorithm
183         @param gradient_epsilon
184         @return log-likelihood of optimized parameters
185     */
186     double optimizeWithEM(double gradient_epsilon);
187 
188 
189     /**
190         set number of optimization steps
191         @param opt_steps number of optimization steps
192     */
setOptimizeSteps(int optimize_steps)193     virtual void setOptimizeSteps(int optimize_steps) { this->optimize_steps = optimize_steps; }
194 
195     /** @return true if model is fused with site_rate */
196     bool isFused();
197 
198 	/**
199 		optimize model parameters
200 		@return the best likelihood
201 	*/
202 	virtual double optimizeParameters(double gradient_epsilon);
203 
204 	/**
205 	 * @return TRUE if parameters are at the boundary that may cause numerical unstability
206 	 */
207 	virtual bool isUnstableParameters();
208 
209 	/**
210 		decompose the rate matrix into eigenvalues and eigenvectors
211 	*/
212 	virtual void decomposeRateMatrix();
213 
214 	/**
215 	 * setup the bounds for joint optimization with BFGS
216 	 */
217 	virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check);
218 
219 	/**
220 		write information
221 		@param out output stream
222 	*/
223 	virtual void writeInfo(ostream &out);
224 
225 	/**
226 		write parameters, used with modeltest
227 		@param out output stream
228 	*/
229 	virtual void writeParameters(ostream &out);
230 
231 	/**
232 	 * @return model name
233 	 */
234 	virtual string getName();
235 
236 	/**
237 	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
238 	 */
239 	virtual string getNameParams();
240 
241     /**
242      * compute the memory size for the model, can be large for site-specific models
243      * @return memory size required in bytes
244      */
getMemoryRequired()245     virtual uint64_t getMemoryRequired() {
246     	uint64_t mem = ModelMarkov::getMemoryRequired();
247     	for (iterator it = begin(); it != end(); it++)
248     		mem += (*it)->getMemoryRequired();
249     	return mem;
250     }
251 
252 	/**
253 		rates of mixture components
254 	*/
255 //	double *mix_rates;
256 
257 	/**
258 	 * weight of each sub-model (must sum to 1)
259 	 */
260 	double *prop;
261 
262 	/**
263 	 * TRUE to fix model weights
264 	 */
265 	bool fix_prop;
266 
267 protected:
268 
269 	bool optimizing_submodels;
270 
271     /** number of optimization steps, default: ncategory*2 */
272     int optimize_steps;
273 
274 	/**
275 		this function is served for the multi-dimension optimization. It should pack the model parameters
276 		into a vector that is index from 1 (NOTE: not from 0)
277 		@param variables (OUT) vector of variables, indexed from 1
278 	*/
279 	virtual void setVariables(double *variables);
280 
281 	/**
282 		this function is served for the multi-dimension optimization. It should assign the model parameters
283 		from a vector of variables that is index from 1 (NOTE: not from 0)
284 		@param variables vector of variables, indexed from 1
285 		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
286 	*/
287 	virtual bool getVariables(double *variables);
288 
289 };
290 
291 #endif /* MODELMIXTURE_H_ */
292