1 //
2 // C++ Interface: substmodel
3 //
4 // Description:
5 //
6 //
7 // Author: BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <minh.bui@univie.ac.at>, (C) 2008
8 //
9 // Copyright: See COPYING file that comes with this distribution
10 //
11 //
12 #ifndef SUBSTMODEL_H
13 #define SUBSTMODEL_H
14 
15 #include <string>
16 #include "utils/tools.h"
17 #include "utils/optimization.h"
18 #include "utils/checkpoint.h"
19 
20 using namespace std;
21 
22 /**
23 Substitution model abstract class
24 
25 	@author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <minh.bui@univie.ac.at>
26 */
27 class ModelSubst: public Optimization, public CheckpointFactory
28 {
29 	friend class ModelFactory;
30     friend class PartitionModel;
31 
32 public:
33 	/**
34 		constructor
35 		@param nstates number of states, e.g. 4 for DNA, 20 for proteins.
36 	*/
37     ModelSubst(int nstates);
38 
39 
40 	/**
41 		@return the number of dimensions
42 	*/
getNDim()43 	virtual int getNDim() { return 0; }
44 
45 	/**
46 		@return the number of dimensions corresponding to state frequencies
47 	*/
getNDimFreq()48 	virtual int getNDimFreq() { return 0; }
49 
50 	/**
51 	 * @return model name
52 	 */
getName()53 	virtual string getName() { return name; }
54 
55 	/**
56 	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
57 	 */
getNameParams()58 	virtual string getNameParams() { return name; }
59 
60 	/**
61 		@return TRUE if model is time-reversible, FALSE otherwise
62 	*/
isReversible()63 	virtual bool isReversible() { return true; };
64 
65     /**
66         fix parameters of the model
67         @param fix true to fix, false to not fix
68         @return the current state of fixing parameters
69      */
fixParameters(bool fix)70     virtual bool fixParameters(bool fix) {
71         bool current = fixed_parameters;
72         fixed_parameters = fix;
73         return current;
74     }
75 
76 	/**
77 	 * @return TRUE if this is a site-specific model, FALSE otherwise
78 	 */
isSiteSpecificModel()79 	virtual bool isSiteSpecificModel() { return false; }
80 
81 	/**
82 	 * @return TRUE if this is a mixture model, FALSE otherwise
83 	 */
isMixture()84 	virtual bool isMixture() { return false; }
85 
86     /**
87      * Confer to modelpomo.h.
88      *
89      * @return TRUE if PoMo is being used, FALSE otherise.
90      */
isPolymorphismAware()91     virtual bool isPolymorphismAware() { return false; }
92 
93 	/**
94 	 * @return the number of mixture model components
95 	 */
getNMixtures()96 	virtual int getNMixtures() { return 1; }
97 
98  	/**
99 	 * @param cat mixture class
100 	 * @return weight of a mixture model component
101 	 */
getMixtureWeight(int cat)102 	virtual double getMixtureWeight(int cat) { return 1.0; }
103 
104 	/**
105 	 * @param cat mixture class
106 	 * @return weight of a mixture model component
107 	 */
setMixtureWeight(int cat,double weight)108 	virtual void setMixtureWeight(int cat, double weight) {}
109 
110 	/**
111 	 * @param cat mixture class
112 	 * @return weight of a mixture model component
113 	 */
setFixMixtureWeight(bool fix_prop)114 	virtual void setFixMixtureWeight(bool fix_prop) {}
115 
116 	/**
117 	 * @param cat mixture class ID
118 	 * @return corresponding mixture model component
119 	 */
getMixtureClass(int cat)120     virtual ModelSubst* getMixtureClass(int cat) { return NULL; }
121 
122 	/**
123 	 * @param cat mixture class ID
124 	 * @param m mixture model class to set
125 	 */
setMixtureClass(int cat,ModelSubst * m)126     virtual void setMixtureClass(int cat, ModelSubst* m) { }
127 
128 	/**
129 		@return the number of rate entries, equal to the number of elements
130 			in the upper-diagonal of the rate matrix (since model is reversible)
131 	*/
getNumRateEntries()132 	virtual int getNumRateEntries() { return num_states*(num_states-1)/2; }
133 
134     /**
135      set num_params variable
136      */
setNParams(int num_params)137     virtual void setNParams(int num_params) {}
138 
139     /**
140      get num_params variable
141      */
getNParams()142     virtual int getNParams() {
143         return 0;
144     }
145 
146 	/**
147 	 * get the size of transition matrix, default is num_states*num_states.
148 	 * can be changed for e.g. site-specific model
149 	 */
getTransMatrixSize()150 	virtual int getTransMatrixSize() { return num_states * num_states; }
151 
152 	/**
153 		compute the transition probability matrix. One should override this function when defining new model.
154 		The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc)
155 		@param time time between two events
156         @param mixture (optional) class for mixture model
157 		@param trans_matrix (OUT) the transition matrix between all pairs of states.
158 			Assume trans_matrix has size of num_states * num_states.
159 	*/
160 	virtual void computeTransMatrix(double time, double *trans_matrix, int mixture = 0);
161 
162 	/**
163 		compute the transition probability between two states.
164 		One should override this function when defining new model.
165 		The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc)
166 		@param time time between two events
167 		@param state1 first state
168 		@param state2 second state
169 	*/
170 	virtual double computeTrans(double time, int state1, int state2);
171 
172 	/**
173 		compute the transition probability between two states at a specific model ID, useful for partition model
174 		One should override this function when defining new model.
175 		The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc)
176 		@param time time between two events
177 		@param model_id model ID
178 		@param state1 first state
179 		@param state2 second state
180 	*/
181 	virtual double computeTrans(double time, int model_id, int state1, int state2);
182 
183 	/**
184 		compute the transition probability and its 1st and 2nd derivatives between two states.
185 		One should override this function when defining new model.
186 		The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc)
187 		@param time time between two events
188 		@param state1 first state
189 		@param state2 second state
190 		@param derv1 (OUT) 1st derivative
191 		@param derv2 (OUT) 2nd derivative
192 	*/
193 	virtual double computeTrans(double time, int state1, int state2, double &derv1, double &derv2);
194 
195 	/**
196 		compute the transition probability and its 1st and 2nd derivatives between two states at a specific model ID
197 		One should override this function when defining new model.
198 		The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc)
199 		@param time time between two events
200 		@param model_id model ID
201 		@param state1 first state
202 		@param state2 second state
203 		@param derv1 (OUT) 1st derivative
204 		@param derv2 (OUT) 2nd derivative
205 	*/
206 	virtual double computeTrans(double time, int model_id, int state1, int state2, double &derv1, double &derv2);
207 
208 	/**
209 	 * @return pattern ID to model ID map, useful for e.g., partition model
210 	 * @param ptn pattern ID of the alignment
211 	 */
getPtnModelID(int ptn)212 	virtual int getPtnModelID(int ptn) { return 0; }
213 
214 
215 	/**
216 	 * Get the rate parameters like a,b,c,d,e,f for DNA model!!!
217 		Get the above-diagonal entries of the rate matrix, assuming that the last element is 1.
218 		ONE SHOULD OVERRIDE THIS FUNCTION WHEN DEFINING NEW MODEL!!!
219 		The default is equal rate of 1 (JC Model), valid for all kind of data.
220 		@param rate_mat (OUT) upper-triangle rate matrix. Assume rate_mat has size of num_states*(num_states-1)/2
221 	*/
222 
223 	virtual void getRateMatrix(double *rate_mat);
224 
225 	/**
226 		Get the rate matrix Q. One should override this function when defining new model.
227 		The default is equal rate of 1 (JC Model), valid for all kind of data.
228 		@param rate_mat (OUT) upper-triagle rate matrix. Assume rate_mat has size of num_states*(num_states-1)/2
229 	*/
230 	virtual void getQMatrix(double *q_mat);
231 
232 	/**
233 		compute the state frequency vector. One should override this function when defining new model.
234 		The default is equal state sequency, valid for all kind of data.
235         @param mixture (optional) class for mixture model
236 		@param[out] state_freq state frequency vector. Assume state_freq has size of num_states
237 	*/
238 	virtual void getStateFrequency(double *state_freq, int mixture = 0);
239 
240     /**
241      set the state frequency vector.
242      @param state_freq state frequency vector. Assume state_freq has size of num_states
243      */
244     virtual void setStateFrequency(double *state_freq);
245 
246 	/**
247 		get frequency type
248 		@return frequency type
249 	*/
getFreqType()250 	virtual StateFreqType getFreqType() { return FREQ_EQUAL; }
251 
252 
253 	/**
254 		allocate memory for a transition matrix. One should override this function when defining new model
255 		such as Gamma model. The default is to allocate a double vector of size num_states * num_states. This
256 		is equivalent to the memory needed by a square matrix.
257 		@return the pointer to the newly allocated transition matrix
258 	*/
259 	virtual double *newTransMatrix();
260 
261 
262 	/**
263 		compute the transition probability matrix.and the derivative 1 and 2
264 		@param time time between two events
265         @param mixture (optional) class for mixture model
266 		@param trans_matrix (OUT) the transition matrix between all pairs of states.
267 			Assume trans_matrix has size of num_states * num_states.
268 		@param trans_derv1 (OUT) the 1st derivative matrix between all pairs of states.
269 		@param trans_derv2 (OUT) the 2nd derivative matrix between all pairs of states.
270 	*/
271 	virtual void computeTransDerv(double time, double *trans_matrix,
272 		double *trans_derv1, double *trans_derv2, int mixture = 0);
273 
274 	/**
275 		decompose the rate matrix into eigenvalues and eigenvectors
276 	*/
decomposeRateMatrix()277 	virtual void decomposeRateMatrix() {}
278 
279 
280     /**
281         set number of optimization steps
282         @param opt_steps number of optimization steps
283     */
setOptimizeSteps(int optimize_steps)284     virtual void setOptimizeSteps(int optimize_steps) { }
285 
286 	/**
287 		optimize model parameters. One should override this function when defining new model.
288 		The default does nothing since it is a Juke-Cantor type model, hence no parameters involved.
289 		@param epsilon accuracy of the parameters during optimization
290 		@return the best likelihood
291 	*/
optimizeParameters(double gradient_epsilon)292 	virtual double optimizeParameters(double gradient_epsilon) { return 0.0; }
293 
294 	/**
295 	 * @return TRUE if parameters are at the boundary that may cause numerical unstability
296 	 */
isUnstableParameters()297 	virtual bool isUnstableParameters() { return false; }
298 
299 	/**
300 		write information
301 		@param out output stream
302 	*/
writeInfo(ostream & out)303 	virtual void writeInfo(ostream &out) {}
304 
305 	/**
306 		report model
307 		@param out output stream
308 	*/
report(ostream & out)309     virtual void report(ostream &out) {}
310 
getEigenvalues()311 	virtual double *getEigenvalues() const {
312 		return NULL;
313 	}
314 
getEigenvectors()315 	virtual double *getEigenvectors() const {
316 		return NULL;
317 	}
318 
getInverseEigenvectors()319 	virtual double *getInverseEigenvectors() const {
320 		return NULL;
321 	}
322 
323     /**
324      * compute the memory size for the model, can be large for site-specific models
325      * @return memory size required in bytes
326      */
getMemoryRequired()327     virtual uint64_t getMemoryRequired() {
328     	return num_states*sizeof(double);
329     }
330 
331     /**
332     * get the underlying mutation model, used with PoMo model
333     */
getMutationModel()334     virtual ModelSubst *getMutationModel() { return this; }
335 
336 	/*****************************************************
337 		Checkpointing facility
338 	*****************************************************/
339 
340     /**
341         start structure for checkpointing
342     */
343     virtual void startCheckpoint();
344 
345     /**
346         save object into the checkpoint
347     */
348     virtual void saveCheckpoint();
349 
350     /**
351         restore object from the checkpoint
352     */
353     virtual void restoreCheckpoint();
354 
355 	/**
356 		number of states
357 	*/
358 	int num_states;
359 
360 	/**
361 		name of the model
362 	*/
363 	string name;
364 
365 
366 	/**
367 		full name of the model
368 	*/
369 	string full_name;
370 
371     /** true to fix parameters, otherwise false */
372     bool fixed_parameters;
373 
374 	/**
375 	 state frequencies
376 	 */
377 	double *state_freq;
378 
379 
380 	/**
381 		state frequency type
382 	*/
383 	StateFreqType freq_type;
384 
385     /** state set for each sequence in the alignment */
386     //vector<vector<int> > seq_states;
387 
388 	/**
389 		destructor
390 	*/
391     virtual ~ModelSubst();
392 
393 protected:
394 
395 	/**
396 		this function is served for the multi-dimension optimization. It should pack the model parameters
397 		into a vector that is index from 1 (NOTE: not from 0)
398 		@param variables (OUT) vector of variables, indexed from 1
399 	*/
setVariables(double * variables)400 	virtual void setVariables(double *variables) {}
401 
402 	/**
403 		this function is served for the multi-dimension optimization. It should assign the model parameters
404 		from a vector of variables that is index from 1 (NOTE: not from 0)
405 		@param variables vector of variables, indexed from 1
406 		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
407 	*/
getVariables(double * variables)408 	virtual bool getVariables(double *variables) { return false; }
409 
410 
411 };
412 
413 #endif
414