1 // 2 // C++ Interface: substmodel 3 // 4 // Description: 5 // 6 // 7 // Author: BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <minh.bui@univie.ac.at>, (C) 2008 8 // 9 // Copyright: See COPYING file that comes with this distribution 10 // 11 // 12 #ifndef SUBSTMODEL_H 13 #define SUBSTMODEL_H 14 15 #include <string> 16 #include "utils/tools.h" 17 #include "utils/optimization.h" 18 #include "utils/checkpoint.h" 19 20 using namespace std; 21 22 /** 23 Substitution model abstract class 24 25 @author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <minh.bui@univie.ac.at> 26 */ 27 class ModelSubst: public Optimization, public CheckpointFactory 28 { 29 friend class ModelFactory; 30 friend class PartitionModel; 31 32 public: 33 /** 34 constructor 35 @param nstates number of states, e.g. 4 for DNA, 20 for proteins. 36 */ 37 ModelSubst(int nstates); 38 39 40 /** 41 @return the number of dimensions 42 */ getNDim()43 virtual int getNDim() { return 0; } 44 45 /** 46 @return the number of dimensions corresponding to state frequencies 47 */ getNDimFreq()48 virtual int getNDimFreq() { return 0; } 49 50 /** 51 * @return model name 52 */ getName()53 virtual string getName() { return name; } 54 55 /** 56 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f} 57 */ getNameParams()58 virtual string getNameParams() { return name; } 59 60 /** 61 @return TRUE if model is time-reversible, FALSE otherwise 62 */ isReversible()63 virtual bool isReversible() { return true; }; 64 65 /** 66 fix parameters of the model 67 @param fix true to fix, false to not fix 68 @return the current state of fixing parameters 69 */ fixParameters(bool fix)70 virtual bool fixParameters(bool fix) { 71 bool current = fixed_parameters; 72 fixed_parameters = fix; 73 return current; 74 } 75 76 /** 77 * @return TRUE if this is a site-specific model, FALSE otherwise 78 */ isSiteSpecificModel()79 virtual bool isSiteSpecificModel() { return false; } 80 81 /** 82 * @return TRUE if this is a mixture model, FALSE otherwise 83 */ isMixture()84 virtual bool isMixture() { return false; } 85 86 /** 87 * Confer to modelpomo.h. 88 * 89 * @return TRUE if PoMo is being used, FALSE otherise. 90 */ isPolymorphismAware()91 virtual bool isPolymorphismAware() { return false; } 92 93 /** 94 * @return the number of mixture model components 95 */ getNMixtures()96 virtual int getNMixtures() { return 1; } 97 98 /** 99 * @param cat mixture class 100 * @return weight of a mixture model component 101 */ getMixtureWeight(int cat)102 virtual double getMixtureWeight(int cat) { return 1.0; } 103 104 /** 105 * @param cat mixture class 106 * @return weight of a mixture model component 107 */ setMixtureWeight(int cat,double weight)108 virtual void setMixtureWeight(int cat, double weight) {} 109 110 /** 111 * @param cat mixture class 112 * @return weight of a mixture model component 113 */ setFixMixtureWeight(bool fix_prop)114 virtual void setFixMixtureWeight(bool fix_prop) {} 115 116 /** 117 * @param cat mixture class ID 118 * @return corresponding mixture model component 119 */ getMixtureClass(int cat)120 virtual ModelSubst* getMixtureClass(int cat) { return NULL; } 121 122 /** 123 * @param cat mixture class ID 124 * @param m mixture model class to set 125 */ setMixtureClass(int cat,ModelSubst * m)126 virtual void setMixtureClass(int cat, ModelSubst* m) { } 127 128 /** 129 @return the number of rate entries, equal to the number of elements 130 in the upper-diagonal of the rate matrix (since model is reversible) 131 */ getNumRateEntries()132 virtual int getNumRateEntries() { return num_states*(num_states-1)/2; } 133 134 /** 135 set num_params variable 136 */ setNParams(int num_params)137 virtual void setNParams(int num_params) {} 138 139 /** 140 get num_params variable 141 */ getNParams()142 virtual int getNParams() { 143 return 0; 144 } 145 146 /** 147 * get the size of transition matrix, default is num_states*num_states. 148 * can be changed for e.g. site-specific model 149 */ getTransMatrixSize()150 virtual int getTransMatrixSize() { return num_states * num_states; } 151 152 /** 153 compute the transition probability matrix. One should override this function when defining new model. 154 The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc) 155 @param time time between two events 156 @param mixture (optional) class for mixture model 157 @param trans_matrix (OUT) the transition matrix between all pairs of states. 158 Assume trans_matrix has size of num_states * num_states. 159 */ 160 virtual void computeTransMatrix(double time, double *trans_matrix, int mixture = 0); 161 162 /** 163 compute the transition probability between two states. 164 One should override this function when defining new model. 165 The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc) 166 @param time time between two events 167 @param state1 first state 168 @param state2 second state 169 */ 170 virtual double computeTrans(double time, int state1, int state2); 171 172 /** 173 compute the transition probability between two states at a specific model ID, useful for partition model 174 One should override this function when defining new model. 175 The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc) 176 @param time time between two events 177 @param model_id model ID 178 @param state1 first state 179 @param state2 second state 180 */ 181 virtual double computeTrans(double time, int model_id, int state1, int state2); 182 183 /** 184 compute the transition probability and its 1st and 2nd derivatives between two states. 185 One should override this function when defining new model. 186 The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc) 187 @param time time between two events 188 @param state1 first state 189 @param state2 second state 190 @param derv1 (OUT) 1st derivative 191 @param derv2 (OUT) 2nd derivative 192 */ 193 virtual double computeTrans(double time, int state1, int state2, double &derv1, double &derv2); 194 195 /** 196 compute the transition probability and its 1st and 2nd derivatives between two states at a specific model ID 197 One should override this function when defining new model. 198 The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc) 199 @param time time between two events 200 @param model_id model ID 201 @param state1 first state 202 @param state2 second state 203 @param derv1 (OUT) 1st derivative 204 @param derv2 (OUT) 2nd derivative 205 */ 206 virtual double computeTrans(double time, int model_id, int state1, int state2, double &derv1, double &derv2); 207 208 /** 209 * @return pattern ID to model ID map, useful for e.g., partition model 210 * @param ptn pattern ID of the alignment 211 */ getPtnModelID(int ptn)212 virtual int getPtnModelID(int ptn) { return 0; } 213 214 215 /** 216 * Get the rate parameters like a,b,c,d,e,f for DNA model!!! 217 Get the above-diagonal entries of the rate matrix, assuming that the last element is 1. 218 ONE SHOULD OVERRIDE THIS FUNCTION WHEN DEFINING NEW MODEL!!! 219 The default is equal rate of 1 (JC Model), valid for all kind of data. 220 @param rate_mat (OUT) upper-triangle rate matrix. Assume rate_mat has size of num_states*(num_states-1)/2 221 */ 222 223 virtual void getRateMatrix(double *rate_mat); 224 225 /** 226 Get the rate matrix Q. One should override this function when defining new model. 227 The default is equal rate of 1 (JC Model), valid for all kind of data. 228 @param rate_mat (OUT) upper-triagle rate matrix. Assume rate_mat has size of num_states*(num_states-1)/2 229 */ 230 virtual void getQMatrix(double *q_mat); 231 232 /** 233 compute the state frequency vector. One should override this function when defining new model. 234 The default is equal state sequency, valid for all kind of data. 235 @param mixture (optional) class for mixture model 236 @param[out] state_freq state frequency vector. Assume state_freq has size of num_states 237 */ 238 virtual void getStateFrequency(double *state_freq, int mixture = 0); 239 240 /** 241 set the state frequency vector. 242 @param state_freq state frequency vector. Assume state_freq has size of num_states 243 */ 244 virtual void setStateFrequency(double *state_freq); 245 246 /** 247 get frequency type 248 @return frequency type 249 */ getFreqType()250 virtual StateFreqType getFreqType() { return FREQ_EQUAL; } 251 252 253 /** 254 allocate memory for a transition matrix. One should override this function when defining new model 255 such as Gamma model. The default is to allocate a double vector of size num_states * num_states. This 256 is equivalent to the memory needed by a square matrix. 257 @return the pointer to the newly allocated transition matrix 258 */ 259 virtual double *newTransMatrix(); 260 261 262 /** 263 compute the transition probability matrix.and the derivative 1 and 2 264 @param time time between two events 265 @param mixture (optional) class for mixture model 266 @param trans_matrix (OUT) the transition matrix between all pairs of states. 267 Assume trans_matrix has size of num_states * num_states. 268 @param trans_derv1 (OUT) the 1st derivative matrix between all pairs of states. 269 @param trans_derv2 (OUT) the 2nd derivative matrix between all pairs of states. 270 */ 271 virtual void computeTransDerv(double time, double *trans_matrix, 272 double *trans_derv1, double *trans_derv2, int mixture = 0); 273 274 /** 275 decompose the rate matrix into eigenvalues and eigenvectors 276 */ decomposeRateMatrix()277 virtual void decomposeRateMatrix() {} 278 279 280 /** 281 set number of optimization steps 282 @param opt_steps number of optimization steps 283 */ setOptimizeSteps(int optimize_steps)284 virtual void setOptimizeSteps(int optimize_steps) { } 285 286 /** 287 optimize model parameters. One should override this function when defining new model. 288 The default does nothing since it is a Juke-Cantor type model, hence no parameters involved. 289 @param epsilon accuracy of the parameters during optimization 290 @return the best likelihood 291 */ optimizeParameters(double gradient_epsilon)292 virtual double optimizeParameters(double gradient_epsilon) { return 0.0; } 293 294 /** 295 * @return TRUE if parameters are at the boundary that may cause numerical unstability 296 */ isUnstableParameters()297 virtual bool isUnstableParameters() { return false; } 298 299 /** 300 write information 301 @param out output stream 302 */ writeInfo(ostream & out)303 virtual void writeInfo(ostream &out) {} 304 305 /** 306 report model 307 @param out output stream 308 */ report(ostream & out)309 virtual void report(ostream &out) {} 310 getEigenvalues()311 virtual double *getEigenvalues() const { 312 return NULL; 313 } 314 getEigenvectors()315 virtual double *getEigenvectors() const { 316 return NULL; 317 } 318 getInverseEigenvectors()319 virtual double *getInverseEigenvectors() const { 320 return NULL; 321 } 322 323 /** 324 * compute the memory size for the model, can be large for site-specific models 325 * @return memory size required in bytes 326 */ getMemoryRequired()327 virtual uint64_t getMemoryRequired() { 328 return num_states*sizeof(double); 329 } 330 331 /** 332 * get the underlying mutation model, used with PoMo model 333 */ getMutationModel()334 virtual ModelSubst *getMutationModel() { return this; } 335 336 /***************************************************** 337 Checkpointing facility 338 *****************************************************/ 339 340 /** 341 start structure for checkpointing 342 */ 343 virtual void startCheckpoint(); 344 345 /** 346 save object into the checkpoint 347 */ 348 virtual void saveCheckpoint(); 349 350 /** 351 restore object from the checkpoint 352 */ 353 virtual void restoreCheckpoint(); 354 355 /** 356 number of states 357 */ 358 int num_states; 359 360 /** 361 name of the model 362 */ 363 string name; 364 365 366 /** 367 full name of the model 368 */ 369 string full_name; 370 371 /** true to fix parameters, otherwise false */ 372 bool fixed_parameters; 373 374 /** 375 state frequencies 376 */ 377 double *state_freq; 378 379 380 /** 381 state frequency type 382 */ 383 StateFreqType freq_type; 384 385 /** state set for each sequence in the alignment */ 386 //vector<vector<int> > seq_states; 387 388 /** 389 destructor 390 */ 391 virtual ~ModelSubst(); 392 393 protected: 394 395 /** 396 this function is served for the multi-dimension optimization. It should pack the model parameters 397 into a vector that is index from 1 (NOTE: not from 0) 398 @param variables (OUT) vector of variables, indexed from 1 399 */ setVariables(double * variables)400 virtual void setVariables(double *variables) {} 401 402 /** 403 this function is served for the multi-dimension optimization. It should assign the model parameters 404 from a vector of variables that is index from 1 (NOTE: not from 0) 405 @param variables vector of variables, indexed from 1 406 @return TRUE if parameters are changed, FALSE otherwise (2015-10-20) 407 */ getVariables(double * variables)408 virtual bool getVariables(double *variables) { return false; } 409 410 411 }; 412 413 #endif 414