1 /* 2 * modelmixture.h 3 * 4 * Created on: Nov 29, 2014 5 * Author: minh 6 */ 7 8 #ifndef MODELMIXTURE_H_ 9 #define MODELMIXTURE_H_ 10 11 #include "tree/phylotree.h" 12 #include "modelsubst.h" 13 #include "modelmarkov.h" 14 #include "nclextra/modelsblock.h" 15 16 17 const char OPEN_BRACKET = '{'; 18 const char CLOSE_BRACKET = '}'; 19 20 extern const char* builtin_mixmodels_definition; 21 22 /** 23 * create a substitution model 24 * @param model_str model nme 25 * @param freq_type state frequency type 26 * @param freq_params frequency parameters 27 * @param tree associated phylo tree 28 * @param count_rates TRUE to assign rates counted from alignment, FALSE to not initialize rates 29 * @return substitution model created 30 */ 31 ModelSubst *createModel(string model_str, ModelsBlock *models_block, StateFreqType freq_type, string freq_params, 32 PhyloTree *tree); 33 34 35 /** 36 * mixture model 37 */ 38 class ModelMixture: virtual public ModelMarkov, public vector<ModelMarkov*> { 39 public: 40 41 /** 42 constructor 43 @param model_name model name, e.g., JC, HKY. 44 @param freq state frequency type 45 @param tree associated phylogenetic tree 46 */ 47 ModelMixture(string orig_model_name, string model_name, string model_list, ModelsBlock *models_block, 48 StateFreqType freq, string freq_params, PhyloTree *tree, bool optimize_weights); 49 50 void initMixture(string orig_model_name, string model_name, string model_list, ModelsBlock *models_block, 51 StateFreqType freq, string freq_params, PhyloTree *tree, bool optimize_weights); 52 53 void initMem(); 54 55 /** 56 constructor 57 @param tree associated tree for the model 58 */ 59 ModelMixture(PhyloTree *tree); 60 61 62 virtual ~ModelMixture(); 63 64 /** 65 set checkpoint object 66 @param checkpoint 67 */ 68 virtual void setCheckpoint(Checkpoint *checkpoint); 69 70 /** 71 start structure for checkpointing 72 */ 73 virtual void startCheckpoint(); 74 75 /** 76 save object into the checkpoint 77 */ 78 virtual void saveCheckpoint(); 79 80 /** 81 restore object from the checkpoint 82 */ 83 virtual void restoreCheckpoint(); 84 85 86 /** 87 * @return TRUE if this is a mixture model, FALSE otherwise 88 */ isMixture()89 virtual bool isMixture() { return true; } 90 91 92 /** 93 * @return the number of mixture model components 94 */ getNMixtures()95 virtual int getNMixtures() {return size(); } 96 97 /** 98 * @param cat mixture class 99 * @return weight of a mixture model component 100 */ getMixtureWeight(int cat)101 virtual double getMixtureWeight(int cat) { return prop[cat]; } 102 103 /** 104 * @param cat mixture class 105 * @return weight of a mixture model component 106 */ setMixtureWeight(int cat,double weight)107 virtual void setMixtureWeight(int cat, double weight) { prop[cat] = weight; } 108 109 /** 110 * @param cat mixture class 111 * @return weight of a mixture model component 112 */ setFixMixtureWeight(bool fix_prop)113 virtual void setFixMixtureWeight(bool fix_prop) { this->fix_prop = fix_prop; } 114 115 /** 116 * @param cat mixture class ID 117 * @return corresponding mixture model component 118 */ getMixtureClass(int cat)119 virtual ModelSubst* getMixtureClass(int cat) { return at(cat); } 120 121 /** 122 * @param cat mixture class ID 123 * @param m mixture model class to set 124 */ setMixtureClass(int cat,ModelSubst * m)125 virtual void setMixtureClass(int cat, ModelSubst* m) { at(cat) = (ModelMarkov*)m; } 126 127 /** 128 compute the state frequency vector 129 @param mixture (optional) class for mixture model. 130 -1 to get weighted sum of class state frequency vector 131 @param state_freq (OUT) state frequency vector. Assume state_freq has size of num_states 132 */ 133 virtual void getStateFrequency(double *state_freq, int mixture = 0); 134 135 /** 136 compute the transition probability matrix. One should override this function when defining new model. 137 The default is the Juke-Cantor model, valid for all kind of data (DNA, AA, Codon, etc) 138 @param time time between two events 139 @param mixture (optional) class for mixture model 140 @param trans_matrix (OUT) the transition matrix between all pairs of states. 141 Assume trans_matrix has size of num_states * num_states. 142 */ 143 virtual void computeTransMatrix(double time, double *trans_matrix, int mixture = 0); 144 145 146 /** 147 compute the transition probability matrix.and the derivative 1 and 2 148 @param time time between two events 149 @param mixture (optional) class for mixture model 150 @param trans_matrix (OUT) the transition matrix between all pairs of states. 151 Assume trans_matrix has size of num_states * num_states. 152 @param trans_derv1 (OUT) the 1st derivative matrix between all pairs of states. 153 @param trans_derv2 (OUT) the 2nd derivative matrix between all pairs of states. 154 */ 155 virtual void computeTransDerv(double time, double *trans_matrix, 156 double *trans_derv1, double *trans_derv2, int mixture = 0); 157 158 /** 159 @return the number of dimensions 160 */ 161 virtual int getNDim(); 162 163 /** 164 @return the number of dimensions corresponding to state frequencies 165 */ 166 virtual int getNDimFreq(); 167 168 /** 169 the target function which needs to be optimized 170 @param x the input vector x 171 @return the function value at x 172 */ 173 virtual double targetFunk(double x[]); 174 175 /** 176 optimize mixture weights using EM algorithm 177 @return log-likelihood of optimized weights 178 */ 179 double optimizeWeights(); 180 181 /** 182 optimize rate parameters using EM algorithm 183 @param gradient_epsilon 184 @return log-likelihood of optimized parameters 185 */ 186 double optimizeWithEM(double gradient_epsilon); 187 188 189 /** 190 set number of optimization steps 191 @param opt_steps number of optimization steps 192 */ setOptimizeSteps(int optimize_steps)193 virtual void setOptimizeSteps(int optimize_steps) { this->optimize_steps = optimize_steps; } 194 195 /** @return true if model is fused with site_rate */ 196 bool isFused(); 197 198 /** 199 optimize model parameters 200 @return the best likelihood 201 */ 202 virtual double optimizeParameters(double gradient_epsilon); 203 204 /** 205 * @return TRUE if parameters are at the boundary that may cause numerical unstability 206 */ 207 virtual bool isUnstableParameters(); 208 209 /** 210 decompose the rate matrix into eigenvalues and eigenvectors 211 */ 212 virtual void decomposeRateMatrix(); 213 214 /** 215 * setup the bounds for joint optimization with BFGS 216 */ 217 virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check); 218 219 /** 220 write information 221 @param out output stream 222 */ 223 virtual void writeInfo(ostream &out); 224 225 /** 226 write parameters, used with modeltest 227 @param out output stream 228 */ 229 virtual void writeParameters(ostream &out); 230 231 /** 232 * @return model name 233 */ 234 virtual string getName(); 235 236 /** 237 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f} 238 */ 239 virtual string getNameParams(); 240 241 /** 242 * compute the memory size for the model, can be large for site-specific models 243 * @return memory size required in bytes 244 */ getMemoryRequired()245 virtual uint64_t getMemoryRequired() { 246 uint64_t mem = ModelMarkov::getMemoryRequired(); 247 for (iterator it = begin(); it != end(); it++) 248 mem += (*it)->getMemoryRequired(); 249 return mem; 250 } 251 252 /** 253 rates of mixture components 254 */ 255 // double *mix_rates; 256 257 /** 258 * weight of each sub-model (must sum to 1) 259 */ 260 double *prop; 261 262 /** 263 * TRUE to fix model weights 264 */ 265 bool fix_prop; 266 267 protected: 268 269 bool optimizing_submodels; 270 271 /** number of optimization steps, default: ncategory*2 */ 272 int optimize_steps; 273 274 /** 275 this function is served for the multi-dimension optimization. It should pack the model parameters 276 into a vector that is index from 1 (NOTE: not from 0) 277 @param variables (OUT) vector of variables, indexed from 1 278 */ 279 virtual void setVariables(double *variables); 280 281 /** 282 this function is served for the multi-dimension optimization. It should assign the model parameters 283 from a vector of variables that is index from 1 (NOTE: not from 0) 284 @param variables vector of variables, indexed from 1 285 @return TRUE if parameters are changed, FALSE otherwise (2015-10-20) 286 */ 287 virtual bool getVariables(double *variables); 288 289 }; 290 291 #endif /* MODELMIXTURE_H_ */ 292