1 /*************************************************************************** 2 * Copyright (C) 2009 by BUI Quang Minh * 3 * minh.bui@univie.ac.at * 4 * * 5 * This program is free software; you can redistribute it and/or modify * 6 * it under the terms of the GNU General Public License as published by * 7 * the Free Software Foundation; either version 2 of the License, or * 8 * (at your option) any later version. * 9 * * 10 * This program is distributed in the hope that it will be useful, * 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 * GNU General Public License for more details. * 14 * * 15 * You should have received a copy of the GNU General Public License * 16 * along with this program; if not, write to the * 17 * Free Software Foundation, Inc., * 18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * 19 ***************************************************************************/ 20 #ifndef RATEHETEROGENEITY_H 21 #define RATEHETEROGENEITY_H 22 23 24 #include "utils/optimization.h" 25 #include <string> 26 #include "utils/tools.h" 27 #include "utils/checkpoint.h" 28 29 using namespace std; 30 31 class PhyloTree; 32 33 const double MIN_SITE_RATE = 1e-6; 34 const double MAX_SITE_RATE = 100.0; 35 const double TOL_SITE_RATE = 1e-6; 36 37 38 /** 39 class for among-site rate heterogeneity, the default is homogeneous (equal) rate model 40 41 @author BUI Quang Minh <minh.bui@univie.ac.at> 42 */ 43 44 class RateHeterogeneity : public Optimization, public CheckpointFactory 45 { 46 friend class ModelFactory; 47 friend class ModelPoMoMixture; 48 49 public: 50 /** 51 constructor 52 */ 53 RateHeterogeneity(); 54 55 /** 56 destructor 57 */ 58 virtual ~RateHeterogeneity(); 59 60 /** 61 start structure for checkpointing 62 */ 63 virtual void startCheckpoint(); 64 65 /** 66 save object into the checkpoint 67 */ 68 virtual void saveCheckpoint(); 69 70 /** 71 restore object from the checkpoint 72 */ 73 virtual void restoreCheckpoint(); 74 75 /** 76 set phylogenetic tree 77 @param tree associated phyogenetic tree 78 */ 79 void setTree(PhyloTree *tree); 80 81 /** 82 set phylogenetic tree 83 @param tree associated phyogenetic tree 84 */ getTree()85 PhyloTree *getTree() { return phylo_tree; } 86 87 /** 88 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f} 89 */ getNameParams()90 virtual string getNameParams() { return name; } 91 92 /** 93 @return false by default. True if rates are site-specific (Meyer and von Haeseler (2003) model) 94 */ isSiteSpecificRate()95 virtual bool isSiteSpecificRate() { return false; } 96 97 /** 98 @return TRUE if this is a heterotachy model, default: FALSE 99 */ isHeterotachy()100 virtual bool isHeterotachy() { return false; } 101 102 /** 103 get the number of rate categories. The default returns 1 category since it is homogeneous model 104 @return the number of rate categories 105 */ getNRate()106 virtual int getNRate() { return 1; } 107 108 /** 109 set the number of rate categories. The default raises assertion since it is homogeneous model 110 */ setNCategory(int ncat)111 virtual void setNCategory(int ncat) { ASSERT(0); } 112 113 /** 114 initialize from checkpoint rates and prop from rate model with #category-1 115 */ initFromCatMinusOne()116 virtual void initFromCatMinusOne() {} 117 118 /** 119 get the number of rate categories for site-specific category model 120 The default returns 1 category since it is homogeneous model 121 @return the number of rate categories 122 */ getNDiscreteRate()123 virtual int getNDiscreteRate() { return 1; } 124 125 /** 126 get the rate of a specified category. Default returns 1.0 since it is homogeneous model 127 @param category category ID from 0 to #category-1 128 @return the rate of the specified category 129 */ getRate(int category)130 virtual double getRate(int category) { return 1.0; } 131 132 /** 133 set the rate of a specified category. 134 @param category category ID from 0 to #category-1 135 @param value the rate of the specified category 136 */ setRate(int category,double value)137 virtual void setRate(int category, double value) { } 138 139 /** 140 get the proportion of a specified category. Default returns 1.0 since it is homogeneous model 141 @param category category ID from 0 to #category-1 142 @return the proportion of the specified category 143 */ getProp(int category)144 virtual double getProp(int category) { return 1.0; } 145 146 /** 147 set the proportion of a specified category. 148 @param category category ID from 0 to #category-1 149 @return the proportion of the specified category 150 */ setProp(int category,double value)151 virtual void setProp(int category, double value) {} 152 153 /** 154 get the rate of a specified site-pattern. Default returns 1.0 since it is homogeneous model 155 @param ptn pattern ID 156 @return the rate of the specified site-pattern 157 */ getPtnRate(int ptn)158 virtual double getPtnRate(int ptn) { return 1.0; } 159 160 /** 161 get rate category of a specified site-pattern. Default returns -1 since it is homogeneous model 162 @param ptn pattern ID 163 @return the rate category of the specified site-pattern 164 */ getPtnCat(int ptn)165 virtual int getPtnCat(int ptn) { return -1; } 166 167 /** 168 get the proportion of invariable sites. Default returns 0.0 since it is homogeneous model 169 @return the proportion of invariable sites 170 */ getPInvar()171 virtual double getPInvar() { return 0.0; } 172 173 /** 174 set the proportion of invariable sites. Default: do nothing 175 @param pinv the proportion of invariable sites 176 */ setPInvar(double pinv)177 virtual void setPInvar(double pinv) { } 178 isFixPInvar()179 virtual bool isFixPInvar() const { 180 return true; 181 } 182 183 /** 184 set whether to fix p_invar 185 */ setFixPInvar(bool fixPInvar)186 virtual void setFixPInvar(bool fixPInvar) {} 187 188 /** 189 Set whether or not to optimize p_invar 190 @param opt TRUE to optimize p_invar, FALSE otherwise 191 */ 192 // virtual void setOptimizePInvar(bool opt) { } 193 194 /** 195 get the Gamma shape. Default returns 0.0 since it is homogeneous model 196 @return Gamma shape 197 */ getGammaShape()198 virtual double getGammaShape() { return 0.0; } 199 200 /** 201 set the Gamma shape. Default: nothing 202 @param gs Gamma shape 203 */ setGammaShape(double gs)204 virtual void setGammaShape(double gs) {} 205 isFixGammaShape()206 virtual bool isFixGammaShape() const { 207 return true; 208 } 209 210 /** 211 set whether to fix gamma shape 212 */ setFixGammaShape(bool fixGammaShape)213 virtual void setFixGammaShape(bool fixGammaShape) {} 214 215 /** 216 @return >0 if this is a Gamma model (default: 0) 217 */ isGammaRate()218 virtual int isGammaRate() { return 0; } 219 220 /** 221 fix parameters, so that no optimization done 222 @param mode some input mode 223 */ getFixParams()224 virtual int getFixParams() { return 0; } 225 226 /** 227 fix parameters, so that no optimization done 228 @param mode some input mode 229 */ setFixParams(int mode)230 virtual void setFixParams(int mode) {} 231 232 /** 233 * check whether +I+G is used 234 */ isGammai()235 virtual bool isGammai() const { 236 return false; 237 } 238 239 /** 240 the target function which needs to be optimized 241 @param x the input vector x 242 @return the function value at x 243 */ 244 virtual double targetFunk(double x[]); 245 246 /** 247 * setup the bounds for joint optimization with BFGS 248 */ setBounds(double * lower_bound,double * upper_bound,bool * bound_check)249 virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check) {} 250 251 /** 252 set number of optimization steps 253 @param opt_steps number of optimization steps 254 */ setOptimizeSteps(int optimize_steps)255 virtual void setOptimizeSteps(int optimize_steps) { } 256 257 /** 258 optimize parameters. Default does nothing 259 @return the best likelihood 260 */ optimizeParameters(double gradient_epsilon)261 virtual double optimizeParameters(double gradient_epsilon) { return 0.0; } 262 263 /** 264 classify rates into categories, this is meant for the discrete MH model. 265 The default just return tree_lh 266 @param tree_lh the current tree log-likelihood 267 */ classifyRates(double tree_lh)268 virtual double classifyRates(double tree_lh) { return tree_lh; } 269 270 /** 271 * used to normal branch lengths if mean rate is not equal to 1 (e.g. FreeRate model) 272 * @return mean rate, default = 1 273 */ meanRates()274 virtual double meanRates() { return 1.0; } 275 276 /** 277 * rescale rates s.t. mean rate is equal to 1, useful for FreeRate model 278 * @return rescaling factor 279 */ rescaleRates()280 virtual double rescaleRates() { return 1.0; } 281 282 /** 283 write information 284 @param out output stream 285 */ writeInfo(ostream & out)286 virtual void writeInfo(ostream &out) {} 287 288 /** 289 write parameters, used with modeltest 290 @param out output stream 291 */ writeParameters(ostream & out)292 virtual void writeParameters(ostream &out) {} 293 294 /** 295 Compute site-specific rates. Override this for Gamma model 296 @param pattern_rates (OUT) pattern rates. Resizing if necesary 297 @return total number of categories 298 */ computePatternRates(DoubleVector & pattern_rates,IntVector & pattern_cat)299 virtual int computePatternRates(DoubleVector &pattern_rates, IntVector &pattern_cat) { return 1; } 300 301 /** 302 name of the rate heterogeneity type 303 */ 304 string name; 305 306 307 /** 308 * Specify whether the initial starting value of the gamma shape and p_inv 309 * has already been tested. 310 */ 311 bool testParamDone; 312 313 /** 314 full name of the rate heterogeneity type 315 */ 316 string full_name; 317 318 /** 319 phylogenetic tree associated 320 */ 321 PhyloTree *phylo_tree; 322 323 protected: 324 325 /** 326 this function is served for the multi-dimension optimization. It should pack the model parameters 327 into a vector that is index from 1 (NOTE: not from 0) 328 @param variables (OUT) vector of variables, indexed from 1 329 */ setVariables(double * variables)330 virtual void setVariables(double *variables) {} 331 332 /** 333 this function is served for the multi-dimension optimization. It should assign the model parameters 334 from a vector of variables that is index from 1 (NOTE: not from 0) 335 @param variables vector of variables, indexed from 1 336 @return TRUE if parameters are changed, FALSE otherwise (2015-10-20) 337 */ getVariables(double * variables)338 virtual bool getVariables(double *variables) { return false; } 339 340 341 }; 342 #endif 343