1 /* 2 * modelcodon.h 3 * 4 * Created on: May 24, 2013 5 * Author: minh 6 */ 7 8 #ifndef MODELCODON_H_ 9 #define MODELCODON_H_ 10 11 #include "modelmarkov.h" 12 13 /** CF_TARGET_NT: frequency of target nucleotide is multiplied with the rate entry (Muse and Gaut 1994) 14 CF_TARGET_CODON: frequency of target codon is multiplied with the rate entry (Goldman Yang 1994) 15 */ 16 enum CodonFreqStyle {CF_TARGET_NT, CF_TARGET_CODON}; 17 18 enum CodonKappaStyle {CK_ONE_KAPPA, CK_ONE_KAPPA_TS, CK_ONE_KAPPA_TV, CK_TWO_KAPPA}; 19 20 const int CA_STOP_CODON = 1; // stop codon substitution 21 const int CA_MULTI_NT = 2; // codon substitution involves > 1 NT 22 const int CA_SYNONYMOUS = 4; // synonymous codon substitution 23 const int CA_NONSYNONYMOUS= 8; // synonymous codon substitution 24 const int CA_TRANSVERSION = 16; // codon substitution involves 1 NT transversion 25 const int CA_TRANSITION = 32; // codon substitution involves 1 NT transition 26 const int CA_TRANSVERSION_1NT = 64; // codon substitution involve the 1st NT which is also a transversion 27 const int CA_TRANSVERSION_2NT = 128; // codon substitution involve the 2nd NT which is also a transversion 28 const int CA_TRANSVERSION_3NT = 256; // codon substitution involve the 3rd NT which is also a transversion 29 const int CA_TRANSITION_1NT = 512; // codon substitution involve the 1st NT which is also a transversion 30 const int CA_TRANSITION_2NT = 1024; // codon substitution involve the 2nd NT which is also a transversion 31 const int CA_TRANSITION_3NT = 2048; // codon substitution involve the 3rd NT which is also a transversion 32 33 /** 34 * Codon substitution models 35 */ 36 class ModelCodon: public ModelMarkov { 37 public: 38 /** 39 constructor 40 @param model_name model name, e.g., GY,YN 41 @param freq state frequency type 42 @param tree associated phylogenetic tree 43 */ 44 ModelCodon(const char *model_name, string model_params, StateFreqType freq, string freq_params, 45 PhyloTree *tree); 46 47 /** 48 * destructor 49 */ 50 virtual ~ModelCodon(); 51 52 /** 53 start structure for checkpointing 54 */ 55 virtual void startCheckpoint(); 56 57 /** 58 save object into the checkpoint 59 */ 60 virtual void saveCheckpoint(); 61 62 /** 63 restore object from the checkpoint 64 */ 65 virtual void restoreCheckpoint(); 66 67 /** 68 @return the number of rate entries, equal to the number of non-diagonal elements of the rate matrix 69 since we store full matrix here 70 */ getNumRateEntries()71 virtual int getNumRateEntries() { return num_states*(num_states); } 72 73 /** 74 initialization, called automatically by the constructor, no need to call it 75 @param model_name model name, e.g., JC, HKY. 76 @param freq state frequency type 77 */ 78 virtual void init(const char *model_name, string model_params, StateFreqType freq, string freq_params); 79 80 StateFreqType initCodon(const char *model_name, StateFreqType freq, bool reset_params); 81 82 83 /** 84 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f} 85 */ getNameParams()86 virtual string getNameParams() { return name; } 87 88 /** main function to compute rate matrix */ 89 void computeCodonRateMatrix(); 90 91 /** 92 decompose the rate matrix into eigenvalues and eigenvectors 93 */ 94 virtual void decomposeRateMatrix(); 95 96 /** 97 * read codon model from a stream, modying rates and state_freq accordingly 98 * @param in input stream containing lower triangular matrix of rates, frequencies and list of codons 99 * @reset_params true to reset parameters, false otherwise 100 */ 101 void readCodonModel(istream &in, bool reset_params); 102 103 /** 104 * read codon model from a string, modying rates and state_freq accordingly 105 * @param str input string containing lower triangular matrix of rates, frequencies and list of codons 106 * @reset_params true to reset parameters, false otherwise 107 */ 108 void readCodonModel(string &str, bool reset_params); 109 110 /** 111 * read codon model from a file in PAML format 112 * @param filename input file containing lower triangular matrix of rates, frequencies and list of codons 113 * @reset_params true to reset parameters, false otherwise 114 */ 115 void readCodonModelFile(const char *filename, bool reset_params); 116 117 /** 118 write information 119 @param out output stream 120 */ 121 virtual void writeInfo(ostream &out); 122 123 /** compute rate_attr for all codoni->codoni substitution */ 124 void computeRateAttributes(); 125 126 /** combine rates with target nucleotide frequency (ntfreq) for MG-style model */ 127 void combineRateNTFreq(); 128 129 /** compute the corrected empirical omega (Kosiol et al 2007) */ 130 double computeEmpiricalOmega(); 131 132 /** 133 * setup the bounds for joint optimization with BFGS 134 */ 135 virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check); 136 137 /** 138 optimize model parameters 139 @return the best likelihood 140 */ 141 virtual double optimizeParameters(double gradient_epsilon); 142 143 /** 3x4 matrix of nucleotide frequencies at 1st,2nd,3rd codon position */ 144 double *ntfreq; 145 146 /** dn/ds rate ratio */ 147 double omega; 148 149 /** TRUE to fix omega, default: FALSE */ 150 bool fix_omega; 151 152 /** style for kappa */ 153 CodonKappaStyle codon_kappa_style; 154 155 /** ts/tv rate ratio */ 156 double kappa; 157 158 /** TRUE to fix kappa, default: FALSE */ 159 bool fix_kappa; 160 161 /** ts/tv rate ratio for 2-kappa model (Kosiol et al 2007) */ 162 double kappa2; 163 164 /** TRUE to fix kappa2, default: FALSE */ 165 bool fix_kappa2; 166 167 /** GY- or MG-style codon frequencies */ 168 CodonFreqStyle codon_freq_style; 169 170 /** rate atrributes */ 171 int *rate_attr; 172 173 /** empirical rates for empirical codon model or parametric+empirical codon model */ 174 double *empirical_rates; 175 176 protected: 177 178 void computeCodonRateMatrix_1KAPPA(); 179 void computeCodonRateMatrix_1KAPPATS(); 180 void computeCodonRateMatrix_1KAPPATV(); 181 void computeCodonRateMatrix_2KAPPA(); 182 183 /** initialize Muse-Gaut 1994 model 184 @param fix_kappa whether or not to fix kappa 185 @param freq input frequency 186 @return default frequency type 187 */ 188 StateFreqType initMG94(bool fix_kappa, StateFreqType freq, CodonKappaStyle kappa_style); 189 190 /** initialize Goldman-Yang 1994 model (simplified version with 2 parameters omega and kappa 191 @param fix_kappa whether or not to fix kappa 192 @param kappa_style: CK_ONE_KAPPA for traditional GY model, others follow Kosiol et al 2007 193 @return default frequency type 194 */ 195 StateFreqType initGY94(bool fix_kappa, CodonKappaStyle kappa_style); 196 197 /** 198 this function is served for the multi-dimension optimization. It should pack the model parameters 199 into a vector that is index from 1 (NOTE: not from 0) 200 @param variables (OUT) vector of variables, indexed from 1 201 */ 202 virtual void setVariables(double *variables); 203 204 /** 205 this function is served for the multi-dimension optimization. It should assign the model parameters 206 from a vector of variables that is index from 1 (NOTE: not from 0) 207 @param variables vector of variables, indexed from 1 208 @return TRUE if parameters are changed, FALSE otherwise (2015-10-20) 209 */ 210 virtual bool getVariables(double *variables); 211 212 }; 213 214 #endif /* MODELCODON_H_ */ 215