1 /*
2  * modelcodon.h
3  *
4  *  Created on: May 24, 2013
5  *      Author: minh
6  */
7 
8 #ifndef MODELCODON_H_
9 #define MODELCODON_H_
10 
11 #include "modelmarkov.h"
12 
13 /** CF_TARGET_NT: frequency of target nucleotide is multiplied with the rate entry (Muse and Gaut 1994)
14     CF_TARGET_CODON: frequency of target codon is multiplied with the rate entry (Goldman Yang 1994)
15     */
16 enum CodonFreqStyle {CF_TARGET_NT, CF_TARGET_CODON};
17 
18 enum CodonKappaStyle {CK_ONE_KAPPA, CK_ONE_KAPPA_TS, CK_ONE_KAPPA_TV, CK_TWO_KAPPA};
19 
20 const int CA_STOP_CODON   = 1; // stop codon substitution
21 const int CA_MULTI_NT     = 2; // codon substitution involves > 1 NT
22 const int CA_SYNONYMOUS   = 4; // synonymous codon substitution
23 const int CA_NONSYNONYMOUS= 8; // synonymous codon substitution
24 const int CA_TRANSVERSION = 16; // codon substitution involves 1 NT transversion
25 const int CA_TRANSITION   = 32; // codon substitution involves 1 NT transition
26 const int CA_TRANSVERSION_1NT = 64; // codon substitution involve the 1st NT which is also a transversion
27 const int CA_TRANSVERSION_2NT = 128; // codon substitution involve the 2nd NT which is also a transversion
28 const int CA_TRANSVERSION_3NT = 256; // codon substitution involve the 3rd NT which is also a transversion
29 const int CA_TRANSITION_1NT   = 512; // codon substitution involve the 1st NT which is also a transversion
30 const int CA_TRANSITION_2NT   = 1024; // codon substitution involve the 2nd NT which is also a transversion
31 const int CA_TRANSITION_3NT   = 2048; // codon substitution involve the 3rd NT which is also a transversion
32 
33 /**
34  * Codon substitution models
35  */
36 class ModelCodon: public ModelMarkov {
37 public:
38 	/**
39 		constructor
40 		@param model_name model name, e.g., GY,YN
41 		@param freq state frequency type
42 		@param tree associated phylogenetic tree
43 	*/
44 	ModelCodon(const char *model_name, string model_params, StateFreqType freq, string freq_params,
45     		PhyloTree *tree);
46 
47 	/**
48 	 * destructor
49 	 */
50 	virtual ~ModelCodon();
51 
52     /**
53         start structure for checkpointing
54     */
55     virtual void startCheckpoint();
56 
57     /**
58         save object into the checkpoint
59     */
60     virtual void saveCheckpoint();
61 
62     /**
63         restore object from the checkpoint
64     */
65     virtual void restoreCheckpoint();
66 
67 	/**
68 		@return the number of rate entries, equal to the number of non-diagonal elements of the rate matrix
69         since we store full matrix here
70 	*/
getNumRateEntries()71 	virtual int getNumRateEntries() { return num_states*(num_states); }
72 
73 	/**
74 		initialization, called automatically by the constructor, no need to call it
75 		@param model_name model name, e.g., JC, HKY.
76 		@param freq state frequency type
77 	*/
78 	virtual void init(const char *model_name, string model_params, StateFreqType freq, string freq_params);
79 
80 	StateFreqType initCodon(const char *model_name, StateFreqType freq, bool reset_params);
81 
82 
83 	/**
84 	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
85 	 */
getNameParams()86 	virtual string getNameParams() { return name; }
87 
88     /** main function to compute rate matrix */
89     void computeCodonRateMatrix();
90 
91 	/**
92 		decompose the rate matrix into eigenvalues and eigenvectors
93 	*/
94 	virtual void decomposeRateMatrix();
95 
96 	/**
97 	 * read codon model from a stream, modying rates and state_freq accordingly
98 	 * @param in input stream containing lower triangular matrix of rates, frequencies and list of codons
99      * @reset_params true to reset parameters, false otherwise
100 	 */
101 	void readCodonModel(istream &in, bool reset_params);
102 
103 	/**
104 	 * read codon model from a string, modying rates and state_freq accordingly
105 	 * @param str input string containing lower triangular matrix of rates, frequencies and list of codons
106      * @reset_params true to reset parameters, false otherwise
107 	 */
108 	void readCodonModel(string &str, bool reset_params);
109 
110 	/**
111 	 * read codon model from a file in PAML format
112 	 * @param filename input file containing lower triangular matrix of rates, frequencies and list of codons
113      * @reset_params true to reset parameters, false otherwise
114 	 */
115     void readCodonModelFile(const char *filename, bool reset_params);
116 
117 	/**
118 		write information
119 		@param out output stream
120 	*/
121 	virtual void writeInfo(ostream &out);
122 
123     /** compute rate_attr for all codoni->codoni substitution */
124     void computeRateAttributes();
125 
126     /** combine rates with target nucleotide frequency (ntfreq) for MG-style model */
127     void combineRateNTFreq();
128 
129     /** compute the corrected empirical omega (Kosiol et al 2007) */
130     double computeEmpiricalOmega();
131 
132 	/**
133 	 * setup the bounds for joint optimization with BFGS
134 	 */
135 	virtual void setBounds(double *lower_bound, double *upper_bound, bool *bound_check);
136 
137 	/**
138 		optimize model parameters
139 		@return the best likelihood
140 	*/
141 	virtual double optimizeParameters(double gradient_epsilon);
142 
143 	/** 3x4 matrix of nucleotide frequencies at 1st,2nd,3rd codon position */
144 	double *ntfreq;
145 
146     /** dn/ds rate ratio */
147     double omega;
148 
149     /** TRUE to fix omega, default: FALSE */
150     bool fix_omega;
151 
152     /** style for kappa */
153     CodonKappaStyle codon_kappa_style;
154 
155     /** ts/tv rate ratio */
156     double kappa;
157 
158     /** TRUE to fix kappa, default: FALSE */
159     bool fix_kappa;
160 
161     /** ts/tv rate ratio for 2-kappa model (Kosiol et al 2007) */
162     double kappa2;
163 
164     /** TRUE to fix kappa2, default: FALSE */
165     bool fix_kappa2;
166 
167     /** GY- or MG-style codon frequencies */
168     CodonFreqStyle codon_freq_style;
169 
170     /** rate atrributes */
171     int *rate_attr;
172 
173 	/** empirical rates for empirical codon model or parametric+empirical codon model */
174 	double *empirical_rates;
175 
176 protected:
177 
178     void computeCodonRateMatrix_1KAPPA();
179     void computeCodonRateMatrix_1KAPPATS();
180     void computeCodonRateMatrix_1KAPPATV();
181     void computeCodonRateMatrix_2KAPPA();
182 
183 	/** initialize Muse-Gaut 1994 model
184         @param fix_kappa whether or not to fix kappa
185         @param freq input frequency
186         @return default frequency type
187     */
188 	StateFreqType initMG94(bool fix_kappa, StateFreqType freq, CodonKappaStyle kappa_style);
189 
190 	/** initialize Goldman-Yang 1994 model (simplified version with 2 parameters omega and kappa
191         @param fix_kappa whether or not to fix kappa
192         @param kappa_style: CK_ONE_KAPPA for traditional GY model, others follow Kosiol et al 2007
193         @return default frequency type
194     */
195 	StateFreqType initGY94(bool fix_kappa, CodonKappaStyle kappa_style);
196 
197 	/**
198 		this function is served for the multi-dimension optimization. It should pack the model parameters
199 		into a vector that is index from 1 (NOTE: not from 0)
200 		@param variables (OUT) vector of variables, indexed from 1
201 	*/
202 	virtual void setVariables(double *variables);
203 
204 	/**
205 		this function is served for the multi-dimension optimization. It should assign the model parameters
206 		from a vector of variables that is index from 1 (NOTE: not from 0)
207 		@param variables vector of variables, indexed from 1
208 		@return TRUE if parameters are changed, FALSE otherwise (2015-10-20)
209 	*/
210 	virtual bool getVariables(double *variables);
211 
212 };
213 
214 #endif /* MODELCODON_H_ */
215