1 /***************************************************************************
2  *   Copyright (C) 2009 by BUI Quang Minh   *
3  *   minh.bui@univie.ac.at   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #ifndef RATEMEYERDISCRETE_H
21 #define RATEMEYERDISCRETE_H
22 
23 #include "ratemeyerhaeseler.h"
24 
25 /**
26 The discrete version of Meyer & von Haeseler rate class
27 
28 	@author BUI Quang Minh <minh.bui@univie.ac.at>
29 */
30 class RateMeyerDiscrete : public RateMeyerHaeseler
31 {
32 public:
33  	/**
34 		constructor
35 		@param ncat number of rate categories
36 		@param cat_type category type, bitwise, incl. CAT_LOG, CAT_MEAN, CAT_PATTERN
37 		@param file_name rate file name, NULL if not inputed
38 		@param tree associated phylo tree
39    */
40    RateMeyerDiscrete(int ncat, int cat_type, char *file_name, PhyloTree *tree, bool rate_type);
41 
42    RateMeyerDiscrete();
43 
44 	/**
45 		destructor
46 	*/
47     virtual ~RateMeyerDiscrete();
48 
49 	/**
50 		get the number of rate categories for site-specific category model
51 		@return the number of rate categories
52 	*/
53 	virtual int getNDiscreteRate();
54 
55 	/**
56 		@param category category ID from 0 to #category-1
57 		@return the rate of the specified category
58 	*/
59 	virtual double getRate(int category);
60 
61 	/**
62 		get the rate of a specified site-pattern. Default returns 1.0 since it is homogeneous model
63 		@param ptn pattern ID
64 		@return the rate of the specified site-pattern
65 	*/
66 	virtual double getPtnRate(int ptn);
67 
68 	/**
69 		get rate category of a specified site-pattern.
70 		@param ptn pattern ID
71 		@return the rate category of the specified site-pattern
72 	*/
73 	virtual int getPtnCat(int ptn);
74 
75 	/**
76 		Compute site-specific rates. Override this for Gamma model
77 		@param pattern_rates (OUT) pattern rates. Resizing if necesary
78         @return total number of categories
79 	*/
80 	virtual int computePatternRates(DoubleVector &pattern_rates, IntVector &pattern_cat);
81 
82 	virtual bool isSiteSpecificRate();
83 
84 	/**
85 		return the number of dimensions
86 	*/
getNDim()87 	virtual int getNDim() { return ncategory; }
88 
89 	/**
90 		optimize rates of all site-patterns
91 		compute categorized rates from the "continuous" rate of the original Meyer & von Haeseler model.
92 		The current implementation uses the k-means algorithm with k-means++ package.
93 	*/
94 	virtual double optimizeParameters(double epsilon);
95 
96 	/**
97 		classify rates into categories.
98 		@param tree_lh the current tree log-likelihood
99 	*/
100 	virtual double classifyRates(double tree_lh);
101 
102 	/**
103 		classify rates into categories using k-means++ method.
104 		@return tree likelihood
105 	*/
106 	double classifyRatesKMeans();
107 
108 
109 	/**
110 		This function is inherited from Optimization class for optimizting site rates
111 		@param value x-value of the function
112 		@return f(value) of function f you want to minimize
113 	*/
114 	virtual double computeFunction(double value);
115 
116 	/**
117 		This function calculate f(value), first derivative f'(value) and 2nd derivative f''(value).
118 		@param value x-value of the function
119 		@param df (OUT) first derivative
120 		@param ddf (OUT) second derivative
121 		@return f(value) of function f you want to minimize
122 	*/
123 	virtual void computeFuncDerv(double value, double &df, double &ddf);
124 
125 	double optimizeCatRate(int cat);
126 
127 	void normalizeRates();
128 
129 	/**
130 		write information
131 		@param out output stream
132 	*/
133 	virtual void writeInfo(ostream &out);
134 
135 protected:
136 
137 	/**
138 		number of rate categories
139 	*/
140 	int ncategory;
141 
142 	/**
143 		category index for every pattern
144 	*/
145 	int *ptn_cat;
146 
147 	/**
148 		rates, containing ncategory elements
149 	*/
150 	double *rates;
151 
152 	/**
153 		false at beginning, true after continuous rates were optimized
154 	*/
155 	bool is_categorized;
156 
157 	int mcat_type;
158 
159 	/**
160 		current category under optimization. Note that this is not thread-safe
161 	*/
162 	int optimizing_cat;
163 
164 };
165 
166 #endif
167