/**
 * @file methods/adaboost/adaboost_impl.hpp
 * @author Udit Saxena
 *
 * Implementation of the AdaBoost class.
 *
 * @code
 * @article{schapire1999improved,
 *   author = {Schapire, Robert E. and Singer, Yoram},
 *   title = {Improved Boosting Algorithms Using Confidence-rated Predictions},
 *   journal = {Machine Learning},
 *   volume = {37},
 *   number = {3},
 *   month = dec,
 *   year = {1999},
 *   issn = {0885-6125},
 *   pages = {297--336},
 * }
 * @endcode
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license.  You should have received a copy of the
 * 3-clause BSD license along with mlpack.  If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#ifndef MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP
#define MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP

#include "adaboost.hpp"

namespace mlpack {
namespace adaboost {
33
34 /**
35 * Constructor. Currently runs the AdaBoost.MH algorithm.
36 *
37 * @param data Input data
38 * @param labels Corresponding labels
39 * @param iterations Number of boosting rounds
40 * @param tol Tolerance for termination of Adaboost.MH.
41 * @param other Weak Learner, which has been initialized already.
42 */
43 template<typename WeakLearnerType, typename MatType>
AdaBoost(const MatType & data,const arma::Row<size_t> & labels,const size_t numClasses,const WeakLearnerType & other,const size_t iterations,const double tol)44 AdaBoost<WeakLearnerType, MatType>::AdaBoost(
45 const MatType& data,
46 const arma::Row<size_t>& labels,
47 const size_t numClasses,
48 const WeakLearnerType& other,
49 const size_t iterations,
50 const double tol)
51 {
52 Train(data, labels, numClasses, other, iterations, tol);
53 }
54
55 // Empty constructor.
56 template<typename WeakLearnerType, typename MatType>
AdaBoost(const double tolerance)57 AdaBoost<WeakLearnerType, MatType>::AdaBoost(const double tolerance) :
58 numClasses(0),
59 tolerance(tolerance)
60 {
61 // Nothing to do.
62 }
63
64 // Train AdaBoost.
65 template<typename WeakLearnerType, typename MatType>
Train(const MatType & data,const arma::Row<size_t> & labels,const size_t numClasses,const WeakLearnerType & other,const size_t iterations,const double tolerance)66 double AdaBoost<WeakLearnerType, MatType>::Train(
67 const MatType& data,
68 const arma::Row<size_t>& labels,
69 const size_t numClasses,
70 const WeakLearnerType& other,
71 const size_t iterations,
72 const double tolerance)
73 {
74 // Clear information from previous runs.
75 wl.clear();
76 alpha.clear();
77
78 this->tolerance = tolerance;
79 this->numClasses = numClasses;
80
81 // crt is the cumulative rt value for terminating the optimization when rt is
82 // changing by less than the tolerance.
83 double rt, crt = 0.0, alphat = 0.0, zt;
84
85 double ztProduct = 1.0;
86
87 // To be used for prediction by the weak learner.
88 arma::Row<size_t> predictedLabels(labels.n_cols);
89
90 // Use tempData to modify input data for incorporating weights.
91 MatType tempData(data);
92
93 // This matrix is a helper matrix used to calculate the final hypothesis.
94 arma::mat sumFinalH = arma::zeros<arma::mat>(numClasses,
95 predictedLabels.n_cols);
96
97 // Load the initial weights into a 2-D matrix.
98 const double initWeight = 1.0 / double(data.n_cols * numClasses);
99 arma::mat D(numClasses, data.n_cols);
100 D.fill(initWeight);
101
102 // Weights are stored in this row vector.
103 arma::rowvec weights(predictedLabels.n_cols);
104
105 // This is the final hypothesis.
106 arma::Row<size_t> finalH(predictedLabels.n_cols);
107
108 // Now, start the boosting rounds.
109 for (size_t i = 0; i < iterations; ++i)
110 {
111 // Initialized to zero in every round. rt is used for calculation of
112 // alphat; it is the weighted error.
113 // rt = (sum) D(i) y(i) ht(xi)
114 rt = 0.0;
115
116 // zt is used for weight normalization.
117 zt = 0.0;
118
119 // Build the weight vectors.
120 weights = arma::sum(D);
121
122 // Use the existing weak learner to train a new one with new weights.
123 WeakLearnerType w(other, tempData, labels, numClasses, weights);
124 w.Classify(tempData, predictedLabels);
125
126 // Now from predictedLabels, build ht, the weak hypothesis
127 // buildClassificationMatrix(ht, predictedLabels);
128
129 // Now, calculate alpha(t) using ht.
130 for (size_t j = 0; j < D.n_cols; ++j) // instead of D, ht
131 {
132 if (predictedLabels(j) == labels(j))
133 rt += arma::accu(D.col(j));
134 else
135 rt -= arma::accu(D.col(j));
136 }
137
138 if ((i > 0) && (std::abs(rt - crt) < tolerance))
139 break;
140
141 // Check if model has converged.
142 if (rt >= 1.0)
143 {
144 // Save the weak learner and terminate.
145 alpha.push_back(1.0);
146 wl.push_back(w);
147 break;
148 }
149
150 crt = rt;
151
152 // Our goal is to find alphat which mizimizes or approximately minimizes the
153 // value of Z as a function of alpha.
154 alphat = 0.5 * log((1 + rt) / (1 - rt));
155
156 alpha.push_back(alphat);
157 wl.push_back(w);
158
159 // Now start modifying the weights.
160 for (size_t j = 0; j < D.n_cols; ++j)
161 {
162 const double expo = exp(alphat);
163 if (predictedLabels(j) == labels(j))
164 {
165 for (size_t k = 0; k < D.n_rows; ++k)
166 {
167 // We calculate zt, the normalization constant.
168 D(k, j) /= expo;
169 zt += D(k, j); // * exp(-1 * alphat * yt(j,k) * ht(j,k));
170
171 // Add to the final hypothesis matrix.
172 // sumFinalH(k, j) += (alphat * ht(k, j));
173 if (k == labels(j))
174 sumFinalH(k, j) += (alphat); // * ht(k, j));
175 else
176 sumFinalH(k, j) -= (alphat);
177 }
178 }
179 else
180 {
181 for (size_t k = 0; k < D.n_rows; ++k)
182 {
183 // We calculate zt, the normalization constant.
184 D(k, j) *= expo;
185 zt += D(k, j);
186
187 // Add to the final hypothesis matrix.
188 if (k == labels(j))
189 sumFinalH(k, j) += alphat; // * ht(k, j));
190 else
191 sumFinalH(k, j) -= alphat;
192 }
193 }
194 }
195
196 // Normalize D.
197 D /= zt;
198
199 // Accumulate the value of zt for the Hamming loss bound.
200 ztProduct *= zt;
201 }
202 return ztProduct;
203 }
204
205 /**
206 * Classify the given test points.
207 */
208 template<typename WeakLearnerType, typename MatType>
Classify(const MatType & test,arma::Row<size_t> & predictedLabels)209 void AdaBoost<WeakLearnerType, MatType>::Classify(
210 const MatType& test,
211 arma::Row<size_t>& predictedLabels)
212 {
213 arma::Row<size_t> tempPredictedLabels(test.n_cols);
214 arma::mat probabilities;
215
216 Classify(test, predictedLabels, probabilities);
217 }
218
219 /**
220 * Classify the given test points.
221 */
222 template<typename WeakLearnerType, typename MatType>
Classify(const MatType & test,arma::Row<size_t> & predictedLabels,arma::mat & probabilities)223 void AdaBoost<WeakLearnerType, MatType>::Classify(
224 const MatType& test,
225 arma::Row<size_t>& predictedLabels,
226 arma::mat& probabilities)
227 {
228 arma::Row<size_t> tempPredictedLabels(test.n_cols);
229
230 probabilities.zeros(numClasses, test.n_cols);
231 predictedLabels.set_size(test.n_cols);
232
233 for (size_t i = 0; i < wl.size(); ++i)
234 {
235 wl[i].Classify(test, tempPredictedLabels);
236
237 for (size_t j = 0; j < tempPredictedLabels.n_cols; ++j)
238 probabilities(tempPredictedLabels(j), j) += alpha[i];
239 }
240
241 arma::colvec pRow;
242 arma::uword maxIndex = 0;
243
244 for (size_t i = 0; i < predictedLabels.n_cols; ++i)
245 {
246 probabilities.col(i) /= arma::accu(probabilities.col(i));
247 pRow = probabilities.unsafe_col(i);
248 pRow.max(maxIndex);
249 predictedLabels(i) = maxIndex;
250 }
251 }
252
253 /**
254 * Serialize the AdaBoost model.
255 */
256 template<typename WeakLearnerType, typename MatType>
257 template<typename Archive>
serialize(Archive & ar,const unsigned int version)258 void AdaBoost<WeakLearnerType, MatType>::serialize(Archive& ar,
259 const unsigned int version)
260 {
261 ar & BOOST_SERIALIZATION_NVP(numClasses);
262 ar & BOOST_SERIALIZATION_NVP(tolerance);
263 if (version == 0 && Archive::is_loading::value)
264 {
265 // Load unused ztProduct double and forget it.
266 double tmpZtProduct = 0.0;
267 ar & BOOST_SERIALIZATION_NVP(tmpZtProduct);
268 }
269 ar & BOOST_SERIALIZATION_NVP(alpha);
270
271 // Now serialize each weak learner.
272 if (Archive::is_loading::value)
273 {
274 wl.clear();
275 wl.resize(alpha.size());
276 }
277 ar & BOOST_SERIALIZATION_NVP(wl);
278 }
279
} // namespace adaboost
} // namespace mlpack

#endif