/*
 * @file methods/adaboost/adaboost_impl.hpp
 * @author Udit Saxena
 *
 * Implementation of the AdaBoost class.
 *
 * @code
 * @article{schapire1999improved,
 *   author = {Schapire, Robert E. and Singer, Yoram},
 *   title = {Improved Boosting Algorithms Using Confidence-rated Predictions},
 *   journal = {Machine Learning},
 *   volume = {37},
 *   number = {3},
 *   month = dec,
 *   year = {1999},
 *   issn = {0885-6125},
 *   pages = {297--336},
 * }
 * @endcode
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license.  You should have received a copy of the
 * 3-clause BSD license along with mlpack.  If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#ifndef MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP
#define MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP

#include "adaboost.hpp"

namespace mlpack {
namespace adaboost {
33 
34 /**
35  * Constructor. Currently runs the AdaBoost.MH algorithm.
36  *
37  * @param data Input data
38  * @param labels Corresponding labels
39  * @param iterations Number of boosting rounds
40  * @param tol Tolerance for termination of Adaboost.MH.
41  * @param other Weak Learner, which has been initialized already.
42  */
43 template<typename WeakLearnerType, typename MatType>
AdaBoost(const MatType & data,const arma::Row<size_t> & labels,const size_t numClasses,const WeakLearnerType & other,const size_t iterations,const double tol)44 AdaBoost<WeakLearnerType, MatType>::AdaBoost(
45     const MatType& data,
46     const arma::Row<size_t>& labels,
47     const size_t numClasses,
48     const WeakLearnerType& other,
49     const size_t iterations,
50     const double tol)
51 {
52   Train(data, labels, numClasses, other, iterations, tol);
53 }
54 
55 // Empty constructor.
56 template<typename WeakLearnerType, typename MatType>
AdaBoost(const double tolerance)57 AdaBoost<WeakLearnerType, MatType>::AdaBoost(const double tolerance) :
58     numClasses(0),
59     tolerance(tolerance)
60 {
61   // Nothing to do.
62 }
63 
64 // Train AdaBoost.
65 template<typename WeakLearnerType, typename MatType>
Train(const MatType & data,const arma::Row<size_t> & labels,const size_t numClasses,const WeakLearnerType & other,const size_t iterations,const double tolerance)66 double AdaBoost<WeakLearnerType, MatType>::Train(
67     const MatType& data,
68     const arma::Row<size_t>& labels,
69     const size_t numClasses,
70     const WeakLearnerType& other,
71     const size_t iterations,
72     const double tolerance)
73 {
74   // Clear information from previous runs.
75   wl.clear();
76   alpha.clear();
77 
78   this->tolerance = tolerance;
79   this->numClasses = numClasses;
80 
81   // crt is the cumulative rt value for terminating the optimization when rt is
82   // changing by less than the tolerance.
83   double rt, crt = 0.0, alphat = 0.0, zt;
84 
85   double ztProduct = 1.0;
86 
87   // To be used for prediction by the weak learner.
88   arma::Row<size_t> predictedLabels(labels.n_cols);
89 
90   // Use tempData to modify input data for incorporating weights.
91   MatType tempData(data);
92 
93   // This matrix is a helper matrix used to calculate the final hypothesis.
94   arma::mat sumFinalH = arma::zeros<arma::mat>(numClasses,
95       predictedLabels.n_cols);
96 
97   // Load the initial weights into a 2-D matrix.
98   const double initWeight = 1.0 / double(data.n_cols * numClasses);
99   arma::mat D(numClasses, data.n_cols);
100   D.fill(initWeight);
101 
102   // Weights are stored in this row vector.
103   arma::rowvec weights(predictedLabels.n_cols);
104 
105   // This is the final hypothesis.
106   arma::Row<size_t> finalH(predictedLabels.n_cols);
107 
108   // Now, start the boosting rounds.
109   for (size_t i = 0; i < iterations; ++i)
110   {
111     // Initialized to zero in every round.  rt is used for calculation of
112     // alphat; it is the weighted error.
113     // rt = (sum) D(i) y(i) ht(xi)
114     rt = 0.0;
115 
116     // zt is used for weight normalization.
117     zt = 0.0;
118 
119     // Build the weight vectors.
120     weights = arma::sum(D);
121 
122     // Use the existing weak learner to train a new one with new weights.
123     WeakLearnerType w(other, tempData, labels, numClasses, weights);
124     w.Classify(tempData, predictedLabels);
125 
126     // Now from predictedLabels, build ht, the weak hypothesis
127     // buildClassificationMatrix(ht, predictedLabels);
128 
129     // Now, calculate alpha(t) using ht.
130     for (size_t j = 0; j < D.n_cols; ++j) // instead of D, ht
131     {
132       if (predictedLabels(j) == labels(j))
133         rt += arma::accu(D.col(j));
134       else
135         rt -= arma::accu(D.col(j));
136     }
137 
138     if ((i > 0) && (std::abs(rt - crt) < tolerance))
139       break;
140 
141     // Check if model has converged.
142     if (rt >= 1.0)
143     {
144       // Save the weak learner and terminate.
145       alpha.push_back(1.0);
146       wl.push_back(w);
147       break;
148     }
149 
150     crt = rt;
151 
152     // Our goal is to find alphat which mizimizes or approximately minimizes the
153     // value of Z as a function of alpha.
154     alphat = 0.5 * log((1 + rt) / (1 - rt));
155 
156     alpha.push_back(alphat);
157     wl.push_back(w);
158 
159     // Now start modifying the weights.
160     for (size_t j = 0; j < D.n_cols; ++j)
161     {
162       const double expo = exp(alphat);
163       if (predictedLabels(j) == labels(j))
164       {
165         for (size_t k = 0; k < D.n_rows; ++k)
166         {
167           // We calculate zt, the normalization constant.
168           D(k, j) /= expo;
169           zt += D(k, j); // * exp(-1 * alphat * yt(j,k) * ht(j,k));
170 
171           // Add to the final hypothesis matrix.
172           // sumFinalH(k, j) += (alphat * ht(k, j));
173           if (k == labels(j))
174             sumFinalH(k, j) += (alphat); // * ht(k, j));
175           else
176             sumFinalH(k, j) -= (alphat);
177         }
178       }
179       else
180       {
181         for (size_t k = 0; k < D.n_rows; ++k)
182         {
183           // We calculate zt, the normalization constant.
184           D(k, j) *= expo;
185           zt += D(k, j);
186 
187           // Add to the final hypothesis matrix.
188           if (k == labels(j))
189             sumFinalH(k, j) += alphat; // * ht(k, j));
190           else
191             sumFinalH(k, j) -= alphat;
192         }
193       }
194     }
195 
196     // Normalize D.
197     D /= zt;
198 
199     // Accumulate the value of zt for the Hamming loss bound.
200     ztProduct *= zt;
201   }
202   return ztProduct;
203 }
204 
205 /**
206  * Classify the given test points.
207  */
208 template<typename WeakLearnerType, typename MatType>
Classify(const MatType & test,arma::Row<size_t> & predictedLabels)209 void AdaBoost<WeakLearnerType, MatType>::Classify(
210     const MatType& test,
211     arma::Row<size_t>& predictedLabels)
212 {
213   arma::Row<size_t> tempPredictedLabels(test.n_cols);
214   arma::mat probabilities;
215 
216   Classify(test, predictedLabels, probabilities);
217 }
218 
219 /**
220  * Classify the given test points.
221  */
222 template<typename WeakLearnerType, typename MatType>
Classify(const MatType & test,arma::Row<size_t> & predictedLabels,arma::mat & probabilities)223 void AdaBoost<WeakLearnerType, MatType>::Classify(
224     const MatType& test,
225     arma::Row<size_t>& predictedLabels,
226     arma::mat& probabilities)
227 {
228   arma::Row<size_t> tempPredictedLabels(test.n_cols);
229 
230   probabilities.zeros(numClasses, test.n_cols);
231   predictedLabels.set_size(test.n_cols);
232 
233   for (size_t i = 0; i < wl.size(); ++i)
234   {
235     wl[i].Classify(test, tempPredictedLabels);
236 
237     for (size_t j = 0; j < tempPredictedLabels.n_cols; ++j)
238       probabilities(tempPredictedLabels(j), j) += alpha[i];
239   }
240 
241   arma::colvec pRow;
242   arma::uword maxIndex = 0;
243 
244   for (size_t i = 0; i < predictedLabels.n_cols; ++i)
245   {
246     probabilities.col(i) /= arma::accu(probabilities.col(i));
247     pRow = probabilities.unsafe_col(i);
248     pRow.max(maxIndex);
249     predictedLabels(i) = maxIndex;
250   }
251 }
252 
253 /**
254  * Serialize the AdaBoost model.
255  */
256 template<typename WeakLearnerType, typename MatType>
257 template<typename Archive>
serialize(Archive & ar,const unsigned int version)258 void AdaBoost<WeakLearnerType, MatType>::serialize(Archive& ar,
259                                                    const unsigned int version)
260 {
261   ar & BOOST_SERIALIZATION_NVP(numClasses);
262   ar & BOOST_SERIALIZATION_NVP(tolerance);
263   if (version == 0 && Archive::is_loading::value)
264   {
265     // Load unused ztProduct double and forget it.
266     double tmpZtProduct = 0.0;
267     ar & BOOST_SERIALIZATION_NVP(tmpZtProduct);
268   }
269   ar & BOOST_SERIALIZATION_NVP(alpha);
270 
271   // Now serialize each weak learner.
272   if (Archive::is_loading::value)
273   {
274     wl.clear();
275     wl.resize(alpha.size());
276   }
277   ar & BOOST_SERIALIZATION_NVP(wl);
278 }
279 
} // namespace adaboost
} // namespace mlpack

#endif
284