// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// AUTHOR: Rahul Kavi rahulkavi[at]live[at]com

//
// This is an implementation of the Logistic Regression algorithm
//

#include "precomp.hpp"

using namespace std;

namespace cv {
namespace ml {

class LrParams
{
public:
    LrParams()
    {
        alpha = 0.001;
        num_iters = 1000;
        norm = LogisticRegression::REG_L2;
        train_method = LogisticRegression::BATCH;
        mini_batch_size = 1;
        term_crit = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, num_iters, alpha);
    }

    double alpha; //!< learning rate.
    int num_iters; //!< number of iterations.
    int norm; //!< regularization kind (REG_L1, REG_L2 or REG_DISABLE).
    int train_method; //!< optimizer (BATCH or MINI_BATCH).
    int mini_batch_size; //!< batch size used when train_method == MINI_BATCH.
    TermCriteria term_crit; //!< termination criteria.
};

class LogisticRegressionImpl CV_FINAL : public LogisticRegression
{
public:

    LogisticRegressionImpl() { }
    virtual ~LogisticRegressionImpl() {}

    inline double getLearningRate() const CV_OVERRIDE { return params.alpha; }
    inline void setLearningRate(double val) CV_OVERRIDE { params.alpha = val; }
    inline int getIterations() const CV_OVERRIDE { return params.num_iters; }
    inline void setIterations(int val) CV_OVERRIDE { params.num_iters = val; }
    inline int getRegularization() const CV_OVERRIDE { return params.norm; }
    inline void setRegularization(int val) CV_OVERRIDE { params.norm = val; }
    inline int getTrainMethod() const CV_OVERRIDE { return params.train_method; }
    inline void setTrainMethod(int val) CV_OVERRIDE { params.train_method = val; }
    inline int getMiniBatchSize() const CV_OVERRIDE { return params.mini_batch_size; }
    inline void setMiniBatchSize(int val) CV_OVERRIDE { params.mini_batch_size = val; }
    inline TermCriteria getTermCriteria() const CV_OVERRIDE { return params.term_crit; }
    inline void setTermCriteria(TermCriteria val) CV_OVERRIDE { params.term_crit = val; }

    virtual bool train( const Ptr<TrainData>& trainData, int=0 ) CV_OVERRIDE;
    virtual float predict(InputArray samples, OutputArray results, int flags=0) const CV_OVERRIDE;
    virtual void clear() CV_OVERRIDE;
    virtual void write(FileStorage& fs) const CV_OVERRIDE;
    virtual void read(const FileNode& fn) CV_OVERRIDE;
    virtual Mat get_learnt_thetas() const CV_OVERRIDE { return learnt_thetas; }
    virtual int getVarCount() const CV_OVERRIDE { return learnt_thetas.cols; }
    virtual bool isTrained() const CV_OVERRIDE { return !learnt_thetas.empty(); }
    virtual bool isClassifier() const CV_OVERRIDE { return true; }
    virtual String getDefaultName() const CV_OVERRIDE { return "opencv_ml_lr"; }
protected:
    Mat calc_sigmoid(const Mat& data) const;
    double compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    void compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient );
    Mat batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    Mat mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    bool set_label_map(const Mat& _labels_i);
    Mat remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const;
protected:
    LrParams params;
    Mat learnt_thetas;
    map<int, int> forward_mapper;
    map<int, int> reverse_mapper;
    Mat labels_o;
    Mat labels_n;
};

Ptr<LogisticRegression> LogisticRegression::create()
{
    return makePtr<LogisticRegressionImpl>();
}
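
// A minimal usage sketch of the public API created above (illustrative only;
// the data, shapes and parameter values are hypothetical and error handling
// is omitted):
//
//     Mat samples = (Mat_<float>(4, 2) << 1.f, 2.f, 2.f, 3.f, 7.f, 8.f, 8.f, 9.f);
//     Mat responses = (Mat_<float>(4, 1) << 0.f, 0.f, 1.f, 1.f);
//     Ptr<LogisticRegression> lr = LogisticRegression::create();
//     lr->setLearningRate(0.001);
//     lr->setIterations(100);
//     lr->setRegularization(LogisticRegression::REG_L2);
//     lr->setTrainMethod(LogisticRegression::BATCH);
//     lr->train(TrainData::create(samples, ROW_SAMPLE, responses));
//     Mat predicted;
//     lr->predict(samples, predicted); // predicted is CV_32S, one label per row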

Ptr<LogisticRegression> LogisticRegression::load(const String& filepath, const String& nodeName)
{
    return Algorithm::load<LogisticRegression>(filepath, nodeName);
}
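
// Trained models round-trip through FileStorage via write()/read() below; e.g.
// (hypothetical path):
//
//     lr->save("lr_model.xml");
//     Ptr<LogisticRegression> lr2 = LogisticRegression::load("lr_model.xml");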

bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
{
    CV_TRACE_FUNCTION_SKIP_NESTED();
    CV_Assert(!trainData.empty());

    // return value
    bool ok = false;
    clear();
    Mat _data_i = trainData->getSamples();
    Mat _labels_i = trainData->getResponses();

    // check size and type of training data
    CV_Assert( !_labels_i.empty() && !_data_i.empty());
    if(_labels_i.cols != 1)
    {
        CV_Error( CV_StsBadArg, "labels should be a column matrix" );
    }
    if(_data_i.type() != CV_32FC1 || _labels_i.type() != CV_32FC1)
    {
        CV_Error( CV_StsBadArg, "data and labels must be floating-point matrices" );
    }
    if(_labels_i.rows != _data_i.rows)
    {
        CV_Error( CV_StsBadArg, "number of rows in data and labels should be equal" );
    }

    // class labels
    set_label_map(_labels_i);
    Mat labels_l = remap_labels(_labels_i, this->forward_mapper);
    int num_classes = (int) this->forward_mapper.size();
    if(num_classes < 2)
    {
        CV_Error( CV_StsBadArg, "data should have at least 2 classes" );
    }
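
    // At this point labels_l contains contiguous internal labels. For example
    // (hypothetical values): user labels {2, 5, 9} are remapped by
    // forward_mapper to {0, 1, 2}; reverse_mapper restores the originals in
    // predict().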

    // add a column of ones to the data (bias/intercept term)
    Mat data_t;
    hconcat( cv::Mat::ones( _data_i.rows, 1, CV_32F ), _data_i, data_t );

    // coefficient matrix (zero-initialized)
    Mat thetas;
    Mat init_theta = Mat::zeros(data_t.cols, 1, CV_32F);

    // fit the model (handles binary and multiclass cases)
    Mat new_theta;
    Mat labels;
    if(num_classes == 2)
    {
        labels_l.convertTo(labels, CV_32F);
        if(this->params.train_method == LogisticRegression::BATCH)
            new_theta = batch_gradient_descent(data_t, labels, init_theta);
        else
            new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
        thetas = new_theta.t();
    }
    else
    {
        /* one-vs-rest (OvR): train one binary classifier per class, so the
        multiclass case yields n theta vectors (one row of 'thetas') for n classes */
        thetas.create(num_classes, data_t.cols, CV_32F);
        Mat labels_binary;
        int ii = 0;
        for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
        {
            // build binary targets for the current class: compare() yields 255
            // where labels match, so dividing by 255 gives targets in {0, 1}
            labels_binary = (labels_l == it->second)/255;
            labels_binary.convertTo(labels, CV_32F);
            if(this->params.train_method == LogisticRegression::BATCH)
                new_theta = batch_gradient_descent(data_t, labels, init_theta);
            else
                new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
            hconcat(new_theta.t(), thetas.row(ii));
            ii += 1;
        }
    }

    // check that the estimates are stable and finite
    this->learnt_thetas = thetas.clone();
    if( cvIsNaN( (double)sum(this->learnt_thetas)[0] ) )
    {
        CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
    }

    // success
    ok = true;
    return ok;
}

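// Predicts class labels (CV_32S) for the given samples. If 'results' is
// requested and StatModel::RAW_OUTPUT is set in 'flags', the raw sigmoid
// outputs are returned instead of labels. The scalar return value is the
// prediction for the first sample (0 if there are none).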
float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, int flags) const
{
    // check if learnt_thetas is populated
    if(!this->isTrained())
    {
        CV_Error( CV_StsBadArg, "classifier should be trained first" );
    }

    // coefficient matrix
    Mat thetas;
    if ( learnt_thetas.type() == CV_32F )
    {
        thetas = learnt_thetas;
    }
    else
    {
        this->learnt_thetas.convertTo( thetas, CV_32F );
    }
    CV_Assert(thetas.rows > 0);

    // data samples
    Mat data = samples.getMat();
    if(data.type() != CV_32F)
    {
        CV_Error( CV_StsBadArg, "data must be of floating-point type" );
    }

    // add a column of ones to the data (bias/intercept term)
    Mat data_t;
    hconcat( cv::Mat::ones( data.rows, 1, CV_32F ), data, data_t );
    CV_Assert(data_t.cols == thetas.cols);

    // predict class labels for samples (handles binary and multiclass cases)
    Mat labels_c;
    Mat pred_m;
    Mat temp_pred;
    if(thetas.rows == 1)
    {
        // apply sigmoid function
        temp_pred = calc_sigmoid(data_t * thetas.t());
        CV_Assert(temp_pred.cols==1);
        pred_m = temp_pred.clone();

        // threshold at 0.5: compare() yields 255 where the output exceeds 0.5,
        // so dividing by 255 gives class labels in {0, 1}
        temp_pred = (temp_pred > 0.5f) / 255;
        temp_pred.convertTo(labels_c, CV_32S);
    }
    else
    {
        // apply sigmoid function
        pred_m.create(data_t.rows, thetas.rows, data.type());
        for(int i = 0; i < thetas.rows; i++)
        {
            // copy this class's outputs into column i of pred_m
            temp_pred = calc_sigmoid(data_t * thetas.row(i).t());
            vconcat(temp_pred, pred_m.col(i));
        }

        // predict the class with the maximum output
        Point max_loc;
        Mat labels;
        for(int i = 0; i < pred_m.rows; i++)
        {
            temp_pred = pred_m.row(i);
            minMaxLoc( temp_pred, NULL, NULL, NULL, &max_loc );
            labels.push_back(max_loc.x);
        }
        labels.convertTo(labels_c, CV_32S);
    }

    // remap to the original class labels (class names can be 1, 2, 3, ...)
    Mat pred_labs = remap_labels(labels_c, this->reverse_mapper);
    pred_labs.convertTo(pred_labs, CV_32S);

    // return either the labels or the raw output
    if ( results.needed() )
    {
        if ( flags & StatModel::RAW_OUTPUT )
        {
            pred_m.copyTo( results );
        }
        else
        {
            pred_labs.copyTo(results);
        }
    }

    return ( pred_labs.empty() ? 0.f : static_cast<float>(pred_labs.at<int>(0)) );
}

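// Element-wise logistic sigmoid: sigma(z) = 1 / (1 + exp(-z)),
// computed over the whole matrix at once via cv::exp.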
Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const
{
    CV_TRACE_FUNCTION();
    Mat dest;
    exp(-data, dest);
    return 1.0/(1.0+dest);
}

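// Regularized cost of theta over the m samples in _data:
//   J(theta) = -(1/m) * sum( y .* log(h) + (1 - y) .* log(1 - h) ) + r
// with h = sigmoid(_data * theta). The penalty r skips the bias theta_0:
//   L2: r = (lambda/(2m)) * sum(theta_j^2);  L1: r = (lambda/(2m)) * sum(|theta_j|)
// (lambda is 1 when regularization is enabled, 0 otherwise).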
double LogisticRegressionImpl::compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    CV_TRACE_FUNCTION();
    float llambda = 0;                   /* changed llambda from int to float to solve issue #7924 */
    int m;
    int n;
    double cost = 0;
    double rparameter = 0;
    Mat theta_b;
    Mat theta_c;
    Mat d_a;
    Mat d_b;

    m = _data.rows;
    n = _data.cols;

    // regularization excludes the bias term theta_0
    theta_b = _init_theta(Range(1, n), Range::all());

    if (params.norm != REG_DISABLE)
    {
        llambda = 1;
    }

    if(this->params.norm == LogisticRegression::REG_L1)
    {
        // the L1 penalty sums the absolute values of the coefficients
        rparameter = (llambda/(2*m)) * sum(abs(theta_b))[0];
    }
    else
    {
        // assuming it to be L2 by default
        multiply(theta_b, theta_b, theta_c, 1);
        rparameter = (llambda/(2*m)) * sum(theta_c)[0];
    }

    d_a = calc_sigmoid(_data * _init_theta);
    log(d_a, d_a);
    multiply(d_a, _labels, d_a);

    // use the fact that: log(1 - sigmoid(x)) = log(sigmoid(-x))
    d_b = calc_sigmoid(- _data * _init_theta);
    log(d_b, d_b);
    multiply(d_b, 1-_labels, d_b);

    cost = (-1.0/m) * (sum(d_a)[0] + sum(d_b)[0]);
    cost = cost + rparameter;

    if(cvIsNaN( cost ) == 1)
    {
        CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
    }

    return cost;
}

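// Parallel worker for compute_gradient: each iteration fills one row of the
// gradient,
//   dJ/dtheta_j = (1/m) * sum_i (h_i - y_i) * x_ij + (lambda/m) * theta_j,
// where (h - y) has been precomputed into pcal_a by the caller.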
struct LogisticRegressionImpl_ComputeGradient_Impl : ParallelLoopBody
{
    const Mat* data;
    const Mat* theta;
    const Mat* pcal_a;
    Mat* gradient;
    double lambda;

    LogisticRegressionImpl_ComputeGradient_Impl(const Mat& _data, const Mat &_theta, const Mat& _pcal_a, const double _lambda, Mat & _gradient)
        : data(&_data)
        , theta(&_theta)
        , pcal_a(&_pcal_a)
        , gradient(&_gradient)
        , lambda(_lambda)
    {
    }

    void operator()(const cv::Range& r) const CV_OVERRIDE
    {
        const Mat& _data  = *data;
        const Mat &_theta = *theta;
        Mat & _gradient   = *gradient;
        const Mat & _pcal_a = *pcal_a;
        const int m = _data.rows;
        Mat pcal_ab;

        for (int ii = r.start; ii<r.end; ii++)
        {
            Mat pcal_b = _data(Range::all(), Range(ii,ii+1));
            multiply(_pcal_a, pcal_b, pcal_ab, 1);

            _gradient.row(ii) = (1.0/m)*sum(pcal_ab)[0] + (lambda/m) * _theta.row(ii);
        }
    }
};

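// Gradient of the regularized logistic cost. The bias row (j = 0) is computed
// here without a regularization term; the remaining rows are filled in
// parallel by the worker above.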
void LogisticRegressionImpl::compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient )
{
    CV_TRACE_FUNCTION();
    const int m = _data.rows;
    Mat pcal_a, pcal_b, pcal_ab;

    const Mat z = _data * _theta;

    CV_Assert( _gradient.rows == _theta.rows && _gradient.cols == _theta.cols );

    pcal_a = calc_sigmoid(z) - _labels;
    pcal_b = _data(Range::all(), Range(0,1));
    multiply(pcal_a, pcal_b, pcal_ab, 1);

    _gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];

    // compute the remaining rows in parallel, one column of _data per row
    LogisticRegressionImpl_ComputeGradient_Impl invoker(_data, _theta, pcal_a, _lambda, _gradient);
    cv::parallel_for_(cv::Range(1, _gradient.rows), invoker);
}

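// Full-batch gradient descent: repeat for num_iters iterations
//   theta := theta - (alpha / m) * gradient,
// where the gradient is taken over all m training samples. compute_cost is
// evaluated each iteration only for its NaN check (it throws if the fit
// diverges).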
Mat LogisticRegressionImpl::batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    CV_TRACE_FUNCTION();
    // implements batch gradient descent
    if(this->params.alpha<=0)
    {
        CV_Error( CV_StsBadArg, "check training parameters (learning rate) for the classifier" );
    }

    if(this->params.num_iters <= 0)
    {
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
    }

    int llambda = 0;
    int m;
    Mat theta_p = _init_theta.clone();
    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
    m = _data.rows;

    if (params.norm != REG_DISABLE)
    {
        llambda = 1;
    }

    for(int i = 0;i<this->params.num_iters;i++)
    {
        // compute_cost is called only for its side effect: it throws if the cost is NaN
        compute_cost(_data, _labels, theta_p);

        compute_gradient( _data, _labels, theta_p, llambda, gradient );

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;
    }
    return theta_p;
}

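// Mini-batch gradient descent: the same update rule as above, but each step
// uses a slice of mini_batch_size consecutive samples, wrapping back to the
// start of the data once all samples have been visited. The iteration count
// comes from term_crit.maxCount.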
Mat LogisticRegressionImpl::mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    // implements mini-batch gradient descent
    int lambda_l = 0;
    int m;
    int j = 0;
    int size_b = this->params.mini_batch_size;

    if(this->params.mini_batch_size <= 0 || this->params.alpha <= 0)
    {
        CV_Error( CV_StsBadArg, "check training parameters for the classifier" );
    }

    if(this->params.num_iters <= 0)
    {
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
    }

    Mat theta_p = _init_theta.clone();
    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
    Mat data_d;
    Mat labels_l;

    if (params.norm != REG_DISABLE)
    {
        lambda_l = 1;
    }

    for(int i = 0;i<this->params.term_crit.maxCount;i++)
    {
        // take the next mini-batch, truncated at the end of the data
        if(j+size_b<=_data.rows)
        {
            data_d = _data(Range(j,j+size_b), Range::all());
            labels_l = _labels(Range(j,j+size_b),Range::all());
        }
        else
        {
            data_d = _data(Range(j, _data.rows), Range::all());
            labels_l = _labels(Range(j, _labels.rows),Range::all());
        }

        m = data_d.rows;

        // compute_cost is called only for its side effect: it throws if the cost is NaN
        compute_cost(data_d, labels_l, theta_p);

        compute_gradient(data_d, labels_l, theta_p, lambda_l, gradient);

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;

        j += this->params.mini_batch_size;

        // wrap around once all training samples have been visited
        if (j >= _data.rows) {
            j = 0;
        }
    }
    return theta_p;
}

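// Builds the forward map (user label -> contiguous internal label 0..n-1) and
// the reverse map back. For example (hypothetical labels): {2, 5, 9} maps
// forward to {0, 1, 2}. The original and internal labels are also stored in
// labels_o and labels_n for serialization.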
bool LogisticRegressionImpl::set_label_map(const Mat &_labels_i)
{
    int ii = 0;
    Mat labels;

    // labels are stored as CV_32S (push_back(int) fills them below)
    this->labels_o = Mat(0, 1, CV_32S);
    this->labels_n = Mat(0, 1, CV_32S);

    _labels_i.convertTo(labels, CV_32S);

    // first pass: collect the distinct labels as map keys
    for(int i = 0;i<labels.rows;i++)
    {
        this->forward_mapper[labels.at<int>(i)] += 1;
    }

    // second pass: assign each distinct label a contiguous id, in sorted order
    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->forward_mapper[it->first] = ii;
        this->labels_o.push_back(it->first);
        this->labels_n.push_back(ii);
        ii += 1;
    }

    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->reverse_mapper[it->second] = it->first;
    }

    return true;
}

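// Applies a label map element-wise: every entry of _labels_i must appear as a
// key in lmap, and the mapped values are returned as a CV_32S column of the
// same size.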
Mat LogisticRegressionImpl::remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const
{
    Mat labels;
    _labels_i.convertTo(labels, CV_32S);

    Mat new_labels = Mat::zeros(labels.rows, labels.cols, labels.type());

    CV_Assert( !lmap.empty() );

    for(int i =0;i<labels.rows;i++)
    {
        map<int, int>::const_iterator val = lmap.find(labels.at<int>(i,0));
        CV_Assert(val != lmap.end());
        new_labels.at<int>(i,0) = val->second;
    }
    return new_labels;
}

void LogisticRegressionImpl::clear()
{
    this->learnt_thetas.release();
    this->labels_o.release();
    this->labels_n.release();
}

void LogisticRegressionImpl::write(FileStorage& fs) const
{
    // check if the FileStorage is open
    if(fs.isOpened() == 0)
    {
        CV_Error(CV_StsBadArg,"file can't be opened; check the file path");
    }
    writeFormat(fs);
    string desc = "Logistic Regression Classifier";
    fs<<"classifier"<<desc.c_str();
    fs<<"alpha"<<this->params.alpha;
    fs<<"iterations"<<this->params.num_iters;
    fs<<"norm"<<this->params.norm;
    fs<<"train_method"<<this->params.train_method;
    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        fs<<"mini_batch_size"<<this->params.mini_batch_size;
    }
    fs<<"learnt_thetas"<<this->learnt_thetas;
    fs<<"n_labels"<<this->labels_n;
    fs<<"o_labels"<<this->labels_o;
}

void LogisticRegressionImpl::read(const FileNode& fn)
{
    // check if empty
    if(fn.empty())
    {
        CV_Error( CV_StsBadArg, "empty FileNode object" );
    }

    this->params.alpha = (double)fn["alpha"];
    this->params.num_iters = (int)fn["iterations"];
    this->params.norm = (int)fn["norm"];
    this->params.train_method = (int)fn["train_method"];

    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        this->params.mini_batch_size = (int)fn["mini_batch_size"];
    }

    fn["learnt_thetas"] >> this->learnt_thetas;
    fn["o_labels"] >> this->labels_o;
    fn["n_labels"] >> this->labels_n;

    // rebuild the label maps from the stored original/internal label pairs
    for(int ii =0;ii<labels_o.rows;ii++)
    {
        this->forward_mapper[labels_o.at<int>(ii,0)] = labels_n.at<int>(ii,0);
        this->reverse_mapper[labels_n.at<int>(ii,0)] = labels_o.at<int>(ii,0);
    }
}

} // namespace ml
} // namespace cv

/* End of file. */