// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// AUTHOR: Rahul Kavi rahulkavi[at]live[at]com

//
// This is an implementation of the Logistic Regression algorithm
//

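// Binary problems are fit by (mini-)batch gradient descent on the regularized
// logistic loss; problems with more than two classes are reduced to a set of
// one-vs-rest binary problems, training one coefficient vector per class.
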
#include "precomp.hpp"

using namespace std;

namespace cv {
namespace ml {

class LrParams
{
public:
    LrParams()
    {
        alpha = 0.001;
        num_iters = 1000;
        norm = LogisticRegression::REG_L2;
        train_method = LogisticRegression::BATCH;
        mini_batch_size = 1;
        term_crit = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, num_iters, alpha);
    }

    double alpha;        //!< learning rate.
    int num_iters;       //!< number of iterations.
    int norm;            //!< regularization kind: REG_L1, REG_L2 or REG_DISABLE.
    int train_method;    //!< optimizer: BATCH or MINI_BATCH.
    int mini_batch_size; //!< samples per step when train_method == MINI_BATCH.
    TermCriteria term_crit;
};

class LogisticRegressionImpl CV_FINAL : public LogisticRegression
{
public:

    LogisticRegressionImpl() { }
    virtual ~LogisticRegressionImpl() {}

    inline double getLearningRate() const CV_OVERRIDE { return params.alpha; }
    inline void setLearningRate(double val) CV_OVERRIDE { params.alpha = val; }
    inline int getIterations() const CV_OVERRIDE { return params.num_iters; }
    inline void setIterations(int val) CV_OVERRIDE { params.num_iters = val; }
    inline int getRegularization() const CV_OVERRIDE { return params.norm; }
    inline void setRegularization(int val) CV_OVERRIDE { params.norm = val; }
    inline int getTrainMethod() const CV_OVERRIDE { return params.train_method; }
    inline void setTrainMethod(int val) CV_OVERRIDE { params.train_method = val; }
    inline int getMiniBatchSize() const CV_OVERRIDE { return params.mini_batch_size; }
    inline void setMiniBatchSize(int val) CV_OVERRIDE { params.mini_batch_size = val; }
    inline TermCriteria getTermCriteria() const CV_OVERRIDE { return params.term_crit; }
    inline void setTermCriteria(TermCriteria val) CV_OVERRIDE { params.term_crit = val; }

    virtual bool train( const Ptr<TrainData>& trainData, int=0 ) CV_OVERRIDE;
    virtual float predict(InputArray samples, OutputArray results, int flags=0) const CV_OVERRIDE;
    virtual void clear() CV_OVERRIDE;
    virtual void write(FileStorage& fs) const CV_OVERRIDE;
    virtual void read(const FileNode& fn) CV_OVERRIDE;
    virtual Mat get_learnt_thetas() const CV_OVERRIDE { return learnt_thetas; }
    virtual int getVarCount() const CV_OVERRIDE { return learnt_thetas.cols; }
    virtual bool isTrained() const CV_OVERRIDE { return !learnt_thetas.empty(); }
    virtual bool isClassifier() const CV_OVERRIDE { return true; }
    virtual String getDefaultName() const CV_OVERRIDE { return "opencv_ml_lr"; }
protected:
    Mat calc_sigmoid(const Mat& data) const;
    double compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    void compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient );
    Mat batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    Mat mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
    bool set_label_map(const Mat& _labels_i);
    Mat remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const;
protected:
    LrParams params;
    Mat learnt_thetas;            //!< one row of coefficients per class (a single row for binary problems).
    map<int, int> forward_mapper; //!< user label -> contiguous internal label 0..n-1.
    map<int, int> reverse_mapper; //!< contiguous internal label -> user label.
    Mat labels_o;                 //!< original (user) labels.
    Mat labels_n;                 //!< normalized internal labels 0..n-1.
};

Ptr<LogisticRegression> LogisticRegression::create()
{
    return makePtr<LogisticRegressionImpl>();
}

Ptr<LogisticRegression> LogisticRegression::load(const String& filepath, const String& nodeName)
{
    return Algorithm::load<LogisticRegression>(filepath, nodeName);
}

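/* Example usage of the API implemented below (a minimal sketch; `data`,
 * `labels` and `samples` stand for caller-supplied CV_32F matrices with one
 * sample per row):
 *
 *     Ptr<LogisticRegression> lr = LogisticRegression::create();
 *     lr->setLearningRate(0.001);
 *     lr->setIterations(1000);
 *     lr->setRegularization(LogisticRegression::REG_L2);
 *     lr->setTrainMethod(LogisticRegression::BATCH);
 *     lr->train(TrainData::create(data, ROW_SAMPLE, labels));
 *     Mat predicted;
 *     lr->predict(samples, predicted);
 */
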
bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
{
    CV_TRACE_FUNCTION_SKIP_NESTED();
    CV_Assert(!trainData.empty());

    // return value
    bool ok = false;
    clear();
    Mat _data_i = trainData->getSamples();
    Mat _labels_i = trainData->getResponses();

    // check size and type of training data
    CV_Assert( !_labels_i.empty() && !_data_i.empty());
    if(_labels_i.cols != 1)
    {
        CV_Error( CV_StsBadArg, "labels should be a column matrix" );
    }
    if(_data_i.type() != CV_32FC1 || _labels_i.type() != CV_32FC1)
    {
        CV_Error( CV_StsBadArg, "data and labels must be a floating point matrix" );
    }
    if(_labels_i.rows != _data_i.rows)
    {
        CV_Error( CV_StsBadArg, "number of rows in data and labels should be equal" );
    }

    // class labels
    set_label_map(_labels_i);
    Mat labels_l = remap_labels(_labels_i, this->forward_mapper);
    int num_classes = (int) this->forward_mapper.size();
    if(num_classes < 2)
    {
        CV_Error( CV_StsBadArg, "data should have at least 2 classes" );
    }

    // add a column of ones to the data (bias/intercept term)
    Mat data_t;
    hconcat( cv::Mat::ones( _data_i.rows, 1, CV_32F ), _data_i, data_t );

    // coefficient matrix (zero-initialized)
    Mat thetas;
    Mat init_theta = Mat::zeros(data_t.cols, 1, CV_32F);

    // fit the model (handles binary and multiclass cases)
    Mat new_theta;
    Mat labels;
    if(num_classes == 2)
    {
        labels_l.convertTo(labels, CV_32F);
        if(this->params.train_method == LogisticRegression::BATCH)
            new_theta = batch_gradient_descent(data_t, labels, init_theta);
        else
            new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
        thetas = new_theta.t();
    }
    else
    {
        /* one-vs-rest: relabel the samples once per class and fit a separate
           theta for each, giving n coefficient vectors for n classes */
        thetas.create(num_classes, data_t.cols, CV_32F);
        Mat labels_binary;
        int ii = 0;
        for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
        {
            // one-vs-rest (OvR) scheme: 1 for the current class, 0 for all others
            labels_binary = (labels_l == it->second)/255;
            labels_binary.convertTo(labels, CV_32F);
            if(this->params.train_method == LogisticRegression::BATCH)
                new_theta = batch_gradient_descent(data_t, labels, init_theta);
            else
                new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
            hconcat(new_theta.t(), thetas.row(ii));
            ii += 1;
        }
    }

    // check that the estimates are stable and finite
    this->learnt_thetas = thetas.clone();
    if( cvIsNaN( (double)sum(this->learnt_thetas)[0] ) )
    {
        CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
    }

    // success
    ok = true;
    return ok;
}

float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, int flags) const
{
    // check that the model has been trained (learnt_thetas is populated)
    if(!this->isTrained())
    {
        CV_Error( CV_StsBadArg, "classifier should be trained first" );
    }

    // coefficient matrix
    Mat thetas;
    if ( learnt_thetas.type() == CV_32F )
    {
        thetas = learnt_thetas;
    }
    else
    {
        this->learnt_thetas.convertTo( thetas, CV_32F );
    }
    CV_Assert(thetas.rows > 0);

    // data samples
    Mat data = samples.getMat();
    if(data.type() != CV_32F)
    {
        CV_Error( CV_StsBadArg, "data must be of floating type" );
    }

    // add a column of ones to the data (bias/intercept term)
    Mat data_t;
    hconcat( cv::Mat::ones( data.rows, 1, CV_32F ), data, data_t );
    CV_Assert(data_t.cols == thetas.cols);

    // predict class labels for samples (handles binary and multiclass cases)
    Mat labels_c;
    Mat pred_m;
    Mat temp_pred;
    if(thetas.rows == 1)
    {
        // apply sigmoid function
        temp_pred = calc_sigmoid(data_t * thetas.t());
        CV_Assert(temp_pred.cols==1);
        pred_m = temp_pred.clone();

        // threshold at 0.5: probabilities above 0.5 map to class 1, the rest to class 0
        temp_pred = (temp_pred > 0.5f) / 255;
        temp_pred.convertTo(labels_c, CV_32S);
    }
    else
    {
        // apply sigmoid function, one column of scores per class
        pred_m.create(data_t.rows, thetas.rows, data.type());
        for(int i = 0; i < thetas.rows; i++)
        {
            temp_pred = calc_sigmoid(data_t * thetas.row(i).t());
            vconcat(temp_pred, pred_m.col(i));
        }

        // predict the class with the maximum score
        Point max_loc;
        Mat labels;
        for(int i = 0; i < pred_m.rows; i++)
        {
            temp_pred = pred_m.row(i);
            minMaxLoc( temp_pred, NULL, NULL, NULL, &max_loc );
            labels.push_back(max_loc.x);
        }
        labels.convertTo(labels_c, CV_32S);
    }

    // map the internal indices back to the user's original class labels
    Mat pred_labs = remap_labels(labels_c, this->reverse_mapper);
    pred_labs.convertTo(pred_labs, CV_32S);

    // return either the labels or the raw output
    if ( results.needed() )
    {
        if ( flags & StatModel::RAW_OUTPUT )
        {
            pred_m.copyTo( results );
        }
        else
        {
            pred_labs.copyTo(results);
        }
    }

    return ( pred_labs.empty() ? 0.f : static_cast<float>(pred_labs.at<int>(0)) );
}

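// Element-wise logistic function: sigmoid(z) = 1 / (1 + exp(-z)),
// mapping any real activation to a probability in (0, 1).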
Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const
{
    CV_TRACE_FUNCTION();
    Mat dest;
    exp(-data, dest);
    return 1.0/(1.0+dest);
}

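/*
 * Regularized negative log-likelihood (cross-entropy) cost:
 *
 *   J(theta) = -(1/m) * sum_i [ y_i*log(h(x_i)) + (1 - y_i)*log(1 - h(x_i)) ] + r(theta)
 *
 * where h(x) = sigmoid(theta' * x), m is the number of samples, and the
 * penalty r(theta) is (lambda/(2m)) * sum_j |theta_j| for REG_L1 or
 * (lambda/(2m)) * sum_j theta_j^2 for REG_L2, with the bias theta_0 excluded.
 * Note that lambda is fixed to 1 whenever regularization is enabled.
 */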
double LogisticRegressionImpl::compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    CV_TRACE_FUNCTION();
    float llambda = 0; /* changed llambda from int to float to solve issue #7924 */
    int m;
    int n;
    double cost = 0;
    double rparameter = 0;
    Mat theta_b;
    Mat theta_c;
    Mat d_a;
    Mat d_b;

    m = _data.rows;
    n = _data.cols;

    // exclude the bias term theta(0) from the regularization penalty
    theta_b = _init_theta(Range(1, n), Range::all());

    if (params.norm != REG_DISABLE)
    {
        llambda = 1;
    }

    if(this->params.norm == LogisticRegression::REG_L1)
    {
        // the L1 penalty sums the absolute values of the coefficients
        rparameter = (llambda/(2*m)) * sum(abs(theta_b))[0];
    }
    else
    {
        // assuming it to be L2 by default
        multiply(theta_b, theta_b, theta_c, 1);
        rparameter = (llambda/(2*m)) * sum(theta_c)[0];
    }

    d_a = calc_sigmoid(_data * _init_theta);
    log(d_a, d_a);
    multiply(d_a, _labels, d_a);

    // use the fact that: log(1 - sigmoid(x)) = log(sigmoid(-x))
    d_b = calc_sigmoid(- _data * _init_theta);
    log(d_b, d_b);
    multiply(d_b, 1-_labels, d_b);

    cost = (-1.0/m) * (sum(d_a)[0] + sum(d_b)[0]);
    cost = cost + rparameter;

    if(cvIsNaN( cost ) == 1)
    {
        CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
    }

    return cost;
}

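/*
 * Gradient of the regularized cost with respect to theta:
 *
 *   dJ/dtheta_j = (1/m) * sum_i (h(x_i) - y_i) * x_ij + (lambda/m) * theta_j   for j >= 1
 *   dJ/dtheta_0 = (1/m) * sum_i (h(x_i) - y_i)                                 (bias, unregularized)
 *
 * The columns are independent, so the parallel body below evaluates them
 * concurrently, one row of the gradient per column of the data.
 */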
struct LogisticRegressionImpl_ComputeGradient_Impl : ParallelLoopBody
{
    const Mat* data;
    const Mat* theta;
    const Mat* pcal_a;
    Mat* gradient;
    double lambda;

    LogisticRegressionImpl_ComputeGradient_Impl(const Mat& _data, const Mat &_theta, const Mat& _pcal_a, const double _lambda, Mat & _gradient)
        : data(&_data)
        , theta(&_theta)
        , pcal_a(&_pcal_a)
        , gradient(&_gradient)
        , lambda(_lambda)
    {
    }

    void operator()(const cv::Range& r) const CV_OVERRIDE
    {
        const Mat& _data = *data;
        const Mat &_theta = *theta;
        Mat & _gradient = *gradient;
        const Mat & _pcal_a = *pcal_a;
        const int m = _data.rows;
        Mat pcal_ab;

        for (int ii = r.start; ii<r.end; ii++)
        {
            Mat pcal_b = _data(Range::all(), Range(ii,ii+1));
            multiply(_pcal_a, pcal_b, pcal_ab, 1);

            _gradient.row(ii) = (1.0/m)*sum(pcal_ab)[0] + (lambda/m) * _theta.row(ii);
        }
    }
};

void LogisticRegressionImpl::compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient )
{
    CV_TRACE_FUNCTION();
    const int m = _data.rows;
    Mat pcal_a, pcal_b, pcal_ab;

    const Mat z = _data * _theta;

    CV_Assert( _gradient.rows == _theta.rows && _gradient.cols == _theta.cols );

    pcal_a = calc_sigmoid(z) - _labels;
    pcal_b = _data(Range::all(), Range(0,1));
    multiply(pcal_a, pcal_b, pcal_ab, 1);

    // bias term: averaged prediction error, no regularization
    _gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];

    // the remaining rows are independent, so compute them in parallel
    LogisticRegressionImpl_ComputeGradient_Impl invoker(_data, _theta, pcal_a, _lambda, _gradient);
    cv::parallel_for_(cv::Range(1, _gradient.rows), invoker);
}

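/*
 * Both descent variants below repeat the update
 *
 *   theta := theta - (alpha / m) * dJ/dtheta
 *
 * where the gradient returned by compute_gradient() is already averaged over
 * the m samples of the current (mini-)batch.
 */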
Mat LogisticRegressionImpl::batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    CV_TRACE_FUNCTION();
    // implements batch gradient descent
    if(this->params.alpha<=0)
    {
        CV_Error( CV_StsBadArg, "check training parameters (learning rate) for the classifier" );
    }

    if(this->params.num_iters <= 0)
    {
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
    }

    int llambda = 0;
    int m;
    Mat theta_p = _init_theta.clone();
    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
    m = _data.rows;

    if (params.norm != REG_DISABLE)
    {
        llambda = 1;
    }

    for(int i = 0;i<this->params.num_iters;i++)
    {
        // compute_cost is called only for its side effect of raising an
        // error when the cost becomes NaN
        compute_cost(_data, _labels, theta_p);

        compute_gradient( _data, _labels, theta_p, llambda, gradient );

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;
    }
    return theta_p;
}

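// Mini-batch variant: the same update rule, but each step uses only the next
// size_b samples and cycles through the data. With mini_batch_size == 1 this
// reduces to plain stochastic gradient descent. Note that the number of steps
// is bounded by term_crit.maxCount rather than params.num_iters.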
Mat LogisticRegressionImpl::mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
{
    // implements mini-batch gradient descent
    int lambda_l = 0;
    int m;
    int j = 0;
    int size_b = this->params.mini_batch_size;

    if(this->params.mini_batch_size <= 0 || this->params.alpha == 0)
    {
        CV_Error( CV_StsBadArg, "check training parameters for the classifier" );
    }

    if(this->params.num_iters <= 0)
    {
        CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
    }

    Mat theta_p = _init_theta.clone();
    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
    Mat data_d;
    Mat labels_l;

    if (params.norm != REG_DISABLE)
    {
        lambda_l = 1;
    }

    for(int i = 0;i<this->params.term_crit.maxCount;i++)
    {
        // take the next mini-batch, truncated at the end of the data
        if(j+size_b<=_data.rows)
        {
            data_d = _data(Range(j,j+size_b), Range::all());
            labels_l = _labels(Range(j,j+size_b),Range::all());
        }
        else
        {
            data_d = _data(Range(j, _data.rows), Range::all());
            labels_l = _labels(Range(j, _labels.rows),Range::all());
        }

        m = data_d.rows;

        // compute_cost is called only for its side effect of raising an
        // error when the cost becomes NaN
        compute_cost(data_d, labels_l, theta_p);

        compute_gradient(data_d, labels_l, theta_p, lambda_l, gradient);

        theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;

        j += this->params.mini_batch_size;

        // wrap around once all samples have been visited
        if (j >= _data.rows) {
            j = 0;
        }
    }
    return theta_p;
}

bool LogisticRegressionImpl::set_label_map(const Mat &_labels_i)
{
    // builds two maps that translate between user-defined labels and the
    // contiguous internal labels 0..n-1, one map for each direction
    int ii = 0;
    Mat labels;

    this->labels_o = Mat(0,1, CV_8U);
    this->labels_n = Mat(0,1, CV_8U);

    _labels_i.convertTo(labels, CV_32S);

    for(int i = 0;i<labels.rows;i++)
    {
        this->forward_mapper[labels.at<int>(i)] += 1;
    }

    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->forward_mapper[it->first] = ii;
        this->labels_o.push_back(it->first);
        this->labels_n.push_back(ii);
        ii += 1;
    }

    for(map<int,int>::iterator it = this->forward_mapper.begin(); it != this->forward_mapper.end(); ++it)
    {
        this->reverse_mapper[it->second] = it->first;
    }

    return true;
}

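// Translates a column of labels through the given map. For example, with
// forward_mapper {2 -> 0, 4 -> 1, 9 -> 2}, the labels [4; 9; 2] become
// [1; 2; 0]; reverse_mapper performs the inverse translation at predict time.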
Mat LogisticRegressionImpl::remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const
{
    Mat labels;
    _labels_i.convertTo(labels, CV_32S);

    Mat new_labels = Mat::zeros(labels.rows, labels.cols, labels.type());

    CV_Assert( !lmap.empty() );

    for(int i =0;i<labels.rows;i++)
    {
        map<int, int>::const_iterator val = lmap.find(labels.at<int>(i,0));
        CV_Assert(val != lmap.end());
        new_labels.at<int>(i,0) = val->second;
    }
    return new_labels;
}

void LogisticRegressionImpl::clear()
{
    this->learnt_thetas.release();
    this->labels_o.release();
    this->labels_n.release();
}

void LogisticRegressionImpl::write(FileStorage& fs) const
{
    // check if open
    if(fs.isOpened() == 0)
    {
        CV_Error(CV_StsBadArg,"file can't be opened. Check file path");
    }
    writeFormat(fs);
    string desc = "Logistic Regression Classifier";
    fs<<"classifier"<<desc.c_str();
    fs<<"alpha"<<this->params.alpha;
    fs<<"iterations"<<this->params.num_iters;
    fs<<"norm"<<this->params.norm;
    fs<<"train_method"<<this->params.train_method;
    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        fs<<"mini_batch_size"<<this->params.mini_batch_size;
    }
    fs<<"learnt_thetas"<<this->learnt_thetas;
    fs<<"n_labels"<<this->labels_n;
    fs<<"o_labels"<<this->labels_o;
}

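/* Example round trip through write()/read() (a minimal sketch; the file name
 * is hypothetical):
 *
 *     lr->save("lr_model.xml");  // serializes via write()
 *     Ptr<LogisticRegression> lr2 = LogisticRegression::load("lr_model.xml");
 */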
void LogisticRegressionImpl::read(const FileNode& fn)
{
    // check if empty
    if(fn.empty())
    {
        CV_Error( CV_StsBadArg, "empty FileNode object" );
    }

    this->params.alpha = (double)fn["alpha"];
    this->params.num_iters = (int)fn["iterations"];
    this->params.norm = (int)fn["norm"];
    this->params.train_method = (int)fn["train_method"];

    if(this->params.train_method == LogisticRegression::MINI_BATCH)
    {
        this->params.mini_batch_size = (int)fn["mini_batch_size"];
    }

    fn["learnt_thetas"] >> this->learnt_thetas;
    fn["o_labels"] >> this->labels_o;
    fn["n_labels"] >> this->labels_n;

    // rebuild the label maps from the stored label matrices
    for(int ii =0;ii<labels_o.rows;ii++)
    {
        this->forward_mapper[labels_o.at<int>(ii,0)] = labels_n.at<int>(ii,0);
        this->reverse_mapper[labels_n.at<int>(ii,0)] = labels_o.at<int>(ii,0);
    }
}

} // namespace ml
} // namespace cv

/* End of file. */