/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

namespace cv {
namespace ml {


class NormalBayesClassifierImpl : public NormalBayesClassifier
{
public:
    NormalBayesClassifierImpl()
    {
        nallvars = 0;
    }

    bool train( const Ptr<TrainData>& trainData, int flags ) CV_OVERRIDE
    {
        CV_Assert(!trainData.empty());
        const float min_variation = FLT_EPSILON;
        Mat responses = trainData->getNormCatResponses();
        Mat __cls_labels = trainData->getClassLabels();
        Mat __var_idx = trainData->getVarIdx();
        Mat samples = trainData->getTrainSamples();
        int nclasses = (int)__cls_labels.total();

        int nvars = trainData->getNVars();
        int s, c1, c2, cls;

        int __nallvars = trainData->getNAllVars();
        bool update = (flags & UPDATE_MODEL) != 0;

        if( !update )
        {
            nallvars = __nallvars;
            count.resize(nclasses);
            sum.resize(nclasses);
            productsum.resize(nclasses);
            avg.resize(nclasses);
            inv_eigen_values.resize(nclasses);
            cov_rotate_mats.resize(nclasses);

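            // Per-class buffers: per-variable element counts and sums, the matrix of
            // pairwise product sums (used to build the covariance), the mean vector,
            // the reciprocal eigenvalues of the covariance and the corresponding
            // rotation (eigenvector) matrix of its decomposition.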
            for( cls = 0; cls < nclasses; cls++ )
            {
                count[cls] = Mat::zeros( 1, nvars, CV_32SC1 );
                sum[cls] = Mat::zeros( 1, nvars, CV_64FC1 );
                productsum[cls] = Mat::zeros( nvars, nvars, CV_64FC1 );
                avg[cls] = Mat::zeros( 1, nvars, CV_64FC1 );
                inv_eigen_values[cls] = Mat::zeros( 1, nvars, CV_64FC1 );
                cov_rotate_mats[cls] = Mat::zeros( nvars, nvars, CV_64FC1 );
            }

            var_idx = __var_idx;
            cls_labels = __cls_labels;

            c.create(1, nclasses, CV_64FC1);
        }
        else
        {
            // check that the new training data has the same dimensionality etc.
            if( nallvars != __nallvars ||
                var_idx.size() != __var_idx.size() ||
                norm(var_idx, __var_idx, NORM_INF) != 0 ||
                cls_labels.size() != __cls_labels.size() ||
                norm(cls_labels, __cls_labels, NORM_INF) != 0 )
                CV_Error( CV_StsBadArg,
                "The new training data is inconsistent with the original training data; varIdx and the class labels should be the same" );
        }

        Mat cov( nvars, nvars, CV_64FC1 );
        int nsamples = samples.rows;

        // process train data (count, sum, productsum)
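        // For every class we keep running sufficient statistics per variable:
        // the number of observations, the sum of values and, for each pair of
        // variables, the sum of their products. Mean and covariance are derived
        // from these below, which also allows incremental updates (UPDATE_MODEL).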
        for( s = 0; s < nsamples; s++ )
        {
            cls = responses.at<int>(s);
            int* count_data = count[cls].ptr<int>();
            double* sum_data = sum[cls].ptr<double>();
            double* prod_data = productsum[cls].ptr<double>();
            const float* train_vec = samples.ptr<float>(s);

            for( c1 = 0; c1 < nvars; c1++, prod_data += nvars )
            {
                double val1 = train_vec[c1];
                sum_data[c1] += val1;
                count_data[c1]++;
                for( c2 = c1; c2 < nvars; c2++ )
                    prod_data[c2] += train_vec[c2]*val1;
            }
        }

        Mat vt;

        // calculate avg, covariance matrix, c
        for( cls = 0; cls < nclasses; cls++ )
        {
            double det = 1;
            int i, j;
            Mat& w = inv_eigen_values[cls];
            int* count_data = count[cls].ptr<int>();
            double* avg_data = avg[cls].ptr<double>();
            double* sum1 = sum[cls].ptr<double>();

            completeSymm(productsum[cls], 0);

            for( j = 0; j < nvars; j++ )
            {
                int n = count_data[j];
                avg_data[j] = n ? sum1[j] / n : 0.;
            }

            count_data = count[cls].ptr<int>();
            avg_data = avg[cls].ptr<double>();
            sum1 = sum[cls].ptr<double>();

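            // Unbiased sample covariance from the accumulated statistics:
            // cov(i,j) = ( sum_k x_ki*x_kj - avg_i*sum_j - avg_j*sum_i + n*avg_i*avg_j ) / (n - 1)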
            for( i = 0; i < nvars; i++ )
            {
                double* avg2_data = avg[cls].ptr<double>();
                double* sum2 = sum[cls].ptr<double>();
                double* prod_data = productsum[cls].ptr<double>(i);
                double* cov_data = cov.ptr<double>(i);
                double s1val = sum1[i];
                double avg1 = avg_data[i];
                int _count = count_data[i];

                for( j = 0; j <= i; j++ )
                {
                    double avg2 = avg2_data[j];
                    double cov_val = prod_data[j] - avg1 * sum2[j] - avg2 * s1val + avg1 * avg2 * _count;
                    cov_val = (_count > 1) ? cov_val / (_count - 1) : cov_val;
                    cov_data[j] = cov_val;
                }
            }

            completeSymm( cov, 1 );

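            // Decompose the symmetric covariance as cov = U * diag(w) * U^T (via SVD),
            // clamp near-zero eigenvalues to min_variation, and keep 1/w plus the
            // eigenvector basis for the Mahalanobis distance computed at prediction time.
            // c[cls] holds log(det(cov)); -700 (~log(DBL_MIN)) is used if det underflows to 0.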
            SVD::compute(cov, w, cov_rotate_mats[cls], noArray());
            transpose(cov_rotate_mats[cls], cov_rotate_mats[cls]);
            cv::max(w, min_variation, w);
            for( j = 0; j < nvars; j++ )
                det *= w.at<double>(j);

            divide(1., w, w);
            c.at<double>(cls) = det > 0 ? log(det) : -700;
        }

        return true;
    }

    class NBPredictBody : public ParallelLoopBody
    {
    public:
        NBPredictBody( const Mat& _c, const vector<Mat>& _cov_rotate_mats,
                       const vector<Mat>& _inv_eigen_values,
                       const vector<Mat>& _avg,
                       const Mat& _samples, const Mat& _vidx, const Mat& _cls_labels,
                       Mat& _results, Mat& _results_prob, bool _rawOutput )
        {
            c = &_c;
            cov_rotate_mats = &_cov_rotate_mats;
            inv_eigen_values = &_inv_eigen_values;
            avg = &_avg;
            samples = &_samples;
            vidx = &_vidx;
            cls_labels = &_cls_labels;
            results = &_results;
            results_prob = !_results_prob.empty() ? &_results_prob : 0;
            rawOutput = _rawOutput;
            value = 0;
        }

        const Mat* c;
        const vector<Mat>* cov_rotate_mats;
        const vector<Mat>* inv_eigen_values;
        const vector<Mat>* avg;
        const Mat* samples;
        const Mat* vidx;
        const Mat* cls_labels;

        Mat* results_prob;
        Mat* results;
        float* value;
        bool rawOutput;

        void operator()(const Range& range) const CV_OVERRIDE
        {
            int cls = -1;
            int rtype = 0, rptype = 0;
            size_t rstep = 0, rpstep = 0;
            int nclasses = (int)cls_labels->total();
            int nvars = avg->at(0).cols;
            double probability = 0;
            const int* vptr = vidx && !vidx->empty() ? vidx->ptr<int>() : 0;

            if (results)
            {
                rtype = results->type();
                rstep = results->isContinuous() ? 1 : results->step/results->elemSize();
            }
            if (results_prob)
            {
                rptype = results_prob->type();
                rpstep = results_prob->isContinuous() ? results_prob->cols : results_prob->step/results_prob->elemSize();
            }
            // allocate memory and initialize Mat headers for the per-sample computation
            cv::AutoBuffer<double> _buffer(nvars*2);
            double* _diffin = _buffer.data();
            double* _diffout = _buffer.data() + nvars;
            Mat diffin( 1, nvars, CV_64FC1, _diffin );
            Mat diffout( 1, nvars, CV_64FC1, _diffout );

            for(int k = range.start; k < range.end; k++ )
            {
                double opt = FLT_MAX;

                for(int i = 0; i < nclasses; i++ )
                {
                    double cur = c->at<double>(i);
                    const Mat& u = cov_rotate_mats->at(i);
                    const Mat& w = inv_eigen_values->at(i);

                    const double* avg_data = avg->at(i).ptr<double>();
                    const float* x = samples->ptr<float>(k);

                    // cov = u w u' --> cov^(-1) = u w^(-1) u'
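                    // Per-class discriminant: cur = log|cov_i| + (x - mu_i)^T cov_i^(-1) (x - mu_i),
                    // evaluated in the eigenbasis of cov_i; the class with the smallest value wins.
                    // exp(-0.5*cur) is an unnormalized likelihood reported via results_prob.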
                    for(int j = 0; j < nvars; j++ )
                        _diffin[j] = avg_data[j] - x[vptr ? vptr[j] : j];

                    gemm( diffin, u, 1, noArray(), 0, diffout, GEMM_2_T );
                    for(int j = 0; j < nvars; j++ )
                    {
                        double d = _diffout[j];
                        cur += d*d*w.ptr<double>()[j];
                    }

                    if( cur < opt )
                    {
                        cls = i;
                        opt = cur;
                    }
                    probability = exp( -0.5 * cur );

                    if( results_prob )
                    {
                        if ( rptype == CV_32FC1 )
                            results_prob->ptr<float>()[k*rpstep + i] = (float)probability;
                        else
                            results_prob->ptr<double>()[k*rpstep + i] = probability;
                    }
                }

                int ival = rawOutput ? cls : cls_labels->at<int>(cls);
                if( results )
                {
                    if( rtype == CV_32SC1 )
                        results->ptr<int>()[k*rstep] = ival;
                    else
                        results->ptr<float>()[k*rstep] = (float)ival;
                }
            }
        }
    };

    float predict( InputArray _samples, OutputArray _results, int flags ) const CV_OVERRIDE
    {
        return predictProb(_samples, _results, noArray(), flags);
    }

    float predictProb( InputArray _samples, OutputArray _results, OutputArray _resultsProb, int flags ) const CV_OVERRIDE
    {
        int value=0;
        Mat samples = _samples.getMat(), results, resultsProb;
        int nsamples = samples.rows, nclasses = (int)cls_labels.total();
        bool rawOutput = (flags & RAW_OUTPUT) != 0;

        if( samples.type() != CV_32F || samples.cols != nallvars )
            CV_Error( CV_StsBadArg,
                     "The input samples must be a 32F matrix with the number of columns equal to nallvars" );

        if( (samples.rows > 1) && (! _results.needed()) )
            CV_Error( CV_StsNullPtr,
                     "When the number of input samples is >1, the output vector of results must be passed" );

        if( _results.needed() )
        {
            _results.create(nsamples, 1, CV_32S);
            results = _results.getMat();
        }
        else
            results = Mat(1, 1, CV_32S, &value);

        if( _resultsProb.needed() )
        {
            _resultsProb.create(nsamples, nclasses, CV_32F);
            resultsProb = _resultsProb.getMat();
        }

        cv::parallel_for_(cv::Range(0, nsamples),
                          NBPredictBody(c, cov_rotate_mats, inv_eigen_values, avg, samples,
                                        var_idx, cls_labels, results, resultsProb, rawOutput));

        return (float)value;
    }

    void write( FileStorage& fs ) const CV_OVERRIDE
    {
        int nclasses = (int)cls_labels.total(), i;

        writeFormat(fs);
        fs << "var_count" << (var_idx.empty() ? nallvars : (int)var_idx.total());
        fs << "var_all" << nallvars;

        if( !var_idx.empty() )
            fs << "var_idx" << var_idx;
        fs << "cls_labels" << cls_labels;

        fs << "count" << "[";
        for( i = 0; i < nclasses; i++ )
            fs << count[i];

        fs << "]" << "sum" << "[";
        for( i = 0; i < nclasses; i++ )
            fs << sum[i];

        fs << "]" << "productsum" << "[";
        for( i = 0; i < nclasses; i++ )
            fs << productsum[i];

        fs << "]" << "avg" << "[";
        for( i = 0; i < nclasses; i++ )
            fs << avg[i];

        fs << "]" << "inv_eigen_values" << "[";
        for( i = 0; i < nclasses; i++ )
            fs << inv_eigen_values[i];

        fs << "]" << "cov_rotate_mats" << "[";
        for( i = 0; i < nclasses; i++ )
            fs << cov_rotate_mats[i];

        fs << "]";

        fs << "c" << c;
    }

    void read( const FileNode& fn ) CV_OVERRIDE
    {
        clear();

        fn["var_all"] >> nallvars;

        if( nallvars <= 0 )
            CV_Error( CV_StsParseError,
                     "The field \"var_all\" of NBayes classifier is missing or non-positive" );

        fn["var_idx"] >> var_idx;
        fn["cls_labels"] >> cls_labels;

        int nclasses = (int)cls_labels.total(), i;

        if( cls_labels.empty() || nclasses < 1 )
            CV_Error( CV_StsParseError, "No or invalid \"cls_labels\" in NBayes classifier" );

        FileNodeIterator
            count_it = fn["count"].begin(),
            sum_it = fn["sum"].begin(),
            productsum_it = fn["productsum"].begin(),
            avg_it = fn["avg"].begin(),
            inv_eigen_values_it = fn["inv_eigen_values"].begin(),
            cov_rotate_mats_it = fn["cov_rotate_mats"].begin();

        count.resize(nclasses);
        sum.resize(nclasses);
        productsum.resize(nclasses);
        avg.resize(nclasses);
        inv_eigen_values.resize(nclasses);
        cov_rotate_mats.resize(nclasses);

        for( i = 0; i < nclasses; i++, ++count_it, ++sum_it, ++productsum_it, ++avg_it,
                                       ++inv_eigen_values_it, ++cov_rotate_mats_it )
        {
            *count_it >> count[i];
            *sum_it >> sum[i];
            *productsum_it >> productsum[i];
            *avg_it >> avg[i];
            *inv_eigen_values_it >> inv_eigen_values[i];
            *cov_rotate_mats_it >> cov_rotate_mats[i];
        }

        fn["c"] >> c;
    }

    void clear() CV_OVERRIDE
    {
        count.clear();
        sum.clear();
        productsum.clear();
        avg.clear();
        inv_eigen_values.clear();
        cov_rotate_mats.clear();

        var_idx.release();
        cls_labels.release();
        c.release();
        nallvars = 0;
    }

    bool isTrained() const CV_OVERRIDE { return !avg.empty(); }
    bool isClassifier() const CV_OVERRIDE { return true; }
    int getVarCount() const CV_OVERRIDE { return nallvars; }
    String getDefaultName() const CV_OVERRIDE { return "opencv_ml_nbayes"; }

    int nallvars;
    Mat var_idx, cls_labels, c;
    vector<Mat> count, sum, productsum, avg, inv_eigen_values, cov_rotate_mats;
};


Ptr<NormalBayesClassifier> NormalBayesClassifier::create()
{
    Ptr<NormalBayesClassifierImpl> p = makePtr<NormalBayesClassifierImpl>();
    return p;
}

Ptr<NormalBayesClassifier> NormalBayesClassifier::load(const String& filepath, const String& nodeName)
{
    return Algorithm::load<NormalBayesClassifier>(filepath, nodeName);
}
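
/*
    Usage sketch (illustrative only; the local variable names below are hypothetical):

        Ptr<TrainData> td = TrainData::create(samples, ROW_SAMPLE, responses); // CV_32F rows, CV_32S labels
        Ptr<NormalBayesClassifier> nb = NormalBayesClassifier::create();
        nb->train(td);

        Mat results, probs;
        nb->predictProb(testSamples, results, probs); // per-class unnormalized scores in probs
*/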

}
}

/* End of file. */