#include "opencv2/core.hpp"
#include "opencv2/core/utility.hpp"

using cv::Size;
using cv::Mat;
using cv::Point;
using cv::FileStorage;
using cv::Rect;
using cv::Ptr;
using cv::FileNode;
using cv::Mat_;
using cv::Range;
using cv::FileNodeIterator;
using cv::ParallelLoopBody;
#include "boost.h"
#include "cascadeclassifier.h"
#include <queue>

#include "cvconfig.h"

using namespace std;

static inline double
logRatio( double val )
{
    const double eps = 1e-5;

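    // clamp val to (eps, 1 - eps) so the log-odds below stays finite at 0 and 1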
    val = max( val, eps );
    val = min( val, 1. - eps );
    return log( val/(1. - val) );
}

template<typename T, typename Idx>
class LessThanIdx
{
public:
    LessThanIdx( const T* _arr ) : arr(_arr) {}
    bool operator()(Idx a, Idx b) const { return arr[a] < arr[b]; }
    const T* arr;
};

static inline int cvAlign( int size, int align )
{
    CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX );
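    // round size up to the next multiple of align (the assert above requires align
    // to be a power of two), e.g. cvAlign(13, 8) == 16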
    return (size + align - 1) & -align;
}

#define CV_THRESHOLD_EPS (0.00001F)

static const int MinBlockSize = 1 << 16;
static const int BlockSizeDelta = 1 << 10;
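// memory-storage block sizing: blocks get 1 KB (BlockSizeDelta) of headroom
// and are never smaller than 64 KB (MinBlockSize)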

// TODO remove this code duplication with ml/precomp.hpp

static int CV_CDECL icvCmpIntegers( const void* a, const void* b )
{
    return *(const int*)a - *(const int*)b;
}

static CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false )
{
    CvMat* idx = 0;

    CV_FUNCNAME( "cvPreprocessIndexArray" );

    __CV_BEGIN__;

    int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1;
    uchar* srcb = 0;
    int* srci = 0;
    int* dsti;

    if( !CV_IS_MAT(idx_arr) )
        CV_ERROR( CV_StsBadArg, "Invalid index array" );

    if( idx_arr->rows != 1 && idx_arr->cols != 1 )
        CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" );

    idx_total = idx_arr->rows + idx_arr->cols - 1;
    srcb = idx_arr->data.ptr;
    srci = idx_arr->data.i;

    type = CV_MAT_TYPE(idx_arr->type);
    step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type);

    switch( type )
    {
    case CV_8UC1:
    case CV_8SC1:
        // idx_arr is an array of 1's and 0's -
        // i.e. it is a mask of the selected components
        if( idx_total != data_arr_size )
            CV_ERROR( CV_StsUnmatchedSizes,
            "Component mask should contain as many elements as the total number of input variables" );

        for( i = 0; i < idx_total; i++ )
            idx_selected += srcb[i*step] != 0;

        if( idx_selected == 0 )
            CV_ERROR( CV_StsOutOfRange, "No components/input variables are selected!" );

        break;
    case CV_32SC1:
        // idx_arr is an array of integer indices of the selected components
        if( idx_total > data_arr_size )
            CV_ERROR( CV_StsOutOfRange,
            "index array may not contain more elements than the total number of input variables" );
        idx_selected = idx_total;
        // check if sorted already (break as soon as an out-of-order pair is found)
        for( i = 0; i < idx_total; i++ )
        {
            int val = srci[i*step];
            if( val < prev )
            {
                is_sorted = 0;
                break;
            }
            prev = val;
        }
        break;
    default:
        CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type "
                  "(it should be 8uC1, 8sC1 or 32sC1)" );
    }

    CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 ));
    dsti = idx->data.i;

    if( type < CV_32SC1 )
    {
        for( i = 0; i < idx_total; i++ )
            if( srcb[i*step] )
                *dsti++ = i;
    }
    else
    {
        for( i = 0; i < idx_total; i++ )
            dsti[i] = srci[i*step];

        if( !is_sorted )
            qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers );

        if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size )
            CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" );

        if( check_for_duplicates )
        {
            for( i = 1; i < idx_total; i++ )
                if( dsti[i] <= dsti[i-1] )
                    CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" );
        }
    }

    __CV_END__;

    if( cvGetErrStatus() < 0 )
        cvReleaseMat( &idx );

    return idx;
}

//----------------------------- CascadeBoostParams -------------------------------------------------

CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F), maxFalseAlarm( 0.5F )
{
    boost_type = CvBoost::GENTLE;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}

CvCascadeBoostParams::CvCascadeBoostParams( int _boostType,
        float _minHitRate, float _maxFalseAlarm,
        double _weightTrimRate, int _maxDepth, int _maxWeakCount ) :
    CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 )
{
    boost_type = CvBoost::GENTLE;
    minHitRate = _minHitRate;
    maxFalseAlarm = _maxFalseAlarm;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}

void CvCascadeBoostParams::write( FileStorage &fs ) const
{
    string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
                          boost_type == CvBoost::REAL ? CC_REAL_BOOST :
                          boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
                          boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
    CV_Assert( !boostTypeStr.empty() );
    fs << CC_BOOST_TYPE << boostTypeStr;
    fs << CC_MINHITRATE << minHitRate;
    fs << CC_MAXFALSEALARM << maxFalseAlarm;
    fs << CC_TRIM_RATE << weight_trim_rate;
    fs << CC_MAX_DEPTH << max_depth;
    fs << CC_WEAK_COUNT << weak_count;
}

bool CvCascadeBoostParams::read( const FileNode &node )
{
    string boostTypeStr;
    FileNode rnode = node[CC_BOOST_TYPE];
    rnode >> boostTypeStr;
    boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
                 !boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
                 !boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
                 !boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
    if (boost_type == -1)
        CV_Error( CV_StsBadArg, "unsupported Boost type" );
    node[CC_MINHITRATE] >> minHitRate;
    node[CC_MAXFALSEALARM] >> maxFalseAlarm;
    node[CC_TRIM_RATE] >> weight_trim_rate;
    node[CC_MAX_DEPTH] >> max_depth;
    node[CC_WEAK_COUNT] >> weak_count;
    if ( minHitRate <= 0 || minHitRate > 1 ||
         maxFalseAlarm <= 0 || maxFalseAlarm > 1 ||
         weight_trim_rate <= 0 || weight_trim_rate > 1 ||
         max_depth <= 0 || weak_count <= 0 )
        CV_Error( CV_StsBadArg, "bad parameters range");
    return true;
}

void CvCascadeBoostParams::printDefaults() const
{
    cout << "--boostParams--" << endl;
    cout << "  [-bt <{" << CC_DISCRETE_BOOST << ", "
         << CC_REAL_BOOST << ", "
         << CC_LOGIT_BOOST << ", "
         << CC_GENTLE_BOOST << "(default)}>]" << endl;
    cout << "  [-minHitRate <min_hit_rate = " << minHitRate << ">]" << endl;
    cout << "  [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl;
    cout << "  [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl;
    cout << "  [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl;
    cout << "  [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl;
}

void CvCascadeBoostParams::printAttrs() const
{
    string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
                          boost_type == CvBoost::REAL ? CC_REAL_BOOST :
                          boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
                          boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
    CV_Assert( !boostTypeStr.empty() );
    cout << "boostType: " << boostTypeStr << endl;
    cout << "minHitRate: " << minHitRate << endl;
    cout << "maxFalseAlarmRate: " << maxFalseAlarm << endl;
    cout << "weightTrimRate: " << weight_trim_rate << endl;
    cout << "maxDepth: " << max_depth << endl;
    cout << "maxWeakCount: " << weak_count << endl;
}

bool CvCascadeBoostParams::scanAttr( const string prmName, const string val)
{
    bool res = true;

    if( !prmName.compare( "-bt" ) )
    {
        boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
                     !val.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
                     !val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
                     !val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
        if (boost_type == -1)
            res = false;
    }
    else if( !prmName.compare( "-minHitRate" ) )
    {
        minHitRate = (float) atof( val.c_str() );
    }
    else if( !prmName.compare( "-maxFalseAlarmRate" ) )
    {
        maxFalseAlarm = (float) atof( val.c_str() );
    }
    else if( !prmName.compare( "-weightTrimRate" ) )
    {
        weight_trim_rate = (float) atof( val.c_str() );
    }
    else if( !prmName.compare( "-maxDepth" ) )
    {
        max_depth = atoi( val.c_str() );
    }
    else if( !prmName.compare( "-maxWeakCount" ) )
    {
        weak_count = atoi( val.c_str() );
    }
    else
        res = false;

    return res;
}

CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_idx )
{
    CvDTreeNode* root = 0;
    CvMat* isubsample_idx = 0;
    CvMat* subsample_co = 0;

    bool isMakeRootCopy = true;

    if( !data_root )
        CV_Error( CV_StsError, "No training data has been set" );

    if( _subsample_idx )
    {
        CV_Assert( (isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count )) != 0 );

        if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count )
        {
            const int* sidx = isubsample_idx->data.i;
            for( int i = 0; i < sample_count; i++ )
            {
                if( sidx[i] != i )
                {
                    isMakeRootCopy = false;
                    break;
                }
            }
        }
        else
            isMakeRootCopy = false;
    }

    if( isMakeRootCopy )
    {
        // make a copy of the root node
        CvDTreeNode temp;
        int i;
        root = new_node( 0, 1, 0, 0 );
        temp = *root;
        *root = *data_root;
        root->num_valid = temp.num_valid;
        if( root->num_valid )
        {
            for( i = 0; i < var_count; i++ )
                root->num_valid[i] = data_root->num_valid[i];
        }
        root->cv_Tn = temp.cv_Tn;
        root->cv_node_risk = temp.cv_node_risk;
        root->cv_node_error = temp.cv_node_error;
    }
    else
    {
        int* sidx = isubsample_idx->data.i;
        // co - array of count/offset pairs (to handle duplicated values in _subsample_idx)
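        // co[2*i]   - how many times sample i occurs in _subsample_idx
        // co[2*i+1] - offset of its first slot in the subsampled order (-1 if absent)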
        int* co, cur_ofs = 0;
        int workVarCount = get_work_var_count();
        int count = isubsample_idx->rows + isubsample_idx->cols - 1;

        root = new_node( 0, count, 1, 0 );

        CV_Assert( (subsample_co = cvCreateMat( 1, sample_count*2, CV_32SC1 )) != 0);
        cvZero( subsample_co );
        co = subsample_co->data.i;
        for( int i = 0; i < count; i++ )
            co[sidx[i]*2]++;
        for( int i = 0; i < sample_count; i++ )
        {
            if( co[i*2] )
            {
                co[i*2+1] = cur_ofs;
                cur_ofs += co[i*2];
            }
            else
                co[i*2+1] = -1;
        }

        cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float)));
        // subsample ordered variables
        for( int vi = 0; vi < numPrecalcIdx; vi++ )
        {
            int ci = get_var_type(vi);
            CV_Assert( ci < 0 );

            int *src_idx_buf = (int*)inn_buf.data();
            float *src_val_buf = (float*)(src_idx_buf + sample_count);
            int* sample_indices_buf = (int*)(src_val_buf + sample_count);
            const int* src_idx = 0;
            const float* src_val = 0;
            get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf );

            int j = 0, idx, count_i;
            int num_valid = data_root->get_num_valid(vi);
            CV_Assert( num_valid == sample_count );

            if (is_buf_16u)
            {
                unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                    (size_t)vi*sample_count + data_root->offset);
                for( int i = 0; i < num_valid; i++ )
                {
                    idx = src_idx[i];
                    count_i = co[idx*2];
                    if( count_i )
                        for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
                            udst_idx[j] = (unsigned short)cur_ofs;
                }
            }
            else
            {
                int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
                    (size_t)vi*sample_count + root->offset;
                for( int i = 0; i < num_valid; i++ )
                {
                    idx = src_idx[i];
                    count_i = co[idx*2];
                    if( count_i )
                        for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
                            idst_idx[j] = cur_ofs;
                }
            }
        }

        // subsample cv_labels
        const int* src_lbls = get_cv_labels(data_root, (int*)inn_buf.data());
        if (is_buf_16u)
        {
            unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                (size_t)(workVarCount-1)*sample_count + root->offset);
            for( int i = 0; i < count; i++ )
                udst[i] = (unsigned short)src_lbls[sidx[i]];
        }
        else
        {
            int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
                (size_t)(workVarCount-1)*sample_count + root->offset;
            for( int i = 0; i < count; i++ )
                idst[i] = src_lbls[sidx[i]];
        }

        // subsample sample_indices
        const int* sample_idx_src = get_sample_indices(data_root, (int*)inn_buf.data());
        if (is_buf_16u)
        {
            unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                (size_t)workVarCount*sample_count + root->offset);
            for( int i = 0; i < count; i++ )
                sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
        }
        else
        {
            int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
                (size_t)workVarCount*sample_count + root->offset;
            for( int i = 0; i < count; i++ )
                sample_idx_dst[i] = sample_idx_src[sidx[i]];
        }

        for( int vi = 0; vi < var_count; vi++ )
            root->set_num_valid(vi, count);
    }

    cvReleaseMat( &isubsample_idx );
    cvReleaseMat( &subsample_co );

    return root;
}

//---------------------------- CascadeBoostTrainData -----------------------------

CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                  const CvDTreeParams& _params )
{
    is_classifier = true;
    var_all = var_count = (int)_featureEvaluator->getNumFeatures();

    featureEvaluator = _featureEvaluator;
    shared = true;
    set_params( _params );
    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;

    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
    split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
}

CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                  int _numSamples,
                                                  int _precalcValBufSize, int _precalcIdxBufSize,
                                                  const CvDTreeParams& _params )
{
    setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params );
}

void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator,
                                       int _numSamples,
                                       int _precalcValBufSize, int _precalcIdxBufSize,
                                       const CvDTreeParams& _params )
{
    int* idst = 0;
    unsigned short* udst = 0;

    uint64 effective_buf_size = 0;
    int effective_buf_height = 0, effective_buf_width = 0;

    clear();
    shared = true;
    have_labels = true;
    have_priors = false;
    is_classifier = true;

    rng = &cv::theRNG();

    set_params( _params );

    CV_Assert( _featureEvaluator );
    featureEvaluator = _featureEvaluator;

    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    _resp = cvMat(featureEvaluator->getCls());
    responses = &_resp;
    // TODO: check responses: elements must be 0 or 1

    if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0)
        CV_Error( CV_StsOutOfRange, "_precalcValBufSize and _precalcIdxBufSize must be positive or 0" );

    var_count = var_all = featureEvaluator->getNumFeatures() * featureEvaluator->getFeatureSize();
    sample_count = _numSamples;

    is_buf_16u = false;
    if (sample_count < 65536)
        is_buf_16u = true;

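    // buffer sizes are given in MB (1048576 bytes); convert each to the number of
    // features whose cached float values / pre-sorted indices fit, capped by var_count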
    numPrecalcVal = min( cvRound((double)_precalcValBufSize*1048576. / (sizeof(float)*sample_count)), var_count );
    numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. /
                ((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count );

    assert( numPrecalcIdx >= 0 && numPrecalcVal >= 0 );

    valCache.create( numPrecalcVal, sample_count, CV_32FC1 );
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );

    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;
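    // each subbuf of "buf" stores, row by row: pre-sorted sample indices for the first
    // numPrecalcIdx ordered features, one row of cv_labels, and one row of sample indices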
    work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_labels*/;
    buf_count = 2;

    buf_size = -1; // the member buf_size is obsolete

    effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
    effective_buf_width = sample_count;
    effective_buf_height = work_var_count+1;

    if (effective_buf_width >= effective_buf_height)
        effective_buf_height *= buf_count;
    else
        effective_buf_width *= buf_count;

    if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
    {
        CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
    }

    if ( is_buf_16u )
        buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 );
    else
        buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 );

    cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );

    // precalculate valCache and set indices in buf
    precalculate();

    // now calculate the maximum size of split,
    // create memory storage that will keep nodes and splits of the decision tree
    // allocate root node and the buffer for the whole training data
    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0,sample_count - 33)/32)*sizeof(int),sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage );

    int nvSize = var_count*sizeof(int);
    nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*));
    int tempBlockSize = nvSize;
    tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize );
    temp_storage = cvCreateMemStorage( tempBlockSize );
    nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage );

    data_root = new_node( 0, sample_count, 0, 0 );

    // set sample labels
    if (is_buf_16u)
        udst = (unsigned short*)(buf->data.s + (size_t)work_var_count*sample_count);
    else
        idst = buf->data.i + (size_t)work_var_count*sample_count;

    for (int si = 0; si < sample_count; si++)
    {
        if (udst)
            udst[si] = (unsigned short)si;
        else
            idst[si] = si;
    }
    for( int vi = 0; vi < var_count; vi++ )
        data_root->set_num_valid(vi, sample_count);
    for( int vi = 0; vi < cat_var_count; vi++ )
        cat_count->data.i[vi] = max_c_count;

    cat_count->data.i[cat_var_count] = 2;

    maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
    split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage );

    priors = cvCreateMat( 1, get_num_classes(), CV_64F );
    cvSet(priors, cvScalar(1));
    priors_mult = cvCloneMat( priors );
    counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
    direction = cvCreateMat( 1, sample_count, CV_8UC1 );
    split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer
}

void CvCascadeBoostTrainData::free_train_data()
{
    CvDTreeTrainData::free_train_data();
    valCache.release();
}

const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf)
{
    int nodeSampleCount = n->sample_count;
    int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );

    int* sampleIndicesBuf = labelsBuf; // reuse labelsBuf as temporary storage for the indices
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
    for( int si = 0; si < nodeSampleCount; si++ )
    {
        int sidx = sampleIndices[si];
        labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
    }
    return labelsBuf;
}

const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
}

const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
}

void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
                                                const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
{
    int nodeSampleCount = n->sample_count;
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);

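    // fast paths: sorted orders for the first numPrecalcIdx features live in "buf",
    // cached feature values for the first numPrecalcVal features live in valCache;
    // everything else is evaluated on the fly and sorted here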
    if ( vi < numPrecalcIdx )
    {
        if( !is_buf_16u )
            *sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + (size_t)vi*sample_count + n->offset;
        else
        {
            const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
                                                    (size_t)vi*sample_count + n->offset );
            for( int i = 0; i < nodeSampleCount; i++ )
                sortedIndicesBuf[i] = shortIndices[i];

            *sortedIndices = sortedIndicesBuf;
        }

        if( vi < numPrecalcVal )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = valCache.at<float>( vi, idx);
            }
        }
        else
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = (*featureEvaluator)( vi, idx);
            }
        }
    }
    else // vi >= numPrecalcIdx
    {
        cv::AutoBuffer<float> abuf(nodeSampleCount);
        float* sampleValues = &abuf[0];

        if ( vi < numPrecalcVal )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                sampleValues[i] = valCache.at<float>( vi, sampleIndices[i] );
            }
        }
        else
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                sampleValues[i] = (*featureEvaluator)( vi, sampleIndices[i]);
            }
        }
        std::sort(sortedIndicesBuf, sortedIndicesBuf + nodeSampleCount, LessThanIdx<float, int>(&sampleValues[0]) );
        for( int i = 0; i < nodeSampleCount; i++ )
            ordValuesBuf[i] = (&sampleValues[0])[sortedIndicesBuf[i]];
        *sortedIndices = sortedIndicesBuf;
    }

    *ordValues = ordValuesBuf;
}

const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf )
{
    int nodeSampleCount = n->sample_count;
    int* sampleIndicesBuf = catValuesBuf; // reuse catValuesBuf as temporary storage for the indices
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);

    if ( vi < numPrecalcVal )
    {
        for( int i = 0; i < nodeSampleCount; i++ )
            catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i]);
    }
    else
    {
        if( vi >= numPrecalcVal && vi < var_count )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
                catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
        }
        else
        {
            get_cv_labels( n, catValuesBuf );
        }
    }

    return catValuesBuf;
}

float CvCascadeBoostTrainData::getVarValue( int vi, int si )
{
    if ( vi < numPrecalcVal && !valCache.empty() )
        return valCache.at<float>( vi, si );
    return (*featureEvaluator)( vi, si );
}


struct FeatureIdxOnlyPrecalc : ParallelLoopBody
{
    FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
    {
        featureEvaluator = _featureEvaluator;
        sample_count = _sample_count;
        udst = (unsigned short*)_buf->data.s;
        idst = _buf->data.i;
        is_buf_16u = _is_buf_16u;
    }
    void operator()( const Range& range ) const
    {
        cv::AutoBuffer<float> valCache(sample_count);
        float* valCachePtr = valCache.data();
        for ( int fi = range.start; fi < range.end; fi++)
        {
            for( int si = 0; si < sample_count; si++ )
            {
                valCachePtr[si] = (*featureEvaluator)( fi, si );
                if ( is_buf_16u )
                    *(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
                else
                    *(idst + (size_t)fi*sample_count + si) = si;
            }
            if ( is_buf_16u )
                std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCachePtr) );
            else
                std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCachePtr) );
        }
    }
    const CvFeatureEvaluator* featureEvaluator;
    int sample_count;
    int* idst;
    unsigned short* udst;
    bool is_buf_16u;
};

struct FeatureValAndIdxPrecalc : ParallelLoopBody
{
    FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
    {
        featureEvaluator = _featureEvaluator;
        valCache = _valCache;
        sample_count = _sample_count;
        udst = (unsigned short*)_buf->data.s;
        idst = _buf->data.i;
        is_buf_16u = _is_buf_16u;
    }
    void operator()( const Range& range ) const
    {
        for ( int fi = range.start; fi < range.end; fi++)
        {
            for( int si = 0; si < sample_count; si++ )
            {
                valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
                if ( is_buf_16u )
                    *(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
                else
                    *(idst + (size_t)fi*sample_count + si) = si;
            }
            if ( is_buf_16u )
                std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCache->ptr<float>(fi)) );
            else
                std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCache->ptr<float>(fi)) );
        }
    }
    const CvFeatureEvaluator* featureEvaluator;
    Mat* valCache;
    int sample_count;
    int* idst;
    unsigned short* udst;
    bool is_buf_16u;
};

struct FeatureValOnlyPrecalc : ParallelLoopBody
{
    FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
    {
        featureEvaluator = _featureEvaluator;
        valCache = _valCache;
        sample_count = _sample_count;
    }
    void operator()( const Range& range ) const
    {
        for ( int fi = range.start; fi < range.end; fi++)
            for( int si = 0; si < sample_count; si++ )
                valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
    }
    const CvFeatureEvaluator* featureEvaluator;
    Mat* valCache;
    int sample_count;
};

void CvCascadeBoostTrainData::precalculate()
{
    int minNum = MIN( numPrecalcVal, numPrecalcIdx);
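    // features [0, minNum) get cached values and pre-sorted indices,
    // [minNum, numPrecalcVal) cached values only,
    // and [numPrecalcVal, numPrecalcIdx) pre-sorted indices only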

    double proctime = -TIME( 0 );
    parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
                   FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
    parallel_for_( Range(0, minNum),
                   FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
    parallel_for_( Range(minNum, numPrecalcVal),
                   FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
    cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
}

//-------------------------------- CascadeBoostTree ----------------------------------------

CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const
{
    CvDTreeNode* node = root;
    if( !node )
        CV_Error( CV_StsError, "The tree has not been trained yet" );

    if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered
    {
        while( node->left )
        {
            CvDTreeSplit* split = node->split;
            float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
            node = val <= split->ord.c ? node->left : node->right;
        }
    }
    else // categorical
    {
        while( node->left )
        {
            CvDTreeSplit* split = node->split;
            int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
            node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right;
        }
    }
    return node;
}

void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
{
    int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount();
    int subsetN = (maxCatCount + 31)/32;
    queue<CvDTreeNode*> internalNodesQueue;
    int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth);
    std::vector<float> leafVals(size);
    int leafValIdx = 0;
    int internalNodeIdx = 1;
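    // nodes are serialized breadth-first: internal nodes are written as positive
    // indices into CC_INTERNAL_NODES, leaves as non-positive indices into CC_LEAF_VALUES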
    CvDTreeNode* tempNode;

    CV_DbgAssert( root );
    internalNodesQueue.push( root );

    fs << "{";
    fs << CC_INTERNAL_NODES << "[:";
    while (!internalNodesQueue.empty())
    {
        tempNode = internalNodesQueue.front();
        CV_Assert( tempNode->left );
        if ( !tempNode->left->left && !tempNode->left->right) // left node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->left->value;
            fs << leafValIdx--;
        }
        else
        {
            internalNodesQueue.push( tempNode->left );
            fs << internalNodeIdx++;
        }
        CV_Assert( tempNode->right );
        if ( !tempNode->right->left && !tempNode->right->right) // right node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->right->value;
            fs << leafValIdx--;
        }
        else
        {
            internalNodesQueue.push( tempNode->right );
            fs << internalNodeIdx++;
        }
        int fidx = tempNode->split->var_idx;
        fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx);
        fs << fidx;
        if ( !maxCatCount )
            fs << tempNode->split->ord.c;
        else
            for( int i = 0; i < subsetN; i++ )
                fs << tempNode->split->subset[i];
        internalNodesQueue.pop();
    }
    fs << "]"; // CC_INTERNAL_NODES

    fs << CC_LEAF_VALUES << "[:";
    for (int ni = 0; ni < -leafValIdx; ni++)
        fs << leafVals[ni];
    fs << "]"; // CC_LEAF_VALUES
    fs << "}";
}

void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble,
                               CvDTreeTrainData* _data )
{
    int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount();
    int subsetN = (maxCatCount + 31)/32;
    int step = 3 + ( maxCatCount>0 ? subsetN : 1 );

    queue<CvDTreeNode*> internalNodesQueue;
    FileNodeIterator internalNodesIt, leafValuesIt;
    CvDTreeNode* prntNode, *cldNode;

    clear();
    data = _data;
    ensemble = _ensemble;
    pruned_tree_idx = 0;

    // read tree nodes
    FileNode rnode = node[CC_INTERNAL_NODES];
    internalNodesIt = rnode.end();
    leafValuesIt = node[CC_LEAF_VALUES].end();
    internalNodesIt--; leafValuesIt--;
    for( size_t i = 0; i < rnode.size()/step; i++ )
    {
        prntNode = data->new_node( 0, 0, 0, 0 );
        if ( maxCatCount > 0 )
        {
            prntNode->split = data->new_split_cat( 0, 0 );
            for( int j = subsetN-1; j>=0; j--)
            {
                *internalNodesIt >> prntNode->split->subset[j]; internalNodesIt--;
            }
        }
        else
        {
            float split_value;
            *internalNodesIt >> split_value; internalNodesIt--;
            prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0);
        }
        *internalNodesIt >> prntNode->split->var_idx; internalNodesIt--;
        int ridx, lidx;
        *internalNodesIt >> ridx; internalNodesIt--;
        *internalNodesIt >> lidx; internalNodesIt--;
        if ( ridx <= 0)
        {
            prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValuesIt >> cldNode->value; leafValuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            prntNode->right = internalNodesQueue.front();
            prntNode->right->parent = prntNode;
            internalNodesQueue.pop();
        }

        if ( lidx <= 0)
        {
            prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValuesIt >> cldNode->value; leafValuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            prntNode->left = internalNodesQueue.front();
            prntNode->left->parent = prntNode;
            internalNodesQueue.pop();
        }

        internalNodesQueue.push( prntNode );
    }

    root = internalNodesQueue.front();
    internalNodesQueue.pop();
}

void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
{
    int n = node->sample_count, nl, nr, scount = data->sample_count;
    char* dir = (char*)data->direction->data.ptr;
    CvDTreeNode *left = 0, *right = 0;
    int* newIdx = data->split_buf->data.i;
    int newBufIdx = data->get_child_buf_idx( node );
    int workVarCount = data->get_work_var_count();
    CvMat* buf = data->buf;
    size_t length_buf_row = data->get_length_subbuf();
    cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
    int* tempBuf = (int*)inn_buf.data();
    bool splitInputData;

    complete_node_dir(node);

    for( int i = nl = nr = 0; i < n; i++ )
    {
        int d = dir[i];
        // initialize new indices for splitting ordered variables
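        // branchless select: d is 0 or 1, so (d-1) is an all-ones mask when d == 0
        // and -d is an all-ones mask when d == 1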
        newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
        nr += d;
        nl += d^1;
    }

    node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
    node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );

    splitInputData = node->depth + 1 < data->params.max_depth &&
        (node->left->sample_count > data->params.min_sample_count ||
        node->right->sample_count > data->params.min_sample_count);

    // split ordered variables, keep both halves sorted.
    for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
    {
        int ci = data->get_var_type(vi);
        if( ci >= 0 || !splitInputData )
            continue;

        int n1 = node->get_num_valid(vi);
        float *src_val_buf = (float*)(tempBuf + n);
        int *src_sorted_idx_buf = (int*)(src_val_buf + n);
        int *src_sample_idx_buf = src_sorted_idx_buf + n;
        const int* src_sorted_idx = 0;
        const float* src_val = 0;
        data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);

        for(int i = 0; i < n; i++)
            tempBuf[i] = src_sorted_idx[i];

        if (data->is_buf_16u)
        {
            ushort *ldst, *rdst;
            ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row +
                vi*scount + left->offset);
            rdst = (ushort*)(ldst + nl);

            // split sorted
            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                {
                    *rdst = (ushort)idx;
                    rdst++;
                }
                else
                {
                    *ldst = (ushort)idx;
                    ldst++;
                }
            }
            CV_Assert( n1 == n );
        }
        else
        {
            int *ldst, *rdst;
            ldst = buf->data.i + left->buf_idx*length_buf_row +
                vi*scount + left->offset;
            rdst = buf->data.i + right->buf_idx*length_buf_row +
                vi*scount + right->offset;

            // split sorted
            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                {
                    *rdst = idx;
                    rdst++;
                }
                else
                {
                    *ldst = idx;
                    ldst++;
                }
            }
            CV_Assert( n1 == n );
        }
    }

    // split cv_labels using newIdx relocation table
    int *src_lbls_buf = tempBuf + n;
    const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);

    for(int i = 0; i < n; i++)
        tempBuf[i] = src_lbls[i];

    if (data->is_buf_16u)
    {
        unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + left->offset);
        unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + right->offset);

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = (unsigned short)idx;
                rdst++;
            }
            else
            {
                *ldst = (unsigned short)idx;
                ldst++;
            }
        }
    }
    else
    {
        int *ldst = buf->data.i + left->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + left->offset;
        int *rdst = buf->data.i + right->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + right->offset;

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }

    // split sample indices
    int *sampleIdx_src_buf = tempBuf + n;
    const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);

    for(int i = 0; i < n; i++)
        tempBuf[i] = sampleIdx_src[i];

    if (data->is_buf_16u)
    {
        unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + left->offset);
        unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + right->offset);
        for (int i = 0; i < n; i++)
        {
            unsigned short idx = (unsigned short)tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }
    else
    {
        int* ldst = buf->data.i + left->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + left->offset;
        int* rdst = buf->data.i + right->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + right->offset;
        for (int i = 0; i < n; i++)
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }

    for( int vi = 0; vi < data->var_count; vi++ )
    {
        left->set_num_valid(vi, (int)(nl));
        right->set_num_valid(vi, (int)(nr));
    }

    // deallocate the parent node data that is not needed anymore
    data->free_node_data(node);
}

static void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap)
{
    if ( node && node->split )
    {
        featureMap.ptr<int>(0)[node->split->var_idx] = 1;
        auxMarkFeaturesInMap( node->left, featureMap );
        auxMarkFeaturesInMap( node->right, featureMap );
    }
}

void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap )
{
    auxMarkFeaturesInMap( root, featureMap );
}

//----------------------------------- CascadeBoost --------------------------------------

bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
                            int _numSamples,
                            int _precalcValBufSize, int _precalcIdxBufSize,
                            const CvCascadeBoostParams& _params )
{
    bool isTrained = false;
    CV_Assert( !data );
    clear();
    data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
                                        _precalcValBufSize, _precalcIdxBufSize, _params );
    CvMemStorage *storage = cvCreateMemStorage();
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    storage = 0;

    set_params( _params );
    if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
        data->do_responses_copy();

    update_weights( 0 );

    cout << "+----+---------+---------+" << endl;
    cout << "|  N |    HR   |    FA   |" << endl;
    cout << "+----+---------+---------+" << endl;

    do
    {
        CvCascadeBoostTree* tree = new CvCascadeBoostTree;
        if( !tree->train( data, subsample_mask, this ) )
        {
            delete tree;
            break;
        }
        cvSeqPush( weak, &tree );
        update_weights( tree );
        trim_weights();
        if( cvCountNonZero(subsample_mask) == 0 )
            break;
    }
    while( !isErrDesired() && (weak->total < params.weak_count) );

    if(weak->total > 0)
    {
        data->is_classifier = true;
        data->free_train_data();
        isTrained = true;
    }
    else
        clear();

    return isTrained;
}

float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const
{
    CV_Assert( weak );
    double sum = 0;
    CvSeqReader reader;
    cvStartReadSeq( weak, &reader );
    cvSetSeqReaderPos( &reader, 0 );
    for( int i = 0; i < weak->total; i++ )
    {
        CvBoostTree* wtree;
        CV_READ_SEQ_ELEM( wtree, reader );
        sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value;
    }
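    // without returnSum, compare the accumulated stage sum against the trained stage
    // threshold (minus a small epsilon) to get a binary 0/1 decision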
    if( !returnSum )
        sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
    return (float)sum;
}

bool CvCascadeBoost::set_params( const CvBoostParams& _params )
{
    minHitRate = ((CvCascadeBoostParams&)_params).minHitRate;
    maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm;
    return ( ( minHitRate > 0 ) && ( minHitRate < 1) &&
        ( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1) &&
        CvBoost::set_params( _params ));
}

void CvCascadeBoost::update_weights( CvBoostTree* tree )
{
    int n = data->sample_count;
    double sumW = 0.;
    int step = 0;
    float* fdata = 0;
    int *sampleIdxBuf;
    const int* sampleIdx = 0;
    int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
                       ( !tree ? n*sizeof(int) : 0 );
    cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
    uchar* cur_inn_buf_pos = inn_buf.data();
    if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
    {
        step = CV_IS_MAT_CONT(data->responses_copy->type) ?
            1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
        fdata = data->responses_copy->data.fl;
        sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
        sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
    }
    CvMat* buf = data->buf;
    size_t length_buf_row = data->get_length_subbuf();
    if( !tree ) // before training the first tree, initialize weights and other parameters
    {
        int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
        const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
        // in case of logitboost and gentle adaboost each weak tree is a regression tree,
        // so we need to convert class labels to floating-point values
        double w0 = 1./n;
        double p[2] = { 1, 1 };
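        // every sample starts with the same weight w0 = 1/n; p[] holds the (uniform)
        // per-class priors that scale the initial weights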

        cvReleaseMat( &orig_response );
        cvReleaseMat( &sum_response );
        cvReleaseMat( &weak_eval );
        cvReleaseMat( &subsample_mask );
        cvReleaseMat( &weights );

        orig_response = cvCreateMat( 1, n, CV_32S );
        weak_eval = cvCreateMat( 1, n, CV_64F );
        subsample_mask = cvCreateMat( 1, n, CV_8U );
        weights = cvCreateMat( 1, n, CV_64F );
        subtree_weights = cvCreateMat( 1, n + 2, CV_64F );

        if (data->is_buf_16u)
        {
            unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count);
            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                // make all the samples active at start.
                // later, trim_weights() may deactivate/reactivate some of them, if needed
                subsample_mask->data.ptr[i] = (uchar)1;
                // make all the initial weights the same.
                weights->data.db[i] = w0*p[classLabels[i]];
                // set the labels to find (from within weak tree learning proc)
                // the particular sample weight, and where to store the response.
                labels[i] = (unsigned short)i;
            }
        }
        else
        {
            int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count;

            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                subsample_mask->data.ptr[i] = (uchar)1;
                weights->data.db[i] = w0*p[classLabels[i]];
                labels[i] = i;
            }
        }

        if( params.boost_type == LOGIT )
        {
            sum_response = cvCreateMat( 1, n, CV_64F );

            for( int i = 0; i < n; i++ )
            {
                sum_response->data.db[i] = 0;
                fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
            }

            // in case of logitboost each weak tree is a regression tree.
            // the target function values are recalculated for each of the trees
            data->is_classifier = false;
        }
        else if( params.boost_type == GENTLE )
        {
            for( int i = 0; i < n; i++ )
                fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];

            data->is_classifier = false;
        }
    }
    else
    {
        // at this moment, for all the samples that participated in the training of the most
        // recent weak classifier we know the responses. For other samples we need to compute them
        if( have_subsample )
        {
            // invert the subsample mask
            cvXorS( subsample_mask, cvScalar(1.), subsample_mask );

            // run tree through all the non-processed samples
            for( int i = 0; i < n; i++ )
                if( subsample_mask->data.ptr[i] )
                {
                    weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
                }
        }

        // now update weights and other parameters for each type of boosting
        if( params.boost_type == DISCRETE )
        {
            // Discrete AdaBoost:
            //   weak_eval[i] (=f(x_i)) is in {-1,1}
            //   err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
            //   C = log((1-err)/err)
            //   w_i *= exp(C*(f(x_i) != y_i))

            double C, err = 0.;
            double scale[] = { 1., 0. };

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i];
                sumW += w;
                err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
            }

            if( sumW != 0 )
                err /= sumW;
            C = err = -logRatio( err );
            scale[1] = exp(err);

            sumW = 0;
            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*
                    scale[weak_eval->data.db[i] != orig_response->data.i[i]];
                sumW += w;
                weights->data.db[i] = w;
            }

            tree->scale( C );
        }
        else if( params.boost_type == REAL )
        {
            // Real AdaBoost:
            //   weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
            //   w_i *= exp(-y_i*f(x_i))

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*weak_eval->data.db[i];
                sumW += w;
                weights->data.db[i] = w;
            }
        }
        else if( params.boost_type == LOGIT )
        {
            // LogitBoost:
            //   weak_eval[i] = f(x_i) in [-z_max,z_max]
            //   sum_response = F(x_i).
            //   F(x_i) += 0.5*f(x_i)
            //   p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))) = 1/(1+exp(-2*F(x_i)))
            //   reuse weak_eval: weak_eval[i] <- p(x_i)
            //   w_i = p(x_i)*(1 - p(x_i))
            //   z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
            //   store z_i to the data->data_root as the new target responses

            const double lbWeightThresh = FLT_EPSILON;
            const double lbZMax = 10.;

            for( int i = 0; i < n; i++ )
            {
                double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
                sum_response->data.db[i] = s;
                weak_eval->data.db[i] = -2*s;
            }

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double p = 1./(1. + weak_eval->data.db[i]);
                double w = p*(1 - p), z;
                w = MAX( w, lbWeightThresh );
                weights->data.db[i] = w;
                sumW += w;
                if( orig_response->data.i[i] > 0 )
                {
                    z = 1./p;
                    fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
                }
                else
                {
                    z = 1./(1-p);
                    fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
                }
            }
        }
        else
        {
            // Gentle AdaBoost:
            //   weak_eval[i] = f(x_i) in [-1,1]
            //   w_i *= exp(-y_i*f(x_i))
            assert( params.boost_type == GENTLE );

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i] * weak_eval->data.db[i];
                weights->data.db[i] = w;
                sumW += w;
            }
        }
    }

    // renormalize weights
    if( sumW > FLT_EPSILON )
    {
        sumW = 1./sumW;
        for( int i = 0; i < n; ++i )
            weights->data.db[i] *= sumW;
    }
}

bool CvCascadeBoost::isErrDesired()
{
    int sCount = data->sample_count,
        numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0;
    vector<float> eval(sCount);

    for( int i = 0; i < sCount; i++ )
        if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F )
            eval[numPos++] = predict( i, true );

    std::sort(&eval[0], &eval[0] + numPos);

    int thresholdIdx = (int)((1.0F - minHitRate) * numPos);

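    // positives are sorted by their stage sum, so taking the value at thresholdIdx
    // as the stage threshold keeps at least minHitRate of the positives at or above it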
    threshold = eval[ thresholdIdx ];
    numPosTrue = numPos - thresholdIdx;
    for( int i = thresholdIdx - 1; i >= 0; i--)
        if ( abs( eval[i] - threshold) < FLT_EPSILON )
            numPosTrue++;
    float hitRate = ((float) numPosTrue) / ((float) numPos);

    for( int i = 0; i < sCount; i++ )
    {
        if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F )
        {
            numNeg++;
            if( predict( i ) )
                numFalse++;
        }
    }
    float falseAlarm = ((float) numFalse) / ((float) numNeg);

    cout << "|"; cout.width(4); cout << right << weak->total;
    cout << "|"; cout.width(9); cout << right << hitRate;
    cout << "|"; cout.width(9); cout << right << falseAlarm;
    cout << "|" << endl;
    cout << "+----+---------+---------+" << endl;

    return falseAlarm <= maxFalseAlarm;
}

void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const
{
//    char cmnt[30];
    CvCascadeBoostTree* weakTree;
    fs << CC_WEAK_COUNT << weak->total;
    fs << CC_STAGE_THRESHOLD << threshold;
    fs << CC_WEAK_CLASSIFIERS << "[";
    for( int wi = 0; wi < weak->total; wi++)
    {
        /*sprintf( cmnt, "tree %i", wi );
        cvWriteComment( fs, cmnt, 0 );*/
        weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
        weakTree->write( fs, featureMap );
    }
    fs << "]";
}

bool CvCascadeBoost::read( const FileNode &node,
                           const CvFeatureEvaluator* _featureEvaluator,
                           const CvCascadeBoostParams& _params )
{
    CvMemStorage* storage;
    clear();
    data = new CvCascadeBoostTrainData( _featureEvaluator, _params );
    set_params( _params );

    node[CC_STAGE_THRESHOLD] >> threshold;
    FileNode rnode = node[CC_WEAK_CLASSIFIERS];

    storage = cvCreateMemStorage();
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ )
    {
        CvCascadeBoostTree* tree = new CvCascadeBoostTree();
        tree->read( *it, this, data );
        cvSeqPush( weak, &tree );
    }
    return true;
}

void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap )
{
    for( int wi = 0; wi < weak->total; wi++ )
    {
        CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
        weakTree->markFeaturesInMap( featureMap );
    }
}