1 #include "opencv2/core.hpp"
2 #include "opencv2/core/utility.hpp"
3 
4 using cv::Size;
5 using cv::Mat;
6 using cv::Point;
7 using cv::FileStorage;
8 using cv::Rect;
9 using cv::Ptr;
10 using cv::FileNode;
11 using cv::Mat_;
12 using cv::Range;
13 using cv::FileNodeIterator;
14 using cv::ParallelLoopBody;
15 
16 
17 using cv::Size;
18 using cv::Mat;
19 using cv::Point;
20 using cv::FileStorage;
21 using cv::Rect;
22 using cv::Ptr;
23 using cv::FileNode;
24 using cv::Mat_;
25 using cv::Range;
26 using cv::FileNodeIterator;
27 using cv::ParallelLoopBody;
28 
29 
30 #include "boost.h"
31 #include "cascadeclassifier.h"
32 #include <queue>
33 
34 #include "cvconfig.h"
35 
36 using namespace std;
37 
// Log-odds (logit) transform of a probability: log(p / (1 - p)).
// The input is clamped to [eps, 1-eps] so the result is always finite.
static inline double logRatio( double val )
{
    const double kEps = 1e-5;
    // Keep the probability strictly inside (0, 1) before taking the ratio.
    const double clamped = std::min( std::max( val, kEps ), 1. - kEps );
    return std::log( clamped / (1. - clamped) );
}
47 
// Comparator over indices into an external value array: index a orders
// before index b iff arr[a] < arr[b].  Passing it to std::sort yields the
// permutation that sorts the data indirectly (the data itself stays put).
template<typename T, typename Idx>
class LessThanIdx
{
public:
    LessThanIdx( const T* values ) : arr(values) {}
    bool operator()(Idx lhs, Idx rhs) const
    {
        return arr[lhs] < arr[rhs];
    }
    const T* arr; // borrowed, not owned; must outlive the comparator
};
56 
cvAlign(int size,int align)57 static inline int cvAlign( int size, int align )
58 {
59     CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX );
60     return (size + align - 1) & -align;
61 }
62 
63 #define CV_THRESHOLD_EPS (0.00001F)
64 
65 static const int MinBlockSize = 1 << 16;
66 static const int BlockSizeDelta = 1 << 10;
67 
68 // TODO remove this code duplication with ml/precomp.hpp
69 
icvCmpIntegers(const void * a,const void * b)70 static int CV_CDECL icvCmpIntegers( const void* a, const void* b )
71 {
72     return *(const int*)a - *(const int*)b;
73 }
74 
cvPreprocessIndexArray(const CvMat * idx_arr,int data_arr_size,bool check_for_duplicates=false)75 static CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false )
76 {
77     CvMat* idx = 0;
78 
79     CV_FUNCNAME( "cvPreprocessIndexArray" );
80 
81     __CV_BEGIN__;
82 
83     int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1;
84     uchar* srcb = 0;
85     int* srci = 0;
86     int* dsti;
87 
88     if( !CV_IS_MAT(idx_arr) )
89         CV_ERROR( CV_StsBadArg, "Invalid index array" );
90 
91     if( idx_arr->rows != 1 && idx_arr->cols != 1 )
92         CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" );
93 
94     idx_total = idx_arr->rows + idx_arr->cols - 1;
95     srcb = idx_arr->data.ptr;
96     srci = idx_arr->data.i;
97 
98     type = CV_MAT_TYPE(idx_arr->type);
99     step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type);
100 
101     switch( type )
102     {
103     case CV_8UC1:
104     case CV_8SC1:
105         // idx_arr is array of 1's and 0's -
106         // i.e. it is a mask of the selected components
107         if( idx_total != data_arr_size )
108             CV_ERROR( CV_StsUnmatchedSizes,
109             "Component mask should contain as many elements as the total number of input variables" );
110 
111         for( i = 0; i < idx_total; i++ )
112             idx_selected += srcb[i*step] != 0;
113 
114         if( idx_selected == 0 )
115             CV_ERROR( CV_StsOutOfRange, "No components/input_variables is selected!" );
116 
117         break;
118     case CV_32SC1:
119         // idx_arr is array of integer indices of selected components
120         if( idx_total > data_arr_size )
121             CV_ERROR( CV_StsOutOfRange,
122             "index array may not contain more elements than the total number of input variables" );
123         idx_selected = idx_total;
124         // check if sorted already
125         for( i = 0; i < idx_total; i++ )
126         {
127             int val = srci[i*step];
128             if( val >= prev )
129             {
130                 is_sorted = 0;
131                 break;
132             }
133             prev = val;
134         }
135         break;
136     default:
137         CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type "
138                                            "(it should be 8uC1, 8sC1 or 32sC1)" );
139     }
140 
141     CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 ));
142     dsti = idx->data.i;
143 
144     if( type < CV_32SC1 )
145     {
146         for( i = 0; i < idx_total; i++ )
147             if( srcb[i*step] )
148                 *dsti++ = i;
149     }
150     else
151     {
152         for( i = 0; i < idx_total; i++ )
153             dsti[i] = srci[i*step];
154 
155         if( !is_sorted )
156             qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers );
157 
158         if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size )
159             CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" );
160 
161         if( check_for_duplicates )
162         {
163             for( i = 1; i < idx_total; i++ )
164                 if( dsti[i] <= dsti[i-1] )
165                     CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" );
166         }
167     }
168 
169     __CV_END__;
170 
171     if( cvGetErrStatus() < 0 )
172         cvReleaseMat( &idx );
173 
174     return idx;
175 }
176 
177 //----------------------------- CascadeBoostParams -------------------------------------------------
178 
// Default parameters: Gentle AdaBoost, 99.5% minimal hit rate and 50%
// maximal false-alarm rate per stage; surrogate splits and 1SE-rule
// pruning are disabled for cascade training.
CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F), maxFalseAlarm( 0.5F )
{
    boost_type = CvBoost::GENTLE;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}
184 
// Parameterized constructor.  _boostType is forwarded to the CvBoostParams
// base, but see the NOTE below.
CvCascadeBoostParams::CvCascadeBoostParams( int _boostType,
        float _minHitRate, float _maxFalseAlarm,
        double _weightTrimRate, int _maxDepth, int _maxWeakCount ) :
    CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 )
{
    // NOTE(review): boost_type is unconditionally reset to GENTLE here, so
    // the _boostType argument only affects the base-class initialization
    // above -- confirm this override is intentional before changing it.
    boost_type = CvBoost::GENTLE;
    minHitRate = _minHitRate;
    maxFalseAlarm = _maxFalseAlarm;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}
195 
write(FileStorage & fs) const196 void CvCascadeBoostParams::write( FileStorage &fs ) const
197 {
198     string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
199                           boost_type == CvBoost::REAL ? CC_REAL_BOOST :
200                           boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
201                           boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
202     CV_Assert( !boostTypeStr.empty() );
203     fs << CC_BOOST_TYPE << boostTypeStr;
204     fs << CC_MINHITRATE << minHitRate;
205     fs << CC_MAXFALSEALARM << maxFalseAlarm;
206     fs << CC_TRIM_RATE << weight_trim_rate;
207     fs << CC_MAX_DEPTH << max_depth;
208     fs << CC_WEAK_COUNT << weak_count;
209 }
210 
read(const FileNode & node)211 bool CvCascadeBoostParams::read( const FileNode &node )
212 {
213     string boostTypeStr;
214     FileNode rnode = node[CC_BOOST_TYPE];
215     rnode >> boostTypeStr;
216     boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
217                  !boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
218                  !boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
219                  !boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
220     if (boost_type == -1)
221         CV_Error( CV_StsBadArg, "unsupported Boost type" );
222     node[CC_MINHITRATE] >> minHitRate;
223     node[CC_MAXFALSEALARM] >> maxFalseAlarm;
224     node[CC_TRIM_RATE] >> weight_trim_rate ;
225     node[CC_MAX_DEPTH] >> max_depth ;
226     node[CC_WEAK_COUNT] >> weak_count ;
227     if ( minHitRate <= 0 || minHitRate > 1 ||
228          maxFalseAlarm <= 0 || maxFalseAlarm > 1 ||
229          weight_trim_rate <= 0 || weight_trim_rate > 1 ||
230          max_depth <= 0 || weak_count <= 0 )
231         CV_Error( CV_StsBadArg, "bad parameters range");
232     return true;
233 }
234 
printDefaults() const235 void CvCascadeBoostParams::printDefaults() const
236 {
237     cout << "--boostParams--" << endl;
238     cout << "  [-bt <{" << CC_DISCRETE_BOOST << ", "
239                         << CC_REAL_BOOST << ", "
240                         << CC_LOGIT_BOOST ", "
241                         << CC_GENTLE_BOOST << "(default)}>]" << endl;
242     cout << "  [-minHitRate <min_hit_rate> = " << minHitRate << ">]" << endl;
243     cout << "  [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl;
244     cout << "  [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl;
245     cout << "  [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl;
246     cout << "  [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl;
247 }
248 
printAttrs() const249 void CvCascadeBoostParams::printAttrs() const
250 {
251     string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
252                           boost_type == CvBoost::REAL ? CC_REAL_BOOST :
253                           boost_type == CvBoost::LOGIT  ? CC_LOGIT_BOOST :
254                           boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
255     CV_Assert( !boostTypeStr.empty() );
256     cout << "boostType: " << boostTypeStr << endl;
257     cout << "minHitRate: " << minHitRate << endl;
258     cout << "maxFalseAlarmRate: " <<  maxFalseAlarm << endl;
259     cout << "weightTrimRate: " << weight_trim_rate << endl;
260     cout << "maxDepth: " << max_depth << endl;
261     cout << "maxWeakCount: " << weak_count << endl;
262 }
263 
scanAttr(const string prmName,const string val)264 bool CvCascadeBoostParams::scanAttr( const string prmName, const string val)
265 {
266     bool res = true;
267 
268     if( !prmName.compare( "-bt" ) )
269     {
270         boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
271                      !val.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
272                      !val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
273                      !val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
274         if (boost_type == -1)
275             res = false;
276     }
277     else if( !prmName.compare( "-minHitRate" ) )
278     {
279         minHitRate = (float) atof( val.c_str() );
280     }
281     else if( !prmName.compare( "-maxFalseAlarmRate" ) )
282     {
283         maxFalseAlarm = (float) atof( val.c_str() );
284     }
285     else if( !prmName.compare( "-weightTrimRate" ) )
286     {
287         weight_trim_rate = (float) atof( val.c_str() );
288     }
289     else if( !prmName.compare( "-maxDepth" ) )
290     {
291         max_depth = atoi( val.c_str() );
292     }
293     else if( !prmName.compare( "-maxWeakCount" ) )
294     {
295         weak_count = atoi( val.c_str() );
296     }
297     else
298         res = false;
299 
300     return res;
301 }
302 
// Builds the root node of a decision tree over the subset of training
// samples selected by _subsample_idx (all samples when it is NULL).  When
// the selection is the identity over all samples, the existing data_root
// is cloned cheaply; otherwise the precalculated sorted-index buffers,
// cv_labels and sample_indices are re-subsampled into a fresh node.
CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_idx )
{
    CvDTreeNode* root = 0;
    CvMat* isubsample_idx = 0;   // canonical sorted index list (owned here)
    CvMat* subsample_co = 0;     // per-sample count/offset pairs (owned here)

    bool isMakeRootCopy = true;

    if( !data_root )
        CV_Error( CV_StsError, "No training data has been set" );

    if( _subsample_idx )
    {
        // Normalize the index array (mask or index list) into sorted indices.
        CV_Assert( (isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count )) != 0 );

        // The cheap copy path applies only when the selection is the
        // identity permutation over every sample.
        if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count )
        {
            const int* sidx = isubsample_idx->data.i;
            for( int i = 0; i < sample_count; i++ )
            {
                if( sidx[i] != i )
                {
                    isMakeRootCopy = false;
                    break;
                }
            }
        }
        else
            isMakeRootCopy = false;
    }

    if( isMakeRootCopy )
    {
        // make a copy of the root node
        CvDTreeNode temp;
        int i;
        root = new_node( 0, 1, 0, 0 );
        // Copy data_root wholesale, but keep the freshly allocated per-node
        // arrays of the new node (saved in `temp`) and refill them.
        temp = *root;
        *root = *data_root;
        root->num_valid = temp.num_valid;
        if( root->num_valid )
        {
            for( i = 0; i < var_count; i++ )
                root->num_valid[i] = data_root->num_valid[i];
        }
        root->cv_Tn = temp.cv_Tn;
        root->cv_node_risk = temp.cv_node_risk;
        root->cv_node_error = temp.cv_node_error;
    }
    else
    {
        int* sidx = isubsample_idx->data.i;
        // co - array of count/offset pairs (to handle duplicated values in _subsample_idx)
        int* co, cur_ofs = 0;
        int workVarCount = get_work_var_count();
        int count = isubsample_idx->rows + isubsample_idx->cols - 1;

        root = new_node( 0, count, 1, 0 );

        // Histogram the selection: co[2i] = multiplicity of sample i,
        // co[2i+1] = offset of its first slot in the subsampled ordering
        // (-1 when the sample is not selected).
        CV_Assert( (subsample_co = cvCreateMat( 1, sample_count*2, CV_32SC1 )) != 0);
        cvZero( subsample_co );
        co = subsample_co->data.i;
        for( int i = 0; i < count; i++ )
            co[sidx[i]*2]++;
        for( int i = 0; i < sample_count; i++ )
        {
            if( co[i*2] )
            {
                co[i*2+1] = cur_ofs;
                cur_ofs += co[i*2];
            }
            else
                co[i*2+1] = -1;
        }

        // Scratch layout: src indices (int), src values (float), sample indices (int).
        cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float)));
        // subsample ordered variables
        for( int vi = 0; vi < numPrecalcIdx; vi++ )
        {
            int ci = get_var_type(vi);
            CV_Assert( ci < 0 );   // must be an ordered variable

            int *src_idx_buf = (int*)inn_buf.data();
            float *src_val_buf = (float*)(src_idx_buf + sample_count);
            int* sample_indices_buf = (int*)(src_val_buf + sample_count);
            const int* src_idx = 0;
            const float* src_val = 0;
            get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf );

            int j = 0, idx, count_i;
            int num_valid = data_root->get_num_valid(vi);
            CV_Assert( num_valid == sample_count );

            // Walk the full data set in presorted order and emit each selected
            // sample (repeated per its multiplicity), so the subsampled index
            // buffer remains sorted by feature value.
            if (is_buf_16u)
            {
                // NOTE(review): this branch adds data_root->offset while the
                // 32-bit branch below uses root->offset -- confirm whether
                // the asymmetry is intentional.
                unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                    (size_t)vi*sample_count + data_root->offset);
                for( int i = 0; i < num_valid; i++ )
                {
                    idx = src_idx[i];
                    count_i = co[idx*2];
                    if( count_i )
                        for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
                            udst_idx[j] = (unsigned short)cur_ofs;
                }
            }
            else
            {
                int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
                    (size_t)vi*sample_count + root->offset;
                for( int i = 0; i < num_valid; i++ )
                {
                    idx = src_idx[i];
                    count_i = co[idx*2];
                    if( count_i )
                        for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
                            idst_idx[j] = cur_ofs;
                }
            }
        }

        // subsample cv_lables
        const int* src_lbls = get_cv_labels(data_root, (int*)inn_buf.data());
        if (is_buf_16u)
        {
            unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                (size_t)(workVarCount-1)*sample_count + root->offset);
            for( int i = 0; i < count; i++ )
                udst[i] = (unsigned short)src_lbls[sidx[i]];
        }
        else
        {
            int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
                (size_t)(workVarCount-1)*sample_count + root->offset;
            for( int i = 0; i < count; i++ )
                idst[i] = src_lbls[sidx[i]];
        }

        // subsample sample_indices
        const int* sample_idx_src = get_sample_indices(data_root, (int*)inn_buf.data());
        if (is_buf_16u)
        {
            unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                (size_t)workVarCount*sample_count + root->offset);
            for( int i = 0; i < count; i++ )
                sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
        }
        else
        {
            int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
                (size_t)workVarCount*sample_count + root->offset;
            for( int i = 0; i < count; i++ )
                sample_idx_dst[i] = sample_idx_src[sidx[i]];
        }

        // Every variable of the new node sees all `count` selected samples.
        for( int vi = 0; vi < var_count; vi++ )
            root->set_num_valid(vi, count);
    }

    cvReleaseMat( &isubsample_idx );
    cvReleaseMat( &subsample_co );

    return root;
}
467 
468 //---------------------------- CascadeBoostTrainData -----------------------------
469 
// Lightweight constructor: wires up the feature evaluator and the
// variable-type table and allocates the tree node/split storage, but does
// NOT bind any samples (no response matrix, no precalc buffers).
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                  const CvDTreeParams& _params )
{
    is_classifier = true;
    var_all = var_count = (int)_featureEvaluator->getNumFeatures();

    featureEvaluator = _featureEvaluator;
    shared = true;
    set_params( _params );
    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    // var_type has one entry per variable plus two service entries
    // (CvDTreeTrainData convention).
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        // All features categorical: var_type[vi] holds the categorical index.
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        // All features ordered: negative ids mark ordered variables.
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;

    // Size the tree storage so one block can hold the largest split record.
    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
    split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
}
510 
// Full constructor: immediately binds _numSamples training samples via
// setData(), which also allocates the value/index precalculation buffers
// (budgets are given in megabytes).
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                 int _numSamples,
                                                 int _precalcValBufSize, int _precalcIdxBufSize,
                                                 const CvDTreeParams& _params )
{
    setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params );
}
518 
// (Re)binds the training set: _numSamples samples produced by
// _featureEvaluator, with up to _precalcValBufSize / _precalcIdxBufSize
// megabytes devoted to precalculated feature values / presorted index
// tables.  Allocates all per-training buffers and creates the root node.
void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator,
                                      int _numSamples,
                                      int _precalcValBufSize, int _precalcIdxBufSize,
                                      const CvDTreeParams& _params )
{
    int* idst = 0;
    unsigned short* udst = 0;

    uint64 effective_buf_size = 0;
    int effective_buf_height = 0, effective_buf_width = 0;


    clear();
    shared = true;
    have_labels = true;
    have_priors = false;
    is_classifier = true;

    rng = &cv::theRNG();

    set_params( _params );

    CV_Assert( _featureEvaluator );
    featureEvaluator = _featureEvaluator;

    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    // Responses are the per-sample class labels supplied by the evaluator.
    _resp = cvMat(featureEvaluator->getCls());
    responses = &_resp;
    // TODO: check responses: elements must be 0 or 1

    if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0)
        CV_Error( CV_StsOutOfRange, "_numPrecalcVal and _numPrecalcIdx must be positive or 0" );

    var_count = var_all = featureEvaluator->getNumFeatures() * featureEvaluator->getFeatureSize();
    sample_count = _numSamples;

    // 16-bit index storage halves the buffer whenever indices fit in ushort.
    is_buf_16u = false;
    if (sample_count < 65536)
        is_buf_16u = true;

    // Translate the megabyte budgets into how many features can have their
    // values / sorted index rows precalculated (capped at var_count).
    numPrecalcVal = min( cvRound((double)_precalcValBufSize*1048576. / (sizeof(float)*sample_count)), var_count );
    numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. /
                ((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count );

    assert( numPrecalcIdx >= 0 && numPrecalcVal >= 0 );

    valCache.create( numPrecalcVal, sample_count, CV_32FC1 );
    // var_type: one entry per variable plus two service entries.
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );

    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        // All features categorical: no presorted-index precalc is possible.
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        // All features ordered: negative ids mark ordered variables.
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;
    work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/;
    buf_count = 2;

    buf_size = -1; // the member buf_size is obsolete

    effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
    effective_buf_width = sample_count;
    effective_buf_height = work_var_count+1;

    // Grow the larger dimension so both sub-buffers fit in one 2D matrix.
    if (effective_buf_width >= effective_buf_height)
        effective_buf_height *= buf_count;
    else
        effective_buf_width *= buf_count;

    // Guard: the product must round-trip through int-sized CvMat dimensions.
    if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
    {
        CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
    }

    if ( is_buf_16u )
        buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 );
    else
        buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 );

    cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );

    // precalculate valCache and set indices in buf
    precalculate();

    // now calculate the maximum size of split,
    // create memory storage that will keep nodes and splits of the decision tree
    // allocate root node and the buffer for the whole training data
    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0,sample_count - 33)/32)*sizeof(int),sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage );

    int nvSize = var_count*sizeof(int);
    nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*));
    int tempBlockSize = nvSize;
    tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize );
    temp_storage = cvCreateMemStorage( tempBlockSize );
    nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage );

    data_root = new_node( 0, sample_count, 0, 0 );

    // set sample labels
    if (is_buf_16u)
        udst = (unsigned short*)(buf->data.s + (size_t)work_var_count*sample_count);
    else
        idst = buf->data.i + (size_t)work_var_count*sample_count;

    // Identity mapping: sample i initially occupies slot i.
    for (int si = 0; si < sample_count; si++)
    {
        if (udst)
            udst[si] = (unsigned short)si;
        else
            idst[si] = si;
    }
    for( int vi = 0; vi < var_count; vi++ )
        data_root->set_num_valid(vi, sample_count);
    for( int vi = 0; vi < cat_var_count; vi++ )
        cat_count->data.i[vi] = max_c_count;

    cat_count->data.i[cat_var_count] = 2;   // cv_labels slot: two classes

    maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
    split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage );

    // Uniform priors over the (two) classes.
    priors = cvCreateMat( 1, get_num_classes(), CV_64F );
    cvSet(priors, cvScalar(1));
    priors_mult = cvCloneMat( priors );
    counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
    direction = cvCreateMat( 1, sample_count, CV_8UC1 );
    split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer
}
668 
// Releases the per-training buffers; the cached feature values go with them.
void CvCascadeBoostTrainData::free_train_data()
{
    CvDTreeTrainData::free_train_data();
    valCache.release();
}
674 
get_class_labels(CvDTreeNode * n,int * labelsBuf)675 const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf)
676 {
677     int nodeSampleCount = n->sample_count;
678     int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );
679 
680     int* sampleIndicesBuf = labelsBuf; //
681     const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
682     for( int si = 0; si < nodeSampleCount; si++ )
683     {
684         int sidx = sampleIndices[si];
685         labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
686     }
687     return labelsBuf;
688 }
689 
// Sample indices live in the pseudo-variable slot right after the work
// variables (see setData()); delegate to the base-class accessor.
const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
}
694 
// cv_labels occupy the last work-variable slot (get_work_var_count() - 1).
const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
}
699 
// For ordered variable vi at node n, produces the node's feature values in
// ascending order (*ordValues) together with the permutation that sorts
// them (*sortedIndices).  Three tiers, fastest first: presorted index rows
// in `buf` for vi < numPrecalcIdx, cached values for vi < numPrecalcVal,
// otherwise values are evaluated on the fly and sorted here.
void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
        const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
{
    int nodeSampleCount = n->sample_count;
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);

    if ( vi < numPrecalcIdx )
    {
        // The sorting permutation was precalculated into `buf`.
        if( !is_buf_16u )
            *sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + (size_t)vi*sample_count + n->offset;
        else
        {
            // 16-bit storage: widen the indices into the caller's buffer.
            const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
                                                    (size_t)vi*sample_count + n->offset );
            for( int i = 0; i < nodeSampleCount; i++ )
                sortedIndicesBuf[i] = shortIndices[i];

            *sortedIndices = sortedIndicesBuf;
        }

        if( vi < numPrecalcVal )
        {
            // Values are cached too: gather them in sorted order.
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] =  valCache.at<float>( vi, idx);
            }
        }
        else
        {
            // Only the order was precalculated; recompute each value.
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = (*featureEvaluator)( vi, idx);
            }
        }
    }
    else // vi >= numPrecalcIdx
    {
        // Nothing presorted: collect the values, then sort indices by value.
        cv::AutoBuffer<float> abuf(nodeSampleCount);
        float* sampleValues = &abuf[0];

        if ( vi < numPrecalcVal )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                sampleValues[i] = valCache.at<float>( vi, sampleIndices[i] );
            }
        }
        else
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                sampleValues[i] = (*featureEvaluator)( vi, sampleIndices[i]);
            }
        }
        std::sort(sortedIndicesBuf, sortedIndicesBuf + nodeSampleCount, LessThanIdx<float, int>(&sampleValues[0]) );
        for( int i = 0; i < nodeSampleCount; i++ )
            ordValuesBuf[i] = (&sampleValues[0])[sortedIndicesBuf[i]];
        *sortedIndices = sortedIndicesBuf;
    }

    *ordValues = ordValuesBuf;
}
768 
get_cat_var_data(CvDTreeNode * n,int vi,int * catValuesBuf)769 const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf )
770 {
771     int nodeSampleCount = n->sample_count;
772     int* sampleIndicesBuf = catValuesBuf; //
773     const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
774 
775     if ( vi < numPrecalcVal )
776     {
777         for( int i = 0; i < nodeSampleCount; i++ )
778             catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i]);
779     }
780     else
781     {
782         if( vi >= numPrecalcVal && vi < var_count )
783         {
784             for( int i = 0; i < nodeSampleCount; i++ )
785                 catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
786         }
787         else
788         {
789             get_cv_labels( n, catValuesBuf );
790         }
791     }
792 
793     return catValuesBuf;
794 }
795 
getVarValue(int vi,int si)796 float CvCascadeBoostTrainData::getVarValue( int vi, int si )
797 {
798     if ( vi < numPrecalcVal && !valCache.empty() )
799         return valCache.at<float>( vi, si );
800     return (*featureEvaluator)( vi, si );
801 }
802 
803 
// Parallel job that, for each feature in its range, writes into `buf` the
// permutation of sample indices sorted by that feature's value.  Only the
// index tables are stored; the values themselves are evaluated into a
// per-call scratch buffer and discarded ("idx-only" precalculation).
struct FeatureIdxOnlyPrecalc : ParallelLoopBody
{
    FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
    {
        featureEvaluator = _featureEvaluator;
        sample_count = _sample_count;
        // Both views point at the same matrix; is_buf_16u selects which is used.
        udst = (unsigned short*)_buf->data.s;
        idst = _buf->data.i;
        is_buf_16u = _is_buf_16u;
    }
    void operator()( const Range& range ) const
    {
        // Per-invocation scratch holding one feature column of values.
        cv::AutoBuffer<float> valCache(sample_count);
        float* valCachePtr = valCache.data();
        for ( int fi = range.start; fi < range.end; fi++)
        {
            for( int si = 0; si < sample_count; si++ )
            {
                valCachePtr[si] = (*featureEvaluator)( fi, si );
                // Seed row fi of the index table with the identity permutation.
                if ( is_buf_16u )
                    *(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
                else
                    *(idst + (size_t)fi*sample_count + si) = si;
            }
            // Sort the row's indices by the just-computed feature values.
            if ( is_buf_16u )
                std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCachePtr) );
            else
                std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCachePtr) );
        }
    }
    const CvFeatureEvaluator* featureEvaluator;
    int sample_count;
    int* idst;             // 32-bit index rows (used when !is_buf_16u)
    unsigned short* udst;  // 16-bit index rows (used when is_buf_16u)
    bool is_buf_16u;
};
840 
841 struct FeatureValAndIdxPrecalc : ParallelLoopBody
842 {
FeatureValAndIdxPrecalcFeatureValAndIdxPrecalc843     FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
844     {
845         featureEvaluator = _featureEvaluator;
846         valCache = _valCache;
847         sample_count = _sample_count;
848         udst = (unsigned short*)_buf->data.s;
849         idst = _buf->data.i;
850         is_buf_16u = _is_buf_16u;
851     }
operator ()FeatureValAndIdxPrecalc852     void operator()( const Range& range ) const
853     {
854         for ( int fi = range.start; fi < range.end; fi++)
855         {
856             for( int si = 0; si < sample_count; si++ )
857             {
858                 valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
859                 if ( is_buf_16u )
860                     *(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
861                 else
862                     *(idst + (size_t)fi*sample_count + si) = si;
863             }
864             if ( is_buf_16u )
865                 std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCache->ptr<float>(fi)) );
866             else
867                 std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCache->ptr<float>(fi)) );
868         }
869     }
870     const CvFeatureEvaluator* featureEvaluator;
871     Mat* valCache;
872     int sample_count;
873     int* idst;
874     unsigned short* udst;
875     bool is_buf_16u;
876 };
877 
878 struct FeatureValOnlyPrecalc : ParallelLoopBody
879 {
FeatureValOnlyPrecalcFeatureValOnlyPrecalc880     FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
881     {
882         featureEvaluator = _featureEvaluator;
883         valCache = _valCache;
884         sample_count = _sample_count;
885     }
operator ()FeatureValOnlyPrecalc886     void operator()( const Range& range ) const
887     {
888         for ( int fi = range.start; fi < range.end; fi++)
889             for( int si = 0; si < sample_count; si++ )
890                 valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
891     }
892     const CvFeatureEvaluator* featureEvaluator;
893     Mat* valCache;
894     int sample_count;
895 };
896 
precalculate()897 void CvCascadeBoostTrainData::precalculate()
898 {
899     int minNum = MIN( numPrecalcVal, numPrecalcIdx);
900 
901     double proctime = -TIME( 0 );
902     parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
903                    FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
904     parallel_for_( Range(0, minNum),
905                    FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
906     parallel_for_( Range(minNum, numPrecalcVal),
907                    FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
908     cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
909 }
910 
911 //-------------------------------- CascadeBoostTree ----------------------------------------
912 
predict(int sampleIdx) const913 CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const
914 {
915     CvDTreeNode* node = root;
916     if( !node )
917         CV_Error( CV_StsError, "The tree has not been trained yet" );
918 
919     if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered
920     {
921         while( node->left )
922         {
923             CvDTreeSplit* split = node->split;
924             float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
925             node = val <= split->ord.c ? node->left : node->right;
926         }
927     }
928     else // categorical
929     {
930         while( node->left )
931         {
932             CvDTreeSplit* split = node->split;
933             int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
934             node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right;
935         }
936     }
937     return node;
938 }
939 
// Serializes the tree in breadth-first order. Internal nodes are written as a
// flat list; each child reference is a positive index for another internal
// node and a non-positive index for a leaf (leaf k's value is leafVals[-k],
// written afterwards in the leaf-values list). Mirrors read() below.
void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
{
    int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount();
    // number of 32-bit words needed for a categorical subset mask
    int subsetN = (maxCatCount + 31)/32;
    queue<CvDTreeNode*> internalNodesQueue;
    // upper bound on the leaf count for a tree of the configured depth
    int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth);
    std::vector<float> leafVals(size);
    int leafValIdx = 0;       // decreases: leaves get indices 0, -1, -2, ...
    int internalNodeIdx = 1;  // increases: internal nodes get indices 1, 2, ...
    CvDTreeNode* tempNode;

    CV_DbgAssert( root );
    internalNodesQueue.push( root );

    fs << "{";
    fs << CC_INTERNAL_NODES << "[:";
    while (!internalNodesQueue.empty())
    {
        tempNode = internalNodesQueue.front();
        CV_Assert( tempNode->left );
        if ( !tempNode->left->left && !tempNode->left->right) // left node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->left->value;
            fs << leafValIdx-- ;
        }
        else
        {
            internalNodesQueue.push( tempNode->left );
            fs << internalNodeIdx++;
        }
        CV_Assert( tempNode->right );
        if ( !tempNode->right->left && !tempNode->right->right) // right node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->right->value;
            fs << leafValIdx--;
        }
        else
        {
            internalNodesQueue.push( tempNode->right );
            fs << internalNodeIdx++;
        }
        // remap the split's feature index if a feature map is provided
        int fidx = tempNode->split->var_idx;
        fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx);
        fs << fidx;
        if ( !maxCatCount )
            fs << tempNode->split->ord.c;       // ordered split: threshold
        else
            for( int i = 0; i < subsetN; i++ )  // categorical split: subset mask
                fs << tempNode->split->subset[i];
        internalNodesQueue.pop();
    }
    fs << "]"; // CC_INTERNAL_NODES

    fs << CC_LEAF_VALUES << "[:";
    for (int ni = 0; ni < -leafValIdx; ni++)
        fs << leafVals[ni];
    fs << "]"; // CC_LEAF_VALUES
    fs << "}";
}
999 
// Deserializes a tree written by CvCascadeBoostTree::write(). Both stored
// lists are traversed from the end backwards, so the tree is reconstructed
// bottom-up: internalNodesQueue holds the roots of already-restored subtrees
// until their parent is assembled; the last node pushed is the tree root.
void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble,
                                CvDTreeTrainData* _data )
{
    int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount();
    // 32-bit words per categorical subset mask
    int subsetN = (maxCatCount + 31)/32;
    // entries stored per internal node: two child refs + feature index +
    // either one threshold (ordered) or subsetN mask words (categorical)
    int step = 3 + ( maxCatCount>0 ? subsetN : 1 );

    queue<CvDTreeNode*> internalNodesQueue;
    FileNodeIterator internalNodesIt, leafValsuesIt;
    CvDTreeNode* prntNode, *cldNode;

    clear();
    data = _data;
    ensemble = _ensemble;
    pruned_tree_idx = 0;

    // read tree nodes
    FileNode rnode = node[CC_INTERNAL_NODES];
    internalNodesIt = rnode.end();
    leafValsuesIt = node[CC_LEAF_VALUES].end();
    // step back from end() onto the last stored element
    internalNodesIt--; leafValsuesIt--;
    for( size_t i = 0; i < rnode.size()/step; i++ )
    {
        prntNode = data->new_node( 0, 0, 0, 0 );
        if ( maxCatCount > 0 )
        {
            // categorical split: the subset mask was written last, so read in reverse
            prntNode->split = data->new_split_cat( 0, 0 );
            for( int j = subsetN-1; j>=0; j--)
            {
                *internalNodesIt >> prntNode->split->subset[j]; internalNodesIt--;
            }
        }
        else
        {
            // ordered split: restore the threshold
            float split_value;
            *internalNodesIt >> split_value; internalNodesIt--;
            prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0);
        }
        *internalNodesIt >> prntNode->split->var_idx; internalNodesIt--;
        int ridx, lidx;
        *internalNodesIt >> ridx; internalNodesIt--;
        *internalNodesIt >> lidx;internalNodesIt--;
        if ( ridx <= 0)
        {
            // non-positive reference: right child is a leaf, pull its value
            prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValsuesIt >> cldNode->value; leafValsuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            // positive reference: attach the next already-restored subtree
            prntNode->right = internalNodesQueue.front();
            prntNode->right->parent = prntNode;
            internalNodesQueue.pop();
        }

        if ( lidx <= 0)
        {
            prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValsuesIt >> cldNode->value; leafValsuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            prntNode->left = internalNodesQueue.front();
            prntNode->left->parent = prntNode;
            internalNodesQueue.pop();
        }

        internalNodesQueue.push( prntNode );
    }

    // the last subtree assembled is the whole tree
    root = internalNodesQueue.front();
    internalNodesQueue.pop();
}
1074 
// Distributes the samples of a just-split node between its two new children,
// updating every per-variable buffer row (pre-sorted orderings for ordered
// variables, cv labels, sample indices) so both halves stay consistent for
// further splitting. dir[i] gives the side of sample i (0 = left, 1 = right).
void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
{
    int n = node->sample_count, nl, nr, scount = data->sample_count;
    char* dir = (char*)data->direction->data.ptr;
    CvDTreeNode *left = 0, *right = 0;
    int* newIdx = data->split_buf->data.i;
    int newBufIdx = data->get_child_buf_idx( node );
    int workVarCount = data->get_work_var_count();
    CvMat* buf = data->buf;
    size_t length_buf_row = data->get_length_subbuf();
    // scratch: n ints for remapped indices + room for the per-variable fetches below
    cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
    int* tempBuf = (int*)inn_buf.data();
    bool splitInputData;

    complete_node_dir(node);

    // count samples per side and give each sample its new within-child position
    for( int i = nl = nr = 0; i < n; i++ )
    {
        int d = dir[i];
        // initialize new indices for splitting ordered variables
        newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
        nr += d;
        nl += d^1;
    }

    node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
    node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );

    // children's buffers are only needed if they may be split further
    splitInputData = node->depth + 1 < data->params.max_depth &&
        (node->left->sample_count > data->params.min_sample_count ||
        node->right->sample_count > data->params.min_sample_count);

    // split ordered variables, keep both halves sorted.
    for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
    {
        int ci = data->get_var_type(vi);
        if( ci >= 0 || !splitInputData )
            continue;

        int n1 = node->get_num_valid(vi);
        // lay out the scratch areas needed by get_ord_var_data after tempBuf
        float *src_val_buf = (float*)(tempBuf + n);
        int *src_sorted_idx_buf = (int*)(src_val_buf + n);
        int *src_sample_idx_buf = src_sorted_idx_buf + n;
        const int* src_sorted_idx = 0;
        const float* src_val = 0;
        data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);

        // copy the parent's sorted order; it is consumed while being rewritten
        for(int i = 0; i < n; i++)
            tempBuf[i] = src_sorted_idx[i];

        if (data->is_buf_16u)
        {
            ushort *ldst, *rdst;
            ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row +
                vi*scount + left->offset);
            // right child's data directly follows the left child's nl entries
            rdst = (ushort*)(ldst + nl);

            // split sorted
            // walking the parent's order and appending to each side keeps
            // both halves sorted without re-sorting
            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                {
                    *rdst = (ushort)idx;
                    rdst++;
                }
                else
                {
                    *ldst = (ushort)idx;
                    ldst++;
                }
            }
            CV_Assert( n1 == n );
        }
        else
        {
            int *ldst, *rdst;
            ldst = buf->data.i + left->buf_idx*length_buf_row +
                vi*scount + left->offset;
            rdst = buf->data.i + right->buf_idx*length_buf_row +
                vi*scount + right->offset;

            // split sorted
            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                {
                    *rdst = idx;
                    rdst++;
                }
                else
                {
                    *ldst = idx;
                    ldst++;
                }
            }
            CV_Assert( n1 == n );
        }
    }

    // split cv_labels using newIdx relocation table
    int *src_lbls_buf = tempBuf + n;
    const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);

    for(int i = 0; i < n; i++)
        tempBuf[i] = src_lbls[i];

    if (data->is_buf_16u)
    {
        // cv labels live in row (workVarCount-1) of each child's sub-buffer
        unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + left->offset);
        unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + right->offset);

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = (unsigned short)idx;
                rdst++;
            }
            else
            {
                *ldst = (unsigned short)idx;
                ldst++;
            }
        }

    }
    else
    {
        int *ldst = buf->data.i + left->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + left->offset;
        int *rdst = buf->data.i + right->buf_idx*length_buf_row +
            (size_t)(workVarCount-1)*scount + right->offset;

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }

    // split sample indices
    int *sampleIdx_src_buf = tempBuf + n;
    const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);

    for(int i = 0; i < n; i++)
        tempBuf[i] = sampleIdx_src[i];

    if (data->is_buf_16u)
    {
        // sample indices live in row workVarCount of each child's sub-buffer
        unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + left->offset);
        unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + right->offset);
        for (int i = 0; i < n; i++)
        {
            unsigned short idx = (unsigned short)tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }
    else
    {
        int* ldst = buf->data.i + left->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + left->offset;
        int* rdst = buf->data.i + right->buf_idx*length_buf_row +
            (size_t)workVarCount*scount + right->offset;
        for (int i = 0; i < n; i++)
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }

    // every variable of each child now has exactly nl / nr valid samples
    for( int vi = 0; vi < data->var_count; vi++ )
    {
        left->set_num_valid(vi, (int)(nl));
        right->set_num_valid(vi, (int)(nr));
    }

    // deallocate the parent node data that is not needed anymore
    data->free_node_data(node);
}
1292 
auxMarkFeaturesInMap(const CvDTreeNode * node,Mat & featureMap)1293 static void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap)
1294 {
1295     if ( node && node->split )
1296     {
1297         featureMap.ptr<int>(0)[node->split->var_idx] = 1;
1298         auxMarkFeaturesInMap( node->left, featureMap );
1299         auxMarkFeaturesInMap( node->right, featureMap );
1300     }
1301 }
1302 
// Marks (sets to 1) featureMap's entry for every feature index this tree
// uses in a split; see auxMarkFeaturesInMap above.
void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap )
{
    auxMarkFeaturesInMap( root, featureMap );
}
1307 
1308 //----------------------------------- CascadeBoost --------------------------------------
1309 
// Trains one boosted stage on the samples exposed by the feature evaluator:
// weak trees are added until the false-alarm goal is met or weak_count is
// reached. Returns true if at least one weak tree was trained.
bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
                           int _numSamples,
                           int _precalcValBufSize, int _precalcIdxBufSize,
                           const CvCascadeBoostParams& _params )
{
    bool isTrained = false;
    CV_Assert( !data );
    clear();
    data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
                                        _precalcValBufSize, _precalcIdxBufSize, _params );
    CvMemStorage *storage = cvCreateMemStorage();
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    // the sequence keeps the storage pointer; drop the local alias
    storage = 0;

    set_params( _params );
    // LogitBoost / Gentle AdaBoost rewrite the responses, so work on a copy
    if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
        data->do_responses_copy();

    // tree == 0: initialize weights and the boosting state
    update_weights( 0 );

    cout << "+----+---------+---------+" << endl;
    cout << "|  N |    HR   |    FA   |" << endl;
    cout << "+----+---------+---------+" << endl;

    do
    {
        CvCascadeBoostTree* tree = new CvCascadeBoostTree;
        if( !tree->train( data, subsample_mask, this ) )
        {
            delete tree;
            break;
        }
        cvSeqPush( weak, &tree );
        update_weights( tree );
        trim_weights();
        // trim_weights() may deactivate every sample; nothing left to train on
        if( cvCountNonZero(subsample_mask) == 0 )
            break;
    }
    while( !isErrDesired() && (weak->total < params.weak_count) );

    if(weak->total > 0)
    {
        data->is_classifier = true;
        data->free_train_data();
        isTrained = true;
    }
    else
        clear();

    return isTrained;
}
1361 
predict(int sampleIdx,bool returnSum) const1362 float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const
1363 {
1364     CV_Assert( weak );
1365     double sum = 0;
1366     CvSeqReader reader;
1367     cvStartReadSeq( weak, &reader );
1368     cvSetSeqReaderPos( &reader, 0 );
1369     for( int i = 0; i < weak->total; i++ )
1370     {
1371         CvBoostTree* wtree;
1372         CV_READ_SEQ_ELEM( wtree, reader );
1373         sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value;
1374     }
1375     if( !returnSum )
1376         sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
1377     return (float)sum;
1378 }
1379 
set_params(const CvBoostParams & _params)1380 bool CvCascadeBoost::set_params( const CvBoostParams& _params )
1381 {
1382     minHitRate = ((CvCascadeBoostParams&)_params).minHitRate;
1383     maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm;
1384     return ( ( minHitRate > 0 ) && ( minHitRate < 1) &&
1385         ( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1) &&
1386         CvBoost::set_params( _params ));
1387 }
1388 
// (Re)computes per-sample boosting weights. With tree == 0 the boosting state
// (original responses, subsample mask, weights, labels) is initialized before
// the first weak classifier is trained; otherwise the weights are updated
// from the newly trained weak tree according to the configured boosting
// flavor (Discrete / Real / Logit / Gentle), and finally renormalized.
void CvCascadeBoost::update_weights( CvBoostTree* tree )
{
    int n = data->sample_count;
    double sumW = 0.;
    int step = 0;
    float* fdata = 0;
    int *sampleIdxBuf;
    const int* sampleIdx = 0;
    // scratch: one int row for sample indices (LOGIT/GENTLE) and one for
    // class labels (first call only)
    int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
                       ( !tree ? n*sizeof(int) : 0 );
    cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
    uchar* cur_inn_buf_pos = inn_buf.data();
    if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
    {
        // these flavors rewrite the (copied) responses in place; fdata/step
        // address that copy
        step = CV_IS_MAT_CONT(data->responses_copy->type) ?
            1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
        fdata = data->responses_copy->data.fl;
        sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
        sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
    }
    CvMat* buf = data->buf;
    size_t length_buf_row = data->get_length_subbuf();
    if( !tree ) // before training the first tree, initialize weights and other parameters
    {
        int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
        const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
        // in case of logitboost and gentle adaboost each weak tree is a regression tree,
        // so we need to convert class labels to floating-point values
        double w0 = 1./n;
        double p[2] = { 1, 1 };

        cvReleaseMat( &orig_response );
        cvReleaseMat( &sum_response );
        cvReleaseMat( &weak_eval );
        cvReleaseMat( &subsample_mask );
        cvReleaseMat( &weights );

        orig_response = cvCreateMat( 1, n, CV_32S );
        weak_eval = cvCreateMat( 1, n, CV_64F );
        subsample_mask = cvCreateMat( 1, n, CV_8U );
        weights = cvCreateMat( 1, n, CV_64F );
        subtree_weights = cvCreateMat( 1, n + 2, CV_64F );

        if (data->is_buf_16u)
        {
            unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count);
            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                // make all the samples active at start.
                // later, in trim_weights() deactivate/reactive again some, if need
                subsample_mask->data.ptr[i] = (uchar)1;
                // make all the initial weights the same.
                weights->data.db[i] = w0*p[classLabels[i]];
                // set the labels to find (from within weak tree learning proc)
                // the particular sample weight, and where to store the response.
                labels[i] = (unsigned short)i;
            }
        }
        else
        {
            // same initialization for the 32-bit index buffer layout
            int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count;

            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                subsample_mask->data.ptr[i] = (uchar)1;
                weights->data.db[i] = w0*p[classLabels[i]];
                labels[i] = i;
            }
        }

        if( params.boost_type == LOGIT )
        {
            sum_response = cvCreateMat( 1, n, CV_64F );

            for( int i = 0; i < n; i++ )
            {
                sum_response->data.db[i] = 0;
                fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
            }

            // in case of logitboost each weak tree is a regression tree.
            // the target function values are recalculated for each of the trees
            data->is_classifier = false;
        }
        else if( params.boost_type == GENTLE )
        {
            for( int i = 0; i < n; i++ )
                fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];

            data->is_classifier = false;
        }
    }
    else
    {
        // at this moment, for all the samples that participated in the training of the most
        // recent weak classifier we know the responses. For other samples we need to compute them
        if( have_subsample )
        {
            // invert the subsample mask
            cvXorS( subsample_mask, cvScalar(1.), subsample_mask );

            // run tree through all the non-processed samples
            for( int i = 0; i < n; i++ )
                if( subsample_mask->data.ptr[i] )
                {
                    weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
                }
        }

        // now update weights and other parameters for each type of boosting
        if( params.boost_type == DISCRETE )
        {
            // Discrete AdaBoost:
            //   weak_eval[i] (=f(x_i)) is in {-1,1}
            //   err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
            //   C = log((1-err)/err)
            //   w_i *= exp(C*(f(x_i) != y_i))

            double C, err = 0.;
            double scale[] = { 1., 0. };

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i];
                sumW += w;
                err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
            }

            if( sumW != 0 )
                err /= sumW;
            C = err = -logRatio( err );
            scale[1] = exp(err);

            sumW = 0;
            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*
                    scale[weak_eval->data.db[i] != orig_response->data.i[i]];
                sumW += w;
                weights->data.db[i] = w;
            }

            // fold the classifier weight C into the tree's leaf values
            tree->scale( C );
        }
        else if( params.boost_type == REAL )
        {
            // Real AdaBoost:
            //   weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
            //   w_i *= exp(-y_i*f(x_i))

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*weak_eval->data.db[i];
                sumW += w;
                weights->data.db[i] = w;
            }
        }
        else if( params.boost_type == LOGIT )
        {
            // LogitBoost:
            //   weak_eval[i] = f(x_i) in [-z_max,z_max]
            //   sum_response = F(x_i).
            //   F(x_i) += 0.5*f(x_i)
            //   p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))=1/(1+exp(-2*F(x_i)))
            //   reuse weak_eval: weak_eval[i] <- p(x_i)
            //   w_i = p(x_i)*1(1 - p(x_i))
            //   z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
            //   store z_i to the data->data_root as the new target responses

            const double lbWeightThresh = FLT_EPSILON;
            const double lbZMax = 10.;

            for( int i = 0; i < n; i++ )
            {
                double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
                sum_response->data.db[i] = s;
                weak_eval->data.db[i] = -2*s;
            }

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double p = 1./(1. + weak_eval->data.db[i]);
                double w = p*(1 - p), z;
                // clamp the weight away from zero to keep z_i finite
                w = MAX( w, lbWeightThresh );
                weights->data.db[i] = w;
                sumW += w;
                if( orig_response->data.i[i] > 0 )
                {
                    z = 1./p;
                    fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
                }
                else
                {
                    z = 1./(1-p);
                    fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
                }
            }
        }
        else
        {
            // Gentle AdaBoost:
            //   weak_eval[i] = f(x_i) in [-1,1]
            //   w_i *= exp(-y_i*f(x_i))
            assert( params.boost_type == GENTLE );

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i] * weak_eval->data.db[i];
                weights->data.db[i] = w;
                sumW += w;
            }
        }
    }

    // renormalize weights
    if( sumW > FLT_EPSILON )
    {
        sumW = 1./sumW;
        for( int i = 0; i < n; ++i )
            weights->data.db[i] *= sumW;
    }
}
1629 
isErrDesired()1630 bool CvCascadeBoost::isErrDesired()
1631 {
1632     int sCount = data->sample_count,
1633         numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0;
1634     vector<float> eval(sCount);
1635 
1636     for( int i = 0; i < sCount; i++ )
1637         if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F )
1638             eval[numPos++] = predict( i, true );
1639 
1640     std::sort(&eval[0], &eval[0] + numPos);
1641 
1642     int thresholdIdx = (int)((1.0F - minHitRate) * numPos);
1643 
1644     threshold = eval[ thresholdIdx ];
1645     numPosTrue = numPos - thresholdIdx;
1646     for( int i = thresholdIdx - 1; i >= 0; i--)
1647         if ( abs( eval[i] - threshold) < FLT_EPSILON )
1648             numPosTrue++;
1649     float hitRate = ((float) numPosTrue) / ((float) numPos);
1650 
1651     for( int i = 0; i < sCount; i++ )
1652     {
1653         if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F )
1654         {
1655             numNeg++;
1656             if( predict( i ) )
1657                 numFalse++;
1658         }
1659     }
1660     float falseAlarm = ((float) numFalse) / ((float) numNeg);
1661 
1662     cout << "|"; cout.width(4); cout << right << weak->total;
1663     cout << "|"; cout.width(9); cout << right << hitRate;
1664     cout << "|"; cout.width(9); cout << right << falseAlarm;
1665     cout << "|" << endl;
1666     cout << "+----+---------+---------+" << endl;
1667 
1668     return falseAlarm <= maxFalseAlarm;
1669 }
1670 
write(FileStorage & fs,const Mat & featureMap) const1671 void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const
1672 {
1673 //    char cmnt[30];
1674     CvCascadeBoostTree* weakTree;
1675     fs << CC_WEAK_COUNT << weak->total;
1676     fs << CC_STAGE_THRESHOLD << threshold;
1677     fs << CC_WEAK_CLASSIFIERS << "[";
1678     for( int wi = 0; wi < weak->total; wi++)
1679     {
1680         /*sprintf( cmnt, "tree %i", wi );
1681         cvWriteComment( fs, cmnt, 0 );*/
1682         weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
1683         weakTree->write( fs, featureMap );
1684     }
1685     fs << "]";
1686 }
1687 
read(const FileNode & node,const CvFeatureEvaluator * _featureEvaluator,const CvCascadeBoostParams & _params)1688 bool CvCascadeBoost::read( const FileNode &node,
1689                            const CvFeatureEvaluator* _featureEvaluator,
1690                            const CvCascadeBoostParams& _params )
1691 {
1692     CvMemStorage* storage;
1693     clear();
1694     data = new CvCascadeBoostTrainData( _featureEvaluator, _params );
1695     set_params( _params );
1696 
1697     node[CC_STAGE_THRESHOLD] >> threshold;
1698     FileNode rnode = node[CC_WEAK_CLASSIFIERS];
1699 
1700     storage = cvCreateMemStorage();
1701     weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
1702     for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ )
1703     {
1704         CvCascadeBoostTree* tree = new CvCascadeBoostTree();
1705         tree->read( *it, this, data );
1706         cvSeqPush( weak, &tree );
1707     }
1708     return true;
1709 }
1710 
markUsedFeaturesInMap(Mat & featureMap)1711 void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap )
1712 {
1713     for( int wi = 0; wi < weak->total; wi++ )
1714     {
1715         CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
1716         weakTree->markFeaturesInMap( featureMap );
1717     }
1718 }
1719