1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2015, Itseez Inc, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of the copyright holders may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Itseez Inc or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41 
42 #include "dpm_cascade.hpp"
43 #include "dpm_nms.hpp"
44 
45 #include <limits>
46 #include <fstream>
47 #include <iostream>
48 #include <stdio.h>
49 
50 using namespace std;
51 
52 namespace cv
53 {
54 namespace dpm
55 {
56 
loadCascadeModel(const string & modelPath)57 void DPMCascade::loadCascadeModel(const string &modelPath)
58 {
59     // load cascade model from xml
60     bool is_success = model.deserialize(modelPath);
61 
62     if (!is_success)
63     {
64         string errorMessage = format("Unable to parse the model: %s", modelPath.c_str());
65         CV_Error(CV_StsBadArg, errorMessage);
66     }
67 
68     model.initModel();
69 }
70 
void DPMCascade::initDPMCascade()
{
    // Pre-allocate and initialize all per-detection scratch storage:
    // lazy convolution / distance-transform value caches, per-level index
    // offsets, and the quadratic deformation-cost lookup tables.

    // compute the size of temporary storage needed by cascade
    int nlevels = (int) pyramid.size();
    int numPartFilters = model.getNumPartFilters();
    int numDefParams = model.getNumDefParams();
    featDimsProd.resize(nlevels);
    tempStorageSize = 0;

    for (int i = 0; i < nlevels; i++)
    {
        // each pyramid level stores dimHOG values per spatial cell, so
        // cols/dimHOG is the level width in cells
        int w = pyramid[i].cols/feature.dimHOG;
        int h = pyramid[i].rows;
        featDimsProd[i] = w*h;
        tempStorageSize += w*h;
    }

    tempStorageSize *= numPartFilters;

    convValues.resize(tempStorageSize);
    pcaConvValues.resize(tempStorageSize);
    dtValues.resize(tempStorageSize);
    pcaDtValues.resize(tempStorageSize);

    // -infinity marks "not yet computed"; computePartScore() fills
    // entries lazily and treats any value above -infinity as cached
    fill(convValues.begin(), convValues.end(), -numeric_limits<double>::infinity());
    fill(pcaConvValues.begin(), pcaConvValues.end(), -numeric_limits<double>::infinity());
    fill(dtValues.begin(), dtValues.end(), -numeric_limits<double>::infinity());
    fill(pcaDtValues.begin(), pcaDtValues.end(), -numeric_limits<double>::infinity());

    // each pyramid (convolution and distance transform) is stored
    // in a 1D array. Since pyramid levels have different sizes,
    // we build an array of offset values in order to index by
    // level. The last offset is the total length of the pyramid
    // storage array.
    convLevelOffset.resize(nlevels + 1);
    dtLevelOffset.resize(nlevels + 1);
    convLevelOffset[0] = 0;
    dtLevelOffset[0] = 0;

    for (int i = 1; i < nlevels + 1; i++)
    {
        convLevelOffset[i] = convLevelOffset[i-1] + numPartFilters*featDimsProd[i-1];
        dtLevelOffset[i] = dtLevelOffset[i-1] + numDefParams*featDimsProd[i-1];
    }

    // cache of precomputed deformation costs
    defCostCacheX.resize(numDefParams);
    defCostCacheY.resize(numDefParams);

    for (int i = 0; i < numDefParams; i++)
    {
        // def holds at least 4 coefficients, used here as the quadratic
        // and linear terms for x (def[0], def[1]) and y (def[2], def[3])
        vector< double > def = model.defs[i];
        CV_Assert((int) def.size() >= 4);

        // cache covers displacements in [-halfWindowSize, +halfWindowSize]
        defCostCacheX[i].resize(2*halfWindowSize + 1);
        defCostCacheY[i].resize(2*halfWindowSize + 1);

        for (int j = 0; j < 2*halfWindowSize + 1; j++)
        {
            int delta = j - halfWindowSize;
            int deltaSquare = delta*delta;
            // negated cost: it is later added to filter responses, so
            // larger displacements lower the score
            defCostCacheX[i][j] = -def[0]*deltaSquare - def[1]*delta;
            defCostCacheY[i][j] = -def[2]*deltaSquare - def[3]*delta;
        }
    }

    // best-placement (argmax) storage for the distance transforms;
    // dtLevelOffset[nlevels] is the total DT storage length
    dtArgmaxX.resize(dtLevelOffset[nlevels]);
    pcaDtArgmaxX.resize(dtLevelOffset[nlevels]);
    dtArgmaxY.resize(dtLevelOffset[nlevels]);
    pcaDtArgmaxY.resize(dtLevelOffset[nlevels]);
}
142 
detect(Mat & image)143 vector< vector<double> > DPMCascade::detect(Mat &image)
144 {
145     if (image.channels() == 1)
146         cvtColor(image, image, COLOR_GRAY2BGR);
147 
148     if (image.depth() != CV_64F)
149         image.convertTo(image, CV_64FC3);
150 
151     // compute features
152     computeFeatures(image);
153 
154     // pre-allocate storage
155     initDPMCascade();
156 
157     // cascade process
158     vector< vector<double> > detections;
159     process(detections);
160 
161     // non-maximum suppression
162     NonMaximumSuppression nms;
163     nms.process(detections, 0.5);
164 
165     return detections;
166 }
167 
computeFeatures(const Mat & im)168 void DPMCascade::computeFeatures(const Mat &im)
169 {
170     // initialize feature pyramid
171     PyramidParameter params;
172     params.padx = model.maxSizeX;
173     params.pady = model.maxSizeY;
174     params.interval = model.interval;
175     params.binSize = model.sBin;
176 
177     feature = Feature(params);
178 
179     // compute pyramid
180     feature.computeFeaturePyramid(im, pyramid);
181 
182     // compute projected pyramid
183     feature.projectFeaturePyramid(model.pcaCoeff, pyramid, pcaPyramid);
184 }
185 
computeLocationScores(vector<vector<double>> & locationScores)186 void DPMCascade::computeLocationScores(vector< vector< double > >  &locationScores)
187 {
188     vector< vector < double > > locationWeight = model.locationWeight;
189     CV_Assert((int)locationWeight.size() == model.numComponents);
190 
191     Mat locationFeature;
192     int nlevels = (int) pyramid.size();
193     feature.computeLocationFeatures(nlevels, locationFeature);
194 
195     locationScores.resize(model.numComponents);
196 
197     for (int comp = 0; comp < model.numComponents; comp++)
198     {
199         locationScores[comp].resize(locationFeature.cols);
200 
201         for (int level = 0; level < locationFeature.cols; level++)
202         {
203             double val = 0;
204             for (int k = 0; k < locationFeature.rows; k++)
205                 val += locationWeight[comp][k]*
206                     locationFeature.at<double>(k, level);
207 
208             locationScores[comp][level] = val;
209         }
210     }
211 }
212 
computeRootPCAScores(vector<vector<Mat>> & rootScores)213 void DPMCascade::computeRootPCAScores(vector< vector< Mat > > &rootScores)
214 {
215     PyramidParameter params = feature.getPyramidParameters();
216     rootScores.resize(model.numComponents);
217     int nlevels = (int) pyramid.size();
218     int interval = params.interval;
219 
220     for (int comp = 0; comp < model.numComponents; comp++)
221     {
222         rootScores[comp].resize(nlevels);
223         ParalComputeRootPCAScores paralTask(pcaPyramid, model.rootPCAFilters[comp],
224                 model.pcaDim, rootScores[comp]);
225         parallel_for_(Range(interval, nlevels), paralTask);
226     }
227 }
228 
// Parallel task (used with cv::parallel_for_) that computes root PCA
// filter responses for a range of pyramid levels. Holds references to the
// caller's pyramid and output vector; both must outlive this object.
ParalComputeRootPCAScores::ParalComputeRootPCAScores(
        const vector< Mat > &pcaPyrad,  // PCA-projected feature pyramid
        const Mat &f,                   // root PCA filter of one component
        int dim,                        // PCA feature dimension per cell
        vector< Mat > &sc):             // output: per-level score maps
    pcaPyramid(pcaPyrad),
    filter(f),
    pcaDim(dim),
    scores(sc)
{
}
240 
operator ()(const Range & range) const241 void ParalComputeRootPCAScores::operator() (const Range &range) const
242 {
243     for (int level = range.start; level != range.end; level++)
244     {
245         Mat feat = pcaPyramid[level];
246 
247         // compute size of output
248         int height = feat.rows - filter.rows + 1;
249         int width = (feat.cols - filter.cols) / pcaDim + 1;
250 
251         Mat result = Mat::zeros(Size(width, height), CV_64F);
252         // convolution engine
253         ConvolutionEngine convEngine;
254         convEngine.convolve(feat, filter, pcaDim, result);
255         scores[level] = result;
256     }
257 }
258 
void DPMCascade::process( vector< vector<double> > &dets)
{
    // Core cascade loop: for every component and pyramid level, push each
    // root location through an ordered sequence of pruning stages (cheap
    // PCA filters first, then exact filters). Hypotheses that survive all
    // stages with score >= model.scoreThresh are appended to 'dets' as
    // [root x1 y1 x2 y2, part boxes (x1 y1 x2 y2 each), component, score].
    PyramidParameter params = feature.getPyramidParameters();
    int interval = params.interval;
    int padx = params.padx;
    int pady = params.pady;
    vector<double> scales = params.scales;

    // root filters operate 'interval' levels above the part filters
    int nlevels = (int)pyramid.size() - interval;
    CV_Assert(nlevels > 0);

    // keep track of the PCA scores for each PCA filter
    vector< vector< double > > pcaScore(model.numComponents);
    for (int comp = 0; comp < model.numComponents; comp++)
        pcaScore[comp].resize(model.numParts[comp]+1);

    // compute location scores
    vector< vector< double > > locationScores;
    computeLocationScores(locationScores);

    // compute root PCA scores
    vector< vector< Mat > > rootPCAScores;
    computeRootPCAScores(rootPCAScores);

    // process each model component and pyramid level
    for (int comp = 0; comp < model.numComponents; comp++)
    {
        for (int plevel = 0; plevel < nlevels; plevel++)
        {
            // root filter pyramid level
            int rlevel = plevel + interval;
            double bias = model.bias[comp] + locationScores[comp][rlevel];
            // get the scores of the first PCA filter
            Mat rtscore = rootPCAScores[comp][rlevel];
            // process each location in the current pyramid level,
            // skipping half the padding border on every side
            for (int rx = (int)ceil(padx/2.0); rx < rtscore.cols - (int)ceil(padx/2.0); rx++)
            {
                for (int ry = (int)ceil(pady/2.0); ry < rtscore.rows - (int)ceil(pady/2.0); ry++)
                {
                    // get stage 0 score
                    double score = rtscore.at<double>(ry, rx) + bias;
                    // record PCA score
                    pcaScore[comp][0] = score - bias;
                    // cascade stage 1 through 2*numparts + 2
                    int stage = 1;
                    int numstages = 2*model.numParts[comp] + 2;
                    for(; stage < numstages; stage++)
                    {
                        // learned per-stage pruning threshold
                        double t = model.prunThreshold[comp][2*stage-1];
                        // check for hypothesis pruning
                        if (score < t)
                            break;

                        // pca == 1 if place filters
                        // pca == 0 if place non-pca filters
                        bool isPCA = (stage < model.numParts[comp] + 1 ? true : false);
                        // get the part index
                        // root parts have index -1, none-root part are indexed 0:numParts-1
                        int part = model.partOrder[comp][stage] - 1;// partOrder

                        if (part == -1)
                        {
                            // calculate the root non-pca score
                            // and replace the PCA score
                            double rscore = 0.0;
                            if (isPCA)
                            {
                                rscore = convolutionEngine.convolve(pcaPyramid[rlevel],
                                        model.rootPCAFilters[comp],
                                        model.pcaDim, rx, ry);
                            }
                            else
                            {
                                rscore = convolutionEngine.convolve(pyramid[rlevel],
                                        model.rootFilters[comp],
                                        model.numFeatures, rx, ry);
                            }
                            score += rscore - pcaScore[comp][0];
                        }
                        else
                        {
                            // place a non-root filter; parts are anchored
                            // at twice the root resolution
                            int pId = model.pFind[comp][part];
                            int px = 2*rx + (int)model.anchors[pId][0];
                            int py = 2*ry + (int)model.anchors[pId][1];

                            // look up the filter and deformation model.
                            // defThreshold lets computePartScore skip
                            // placements that cannot survive the next stage
                            double defThreshold =
                                model.prunThreshold[comp][2*stage] - score;

                            double ps = computePartScore(plevel, pId, px, py,
                                    isPCA, defThreshold);

                            if (isPCA)
                            {
                                // record PCA filter score
                                pcaScore[comp][part+1] = ps;
                                // update the hypothesis score
                                score += ps;
                            }
                            else
                            {
                                // update the hypothesis score by replacing
                                // the PCA score
                                score += ps - pcaScore[comp][part+1];
                            } // isPCA == false
                        } // part != -1

                    } // stages

                    // check if the hypothesis passed all stages with a
                    // final score over the global threshold
                    if (stage == numstages && score >= model.scoreThresh)
                    {
                        vector<double> coords;
                        // compute and record image coordinates of the detection window
                        double scale = model.sBin/scales[rlevel];
                        double x1 = (rx-padx)*scale;
                        double y1 = (ry-pady)*scale;
                        double x2 = x1 + model.rootFilterDims[comp].width*scale - 1;
                        double y2 = y1 + model.rootFilterDims[comp].height*scale - 1;

                        coords.push_back(x1);
                        coords.push_back(y1);
                        coords.push_back(x2);
                        coords.push_back(y2);

                        // compute and record image coordinates of the part filters
                        scale = model.sBin/scales[plevel];
                        int featWidth = pyramid[plevel].cols/feature.dimHOG;
                        for (int p = 0; p < model.numParts[comp]; p++)
                        {
                            int pId = model.pFind[comp][p];
                            int probx = 2*rx + (int)model.anchors[pId][0];
                            int proby = 2*ry + (int)model.anchors[pId][1];
                            // read back the best part placement found by
                            // the distance transform in computePartScore
                            int offset = dtLevelOffset[plevel] +
                                pId*featDimsProd[plevel] +
                                (proby - pady)*featWidth +
                                probx - padx;
                            int px = dtArgmaxX[offset] + padx;
                            int py = dtArgmaxY[offset] + pady;
                            x1 = (px - 2*padx)*scale;
                            y1 = (py - 2*pady)*scale;
                            x2 = x1 + model.partFilterDims[p].width*scale - 1;
                            y2 = y1 + model.partFilterDims[p].height*scale - 1;
                            coords.push_back(x1);
                            coords.push_back(y1);
                            coords.push_back(x2);
                            coords.push_back(y2);
                        }

                        // record component number and score
                        coords.push_back(comp + 1);
                        coords.push_back(score);

                        dets.push_back(coords);
                    }
                } // ry
            } // rx
        } // for each pyramid level
    } // for each component
}
421 
// Compute the distance-transformed score of part 'pId' anchored at (px, py)
// in pyramid level 'plevel'. Results are memoized in the (pca)convValues /
// (pca)dtValues caches (initialized to -inf by initDPMCascade), so each
// location is evaluated at most once per detection run. 'defThreshold'
// allows skipping placements whose deformation cost alone already prunes
// them. Returns the best (max) score over the local search window and
// records the corresponding argmax for later part-box recovery.
double DPMCascade::computePartScore(int plevel, int pId, int px, int py, bool isPCA, double defThreshold)
{
    // remove virtual padding
    PyramidParameter params = feature.getPyramidParameters();
    px -= params.padx;
    py -= params.pady;

    // check if already computed
    int levelOffset = dtLevelOffset[plevel];
    int locationOffset = pId*featDimsProd[plevel]
        + py*pyramid[plevel].cols/feature.dimHOG
        + px;
    int dtBaseOffset = levelOffset + locationOffset;

    double val;

    if (isPCA)
        val = pcaDtValues[dtBaseOffset];
    else
        val = dtValues[dtBaseOffset];

    // anything above -inf means the value was cached earlier
    if (val > -numeric_limits<double>::infinity())
        return val;

    // Nope, define the bounds of the convolution and
    // distance transform region
    int xstart = px - halfWindowSize;
    xstart = (xstart < 0 ? 0 : xstart);
    int xend = px + halfWindowSize;

    int ystart = py - halfWindowSize;
    ystart = (ystart < 0 ? 0 : ystart);
    int yend = py + halfWindowSize;

    int featWidth = pyramid[plevel].cols/feature.dimHOG;
    int featHeight = pyramid[plevel].rows;
    int filterWidth = model.partFilters[pId].cols/feature.dimHOG;
    int filterHeight = model.partFilters[pId].rows;

    // clamp the window so the filter stays inside the feature map
    xend = (filterWidth + xend > featWidth)
        ? featWidth - filterWidth
        : xend;
    yend = (filterHeight + yend > featHeight)
        ? featHeight - filterHeight
        : yend;

    // do convolution and distance transform in region
    // [xstart, xend, ystart, yend]
    levelOffset = convLevelOffset[plevel];
    locationOffset = pId*featDimsProd[plevel];
    int convBaseOffset = levelOffset + locationOffset;

    for (int y = ystart; y <= yend; y++)
    {
        int loc = convBaseOffset + y*featWidth + xstart - 1;
        for (int x = xstart; x <= xend; x++)
        {
            loc++;
            // skip if already computed
            if (isPCA)
            {
                if (pcaConvValues[loc] > -numeric_limits<double>::infinity())
                    continue;
            }
            else if(convValues[loc] > -numeric_limits<double>::infinity())
                continue;

            // check for deformation pruning: if even the deformation cost
            // alone falls below the threshold, the filter response cannot
            // rescue this placement, so skip the (expensive) convolution
            double defCost = defCostCacheX[pId][px - x + halfWindowSize]
                + defCostCacheY[pId][py - y + halfWindowSize];

            if (defCost < defThreshold)
                continue;

            if (isPCA)
            {
                pcaConvValues[loc] = convolutionEngine.convolve
                    (pcaPyramid[plevel], model.partPCAFilters[pId],
                     model.pcaDim, x, y);
            }
            else
            {
                convValues[loc] = convolutionEngine.convolve
                    (pyramid[plevel], model.partFilters[pId],
                     model.numFeatures, x, y);
            }
        } // x
    } // y

    // do distance transform over the region.
    // the region is small enough that brute-force DT
    // is the fastest method
    double max = -numeric_limits<double>::infinity();
    int xargmax = 0;
    int yargmax = 0;

    for (int y = ystart; y <= yend; y++)
    {
        int loc = convBaseOffset + y*featWidth + xstart - 1;
        for (int x = xstart; x <= xend; x++)
        {
            loc++;
            double v;

            if (isPCA)
                v = pcaConvValues[loc];
            else
                v = convValues[loc];

            // filter response plus (negated) deformation cost
            v += defCostCacheX[pId][px - x + halfWindowSize]
                + defCostCacheY[pId][py - y + halfWindowSize];

            if (v > max)
            {
                max = v;
                xargmax = x;
                yargmax = y;
            } // if v
        } // for x
    } // for y

    // record max and argmax for DT
    if (isPCA)
    {
        pcaDtArgmaxX[dtBaseOffset] = xargmax;
        pcaDtArgmaxY[dtBaseOffset] = yargmax;
        pcaDtValues[dtBaseOffset] = max;
    }
    else
    {
        dtArgmaxX[dtBaseOffset] = xargmax;
        dtArgmaxY[dtBaseOffset] = yargmax;
        dtValues[dtBaseOffset] = max;
    }

    return max;
}
559 
560 } // namespace dpm
561 } // namespace cv
562