1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2015, Itseez Inc, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 // * Redistribution's of source code must retain the above copyright notice,
20 // this list of conditions and the following disclaimer.
21 //
22 // * Redistribution's in binary form must reproduce the above copyright notice,
23 // this list of conditions and the following disclaimer in the documentation
24 // and/or other materials provided with the distribution.
25 //
26 // * The name of the copyright holders may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Itseez Inc or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41
42 #include "dpm_cascade.hpp"
43 #include "dpm_nms.hpp"
44
45 #include <limits>
46 #include <fstream>
47 #include <iostream>
48 #include <stdio.h>
49
50 using namespace std;
51
52 namespace cv
53 {
54 namespace dpm
55 {
56
loadCascadeModel(const string & modelPath)57 void DPMCascade::loadCascadeModel(const string &modelPath)
58 {
59 // load cascade model from xml
60 bool is_success = model.deserialize(modelPath);
61
62 if (!is_success)
63 {
64 string errorMessage = format("Unable to parse the model: %s", modelPath.c_str());
65 CV_Error(CV_StsBadArg, errorMessage);
66 }
67
68 model.initModel();
69 }
70
initDPMCascade()71 void DPMCascade::initDPMCascade()
72 {
73 // compute the size of temporary storage needed by cascade
74 int nlevels = (int) pyramid.size();
75 int numPartFilters = model.getNumPartFilters();
76 int numDefParams = model.getNumDefParams();
77 featDimsProd.resize(nlevels);
78 tempStorageSize = 0;
79
80 for (int i = 0; i < nlevels; i++)
81 {
82 int w = pyramid[i].cols/feature.dimHOG;
83 int h = pyramid[i].rows;
84 featDimsProd[i] = w*h;
85 tempStorageSize += w*h;
86 }
87
88 tempStorageSize *= numPartFilters;
89
90 convValues.resize(tempStorageSize);
91 pcaConvValues.resize(tempStorageSize);
92 dtValues.resize(tempStorageSize);
93 pcaDtValues.resize(tempStorageSize);
94
95 fill(convValues.begin(), convValues.end(), -numeric_limits<double>::infinity());
96 fill(pcaConvValues.begin(), pcaConvValues.end(), -numeric_limits<double>::infinity());
97 fill(dtValues.begin(), dtValues.end(), -numeric_limits<double>::infinity());
98 fill(pcaDtValues.begin(), pcaDtValues.end(), -numeric_limits<double>::infinity());
99
100 // each pyramid (convolution and distance transform) is stored
101 // in a 1D array. Since pyramid levels have different sizes,
102 // we build an array of offset values in order to index by
103 // level. The last offset is the total length of the pyramid
104 // storage array.
105 convLevelOffset.resize(nlevels + 1);
106 dtLevelOffset.resize(nlevels + 1);
107 convLevelOffset[0] = 0;
108 dtLevelOffset[0] = 0;
109
110 for (int i = 1; i < nlevels + 1; i++)
111 {
112 convLevelOffset[i] = convLevelOffset[i-1] + numPartFilters*featDimsProd[i-1];
113 dtLevelOffset[i] = dtLevelOffset[i-1] + numDefParams*featDimsProd[i-1];
114 }
115
116 // cache of precomputed deformation costs
117 defCostCacheX.resize(numDefParams);
118 defCostCacheY.resize(numDefParams);
119
120 for (int i = 0; i < numDefParams; i++)
121 {
122 vector< double > def = model.defs[i];
123 CV_Assert((int) def.size() >= 4);
124
125 defCostCacheX[i].resize(2*halfWindowSize + 1);
126 defCostCacheY[i].resize(2*halfWindowSize + 1);
127
128 for (int j = 0; j < 2*halfWindowSize + 1; j++)
129 {
130 int delta = j - halfWindowSize;
131 int deltaSquare = delta*delta;
132 defCostCacheX[i][j] = -def[0]*deltaSquare - def[1]*delta;
133 defCostCacheY[i][j] = -def[2]*deltaSquare - def[3]*delta;
134 }
135 }
136
137 dtArgmaxX.resize(dtLevelOffset[nlevels]);
138 pcaDtArgmaxX.resize(dtLevelOffset[nlevels]);
139 dtArgmaxY.resize(dtLevelOffset[nlevels]);
140 pcaDtArgmaxY.resize(dtLevelOffset[nlevels]);
141 }
142
detect(Mat & image)143 vector< vector<double> > DPMCascade::detect(Mat &image)
144 {
145 if (image.channels() == 1)
146 cvtColor(image, image, COLOR_GRAY2BGR);
147
148 if (image.depth() != CV_64F)
149 image.convertTo(image, CV_64FC3);
150
151 // compute features
152 computeFeatures(image);
153
154 // pre-allocate storage
155 initDPMCascade();
156
157 // cascade process
158 vector< vector<double> > detections;
159 process(detections);
160
161 // non-maximum suppression
162 NonMaximumSuppression nms;
163 nms.process(detections, 0.5);
164
165 return detections;
166 }
167
computeFeatures(const Mat & im)168 void DPMCascade::computeFeatures(const Mat &im)
169 {
170 // initialize feature pyramid
171 PyramidParameter params;
172 params.padx = model.maxSizeX;
173 params.pady = model.maxSizeY;
174 params.interval = model.interval;
175 params.binSize = model.sBin;
176
177 feature = Feature(params);
178
179 // compute pyramid
180 feature.computeFeaturePyramid(im, pyramid);
181
182 // compute projected pyramid
183 feature.projectFeaturePyramid(model.pcaCoeff, pyramid, pcaPyramid);
184 }
185
computeLocationScores(vector<vector<double>> & locationScores)186 void DPMCascade::computeLocationScores(vector< vector< double > > &locationScores)
187 {
188 vector< vector < double > > locationWeight = model.locationWeight;
189 CV_Assert((int)locationWeight.size() == model.numComponents);
190
191 Mat locationFeature;
192 int nlevels = (int) pyramid.size();
193 feature.computeLocationFeatures(nlevels, locationFeature);
194
195 locationScores.resize(model.numComponents);
196
197 for (int comp = 0; comp < model.numComponents; comp++)
198 {
199 locationScores[comp].resize(locationFeature.cols);
200
201 for (int level = 0; level < locationFeature.cols; level++)
202 {
203 double val = 0;
204 for (int k = 0; k < locationFeature.rows; k++)
205 val += locationWeight[comp][k]*
206 locationFeature.at<double>(k, level);
207
208 locationScores[comp][level] = val;
209 }
210 }
211 }
212
computeRootPCAScores(vector<vector<Mat>> & rootScores)213 void DPMCascade::computeRootPCAScores(vector< vector< Mat > > &rootScores)
214 {
215 PyramidParameter params = feature.getPyramidParameters();
216 rootScores.resize(model.numComponents);
217 int nlevels = (int) pyramid.size();
218 int interval = params.interval;
219
220 for (int comp = 0; comp < model.numComponents; comp++)
221 {
222 rootScores[comp].resize(nlevels);
223 ParalComputeRootPCAScores paralTask(pcaPyramid, model.rootPCAFilters[comp],
224 model.pcaDim, rootScores[comp]);
225 parallel_for_(Range(interval, nlevels), paralTask);
226 }
227 }
228
// Parallel task that computes PCA root-filter responses per pyramid level
// (invoked through parallel_for_ in computeRootPCAScores). The constructor
// only binds the shared inputs and the output vector.
ParalComputeRootPCAScores::ParalComputeRootPCAScores(
    const vector< Mat > &pcaPyrad, // PCA-projected feature pyramid (read-only)
    const Mat &f,                  // PCA root filter to convolve with
    int dim,                       // PCA feature dimension
    vector< Mat > &sc):            // output: per-level response maps
    pcaPyramid(pcaPyrad),
    filter(f),
    pcaDim(dim),
    scores(sc)
{
}
240
operator ()(const Range & range) const241 void ParalComputeRootPCAScores::operator() (const Range &range) const
242 {
243 for (int level = range.start; level != range.end; level++)
244 {
245 Mat feat = pcaPyramid[level];
246
247 // compute size of output
248 int height = feat.rows - filter.rows + 1;
249 int width = (feat.cols - filter.cols) / pcaDim + 1;
250
251 Mat result = Mat::zeros(Size(width, height), CV_64F);
252 // convolution engine
253 ConvolutionEngine convEngine;
254 convEngine.convolve(feat, filter, pcaDim, result);
255 scores[level] = result;
256 }
257 }
258
// Run the DPM cascade over every model component and pyramid level and
// append detections to `dets`.
//
// Each candidate root location accumulates a hypothesis score stage by
// stage: cheap PCA-projected filters are evaluated first, then the exact
// filters replace the corresponding PCA scores. A hypothesis is pruned as
// soon as its partial score drops below the per-stage threshold in
// model.prunThreshold. A hypothesis that survives every stage with a final
// score >= model.scoreThresh is recorded in image coordinates as:
//   [root x1,y1,x2,y2, one (x1,y1,x2,y2) per part,
//    component id (1-based), score]
void DPMCascade::process( vector< vector<double> > &dets)
{
    PyramidParameter params = feature.getPyramidParameters();
    int interval = params.interval;
    int padx = params.padx;
    int pady = params.pady;
    vector<double> scales = params.scales;

    // root filters are evaluated `interval` levels above the part level,
    // so only the top nlevels part levels have a matching root level
    int nlevels = (int)pyramid.size() - interval;
    CV_Assert(nlevels > 0);

    // keep track of the PCA scores for each PCA filter
    // (slot 0 is the root, slots 1..numParts are the parts)
    vector< vector< double > > pcaScore(model.numComponents);
    for (int comp = 0; comp < model.numComponents; comp++)
        pcaScore[comp].resize(model.numParts[comp]+1);

    // compute location scores (per-component, per-level score prior)
    vector< vector< double > > locationScores;
    computeLocationScores(locationScores);

    // compute root PCA scores (stage 0 of the cascade)
    vector< vector< Mat > > rootPCAScores;
    computeRootPCAScores(rootPCAScores);

    // process each model component and pyramid level
    for (int comp = 0; comp < model.numComponents; comp++)
    {
        for (int plevel = 0; plevel < nlevels; plevel++)
        {
            // root filter pyramid level
            int rlevel = plevel + interval;
            double bias = model.bias[comp] + locationScores[comp][rlevel];
            // get the scores of the first PCA filter
            Mat rtscore = rootPCAScores[comp][rlevel];
            // process each location in the current pyramid level,
            // skipping half the virtual padding on each border
            for (int rx = (int)ceil(padx/2.0); rx < rtscore.cols - (int)ceil(padx/2.0); rx++)
            {
                for (int ry = (int)ceil(pady/2.0); ry < rtscore.rows - (int)ceil(pady/2.0); ry++)
                {
                    // get stage 0 score: root PCA response plus bias
                    double score = rtscore.at<double>(ry, rx) + bias;
                    // record PCA score without the bias, so it can be
                    // subtracted out when the exact root score arrives
                    pcaScore[comp][0] = score - bias;
                    // cascade stage 1 through 2*numparts + 2
                    int stage = 1;
                    int numstages = 2*model.numParts[comp] + 2;
                    for(; stage < numstages; stage++)
                    {
                        double t = model.prunThreshold[comp][2*stage-1];
                        // check for hypothesis pruning
                        if (score < t)
                            break;

                        // isPCA == true  : place PCA filters (first half of stages)
                        // isPCA == false : place exact (non-PCA) filters
                        bool isPCA = (stage < model.numParts[comp] + 1 ? true : false);
                        // get the part index:
                        // the root has index -1, non-root parts are indexed 0:numParts-1
                        int part = model.partOrder[comp][stage] - 1;// partOrder

                        if (part == -1)
                        {
                            // calculate the root non-pca score
                            // and replace the PCA score
                            double rscore = 0.0;
                            if (isPCA)
                            {
                                rscore = convolutionEngine.convolve(pcaPyramid[rlevel],
                                        model.rootPCAFilters[comp],
                                        model.pcaDim, rx, ry);
                            }
                            else
                            {
                                rscore = convolutionEngine.convolve(pyramid[rlevel],
                                        model.rootFilters[comp],
                                        model.numFeatures, rx, ry);
                            }
                            // swap the provisional PCA root score for the
                            // freshly computed one
                            score += rscore - pcaScore[comp][0];
                        }
                        else
                        {
                            // place a non-root filter; part anchors are
                            // expressed at twice the root resolution
                            int pId = model.pFind[comp][part];
                            int px = 2*rx + (int)model.anchors[pId][0];
                            int py = 2*ry + (int)model.anchors[pId][1];

                            // look up the filter and deformation model;
                            // placements whose deformation cost alone falls
                            // below this threshold cannot keep the
                            // hypothesis alive
                            double defThreshold =
                                model.prunThreshold[comp][2*stage] - score;

                            double ps = computePartScore(plevel, pId, px, py,
                                    isPCA, defThreshold);

                            if (isPCA)
                            {
                                // record PCA filter score
                                pcaScore[comp][part+1] = ps;
                                // update the hypothesis score
                                score += ps;
                            }
                            else
                            {
                                // update the hypothesis score by replacing
                                // the PCA score
                                score += ps - pcaScore[comp][part+1];
                            } // isPCA == false
                        } // part != -1

                    } // stages

                    // check if the hypothesis passed all stages with a
                    // final score over the global threshold
                    if (stage == numstages && score >= model.scoreThresh)
                    {
                        vector<double> coords;
                        // compute and record image coordinates of the detection window
                        double scale = model.sBin/scales[rlevel];
                        double x1 = (rx-padx)*scale;
                        double y1 = (ry-pady)*scale;
                        double x2 = x1 + model.rootFilterDims[comp].width*scale - 1;
                        double y2 = y1 + model.rootFilterDims[comp].height*scale - 1;

                        coords.push_back(x1);
                        coords.push_back(y1);
                        coords.push_back(x2);
                        coords.push_back(y2);

                        // compute and record image coordinates of the part filters
                        scale = model.sBin/scales[plevel];
                        int featWidth = pyramid[plevel].cols/feature.dimHOG;
                        for (int p = 0; p < model.numParts[comp]; p++)
                        {
                            int pId = model.pFind[comp][p];
                            int probx = 2*rx + (int)model.anchors[pId][0];
                            int proby = 2*ry + (int)model.anchors[pId][1];
                            // look up the optimal displacement recorded by
                            // the distance transform in computePartScore()
                            int offset = dtLevelOffset[plevel] +
                                pId*featDimsProd[plevel] +
                                (proby - pady)*featWidth +
                                probx - padx;
                            int px = dtArgmaxX[offset] + padx;
                            int py = dtArgmaxY[offset] + pady;
                            x1 = (px - 2*padx)*scale;
                            y1 = (py - 2*pady)*scale;
                            x2 = x1 + model.partFilterDims[p].width*scale - 1;
                            y2 = y1 + model.partFilterDims[p].height*scale - 1;
                            coords.push_back(x1);
                            coords.push_back(y1);
                            coords.push_back(x2);
                            coords.push_back(y2);
                        }

                        // record component number (1-based) and score
                        coords.push_back(comp + 1);
                        coords.push_back(score);

                        dets.push_back(coords);
                    }
                } // ry
            } // rx
        } // for each pyramid level
    } // for each component
}
421
// Compute the score of part `pId` anchored at (px, py) — coordinates given
// with virtual padding — on part level `plevel`.
//
// The score is the maximum over displacements within
// [-halfWindowSize, halfWindowSize] of (filter response + deformation
// cost): a brute-force distance transform restricted to a small window.
// Both the max and its argmax are memoized in the dt*/pcaDt* tables so
// repeated hypotheses at the same location reuse them; individual
// convolutions are likewise cached in (pca)convValues. `defThreshold`
// lets the loop skip convolutions at displacements whose deformation cost
// alone cannot keep the hypothesis alive. When isPCA is true, the PCA
// filters and PCA pyramid are used instead of the exact ones.
double DPMCascade::computePartScore(int plevel, int pId, int px, int py, bool isPCA, double defThreshold)
{
    // remove virtual padding
    PyramidParameter params = feature.getPyramidParameters();
    px -= params.padx;
    py -= params.pady;

    // check if already computed (memoized distance-transform value)
    int levelOffset = dtLevelOffset[plevel];
    int locationOffset = pId*featDimsProd[plevel]
        + py*pyramid[plevel].cols/feature.dimHOG
        + px;
    int dtBaseOffset = levelOffset + locationOffset;

    double val;

    if (isPCA)
        val = pcaDtValues[dtBaseOffset];
    else
        val = dtValues[dtBaseOffset];

    // -inf marks "not computed yet" (see initDPMCascade)
    if (val > -numeric_limits<double>::infinity())
        return val;

    // Nope, define the bounds of the convolution and
    // distance transform region
    int xstart = px - halfWindowSize;
    xstart = (xstart < 0 ? 0 : xstart);
    int xend = px + halfWindowSize;

    int ystart = py - halfWindowSize;
    ystart = (ystart < 0 ? 0 : ystart);
    int yend = py + halfWindowSize;

    int featWidth = pyramid[plevel].cols/feature.dimHOG;
    int featHeight = pyramid[plevel].rows;
    int filterWidth = model.partFilters[pId].cols/feature.dimHOG;
    int filterHeight = model.partFilters[pId].rows;

    // clamp so the filter stays entirely inside the feature map
    xend = (filterWidth + xend > featWidth)
        ? featWidth - filterWidth
        : xend;
    yend = (filterHeight + yend > featHeight)
        ? featHeight - filterHeight
        : yend;

    // do convolution and distance transform in region
    // [xstart, xend] x [ystart, yend]
    levelOffset = convLevelOffset[plevel];
    locationOffset = pId*featDimsProd[plevel];
    int convBaseOffset = levelOffset + locationOffset;

    for (int y = ystart; y <= yend; y++)
    {
        int loc = convBaseOffset + y*featWidth + xstart - 1;
        for (int x = xstart; x <= xend; x++)
        {
            loc++;
            // skip if already computed
            if (isPCA)
            {
                if (pcaConvValues[loc] > -numeric_limits<double>::infinity())
                    continue;
            }
            else if(convValues[loc] > -numeric_limits<double>::infinity())
                continue;

            // check for deformation pruning: if even the deformation cost
            // is below the threshold, the convolution cannot help
            double defCost = defCostCacheX[pId][px - x + halfWindowSize]
                + defCostCacheY[pId][py - y + halfWindowSize];

            if (defCost < defThreshold)
                continue;

            if (isPCA)
            {
                pcaConvValues[loc] = convolutionEngine.convolve
                    (pcaPyramid[plevel], model.partPCAFilters[pId],
                     model.pcaDim, x, y);
            }
            else
            {
                convValues[loc] = convolutionEngine.convolve
                    (pyramid[plevel], model.partFilters[pId],
                     model.numFeatures, x, y);
            }
        } // x
    } // y

    // do distance transform over the region.
    // the region is small enough that brute-force DT
    // is the fastest method
    double max = -numeric_limits<double>::infinity();
    int xargmax = 0;
    int yargmax = 0;

    for (int y = ystart; y <= yend; y++)
    {
        int loc = convBaseOffset + y*featWidth + xstart - 1;
        for (int x = xstart; x <= xend; x++)
        {
            loc++;
            double v;

            if (isPCA)
                v = pcaConvValues[loc];
            else
                v = convValues[loc];

            // filter response plus separable deformation cost
            v += defCostCacheX[pId][px - x + halfWindowSize]
                + defCostCacheY[pId][py - y + halfWindowSize];

            if (v > max)
            {
                max = v;
                xargmax = x;
                yargmax = y;
            } // if v
        } // for x
    } // for y

    // record max and argmax for DT (the argmax is later read in process()
    // to report part box locations)
    if (isPCA)
    {
        pcaDtArgmaxX[dtBaseOffset] = xargmax;
        pcaDtArgmaxY[dtBaseOffset] = yargmax;
        pcaDtValues[dtBaseOffset] = max;
    }
    else
    {
        dtArgmaxX[dtBaseOffset] = xargmax;
        dtArgmaxY[dtBaseOffset] = yargmax;
        dtValues[dtBaseOffset] = max;
    }

    return max;
}
559
560 } // namespace dpm
561 } // namespace cv
562