1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4 #include "precomp.hpp"
5 
6 #include "opencv2/core_detect.hpp"
7 
8 
9 namespace cv
10 {
11   namespace dnn_objdetect
12   {
InferBbox(Mat _delta_bbox,Mat _class_scores,Mat _conf_scores)13     InferBbox::InferBbox(Mat _delta_bbox, Mat _class_scores, Mat _conf_scores)
14     {
15       this->delta_bbox = _delta_bbox;
16       this->class_scores = _class_scores;
17       this->conf_scores = _conf_scores;
18 
19       image_width = 416;
20       image_height = 416;
21 
22       W = 23;
23       H = 23;
24       num_classes = 20;
25       anchors_per_grid = 9;
26       anchors = W * H * anchors_per_grid;
27 
28       intersection_thresh = 0.65;
29       nms_intersection_thresh = 0.1;
30       n_top_detections = 64;
31       epsilon = 1e-7;
32 
33       anchors_values.resize(anchors);
34       for (size_t i = 0; i < anchors; ++i)
35       {
36         anchors_values[i].resize(4);
37       }
38 
39       // Anchor shapes predicted from kmeans clustering
40       double arr[9][2] = {{377, 371}, {64, 118}, {129, 326},
41                           {172, 126}, {34, 46}, {353, 204},
42                           {89, 214}, {249, 361}, {209, 239}};
43       for (size_t i = 0; i < anchors_per_grid; ++i)
44       {
45         anchor_shapes.push_back(std::make_pair(arr[i][1], arr[i][0]));
46       }
47       // Generate the anchor centers
48       for (size_t x = 1; x < W + 1; ++x) {
49         double c_x = (x * static_cast<double>(image_width)) / (W+1.0);
50         for (size_t y = 1; y < H + 1; ++y) {
51           double c_y = (y * static_cast<double>(image_height)) / (H+1.0);
52           anchor_center.push_back(std::make_pair(c_x, c_y));
53         }
54       }
55 
56       // Generate the final anchor values
57       for (size_t i = 0, anchor = 0, j = 0; anchor < anchors; ++anchor)
58       {
59         anchors_values[anchor][0] = anchor_center.at(i).first;
60         anchors_values[anchor][1] = anchor_center.at(i).second;
61         anchors_values[anchor][2] = anchor_shapes.at(j).first;
62         anchors_values[anchor][3] = anchor_shapes.at(j).second;
63         if ((anchor+1) % anchors_per_grid == 0)
64         {
65           i += 1;
66           j = 0;
67         }
68         else
69         {
70           ++j;
71         }
72       }
73 
74       // Map the class index to the corresponding labels
75       std::string arrs[20] = {"aeroplane", "bicycle", "bird", "boat",
76                            "bottle", "bus", "car", "cat", "chair",
77                            "cow", "diningtable", "dog", "horse",
78                            "motorbike", "person", "pottedplant",
79                            "sheep", "sofa", "train", "tvmonitor"};
80       for (size_t idx = 0; idx < num_classes; ++idx)
81       {
82         label_map.push_back(arrs[idx]);
83       }
84     }
85 
filter(double thresh)86     void InferBbox::filter(double thresh)
87     {
88       this->intersection_thresh = thresh;
89       // Some containers
90       std::vector<std::vector<double> > transformed_bbox_preds(this->anchors);
91       std::vector<std::vector<double> > min_max_bboxes(this->anchors);
92       std::vector<std::vector<double> > final_probs(this->anchors);
93 
94       for (size_t i = 0; i < this->anchors; ++i)
95       {
96         transformed_bbox_preds[i].resize(4);
97         final_probs[i].resize(num_classes);
98         min_max_bboxes[i].resize(4);
99       }
100 
101       // Transform relative coordinates from ConvDet to bounding box coordinates
102       transform_bboxes(&transformed_bbox_preds);
103 
104       // Do the inverse transformation of the predicted bboxes
105       transform_bboxes_inv(&transformed_bbox_preds, &min_max_bboxes);
106 
107       // Ensure that the predicted bounding boxes are well within the image
108       // dimensions
109       assert_predictions(&min_max_bboxes);
110 
111       // Compute the final probability values
112       final_probability_dist(&final_probs);
113 
114       // Filter the classes of n_top_detections
115       std::vector<std::vector<double> > top_n_boxes(n_top_detections);
116       std::vector<size_t> top_n_idxs(n_top_detections);
117       std::vector<double> top_n_probs(n_top_detections);
118       for (size_t i = 0; i < n_top_detections; ++i)
119       {
120         top_n_boxes[i].resize(4);
121       }
122 
123       filter_top_n(&final_probs, &min_max_bboxes, top_n_boxes,
124         top_n_idxs, top_n_probs);
125 
126       // Apply Non-Maximal-Supression to the n_top_detections
127       nms_wrapper(top_n_boxes, top_n_idxs, top_n_probs);
128 
129     }
130 
transform_bboxes(std::vector<std::vector<double>> * bboxes)131     void InferBbox::transform_bboxes(std::vector<std::vector<double> > *bboxes)
132     {
133       for (unsigned int h = 0; h < H; ++h)
134       {
135         for (unsigned int w = 0; w < W; ++w)
136         {
137           for (unsigned int anchor = 0; anchor < anchors_per_grid; ++anchor)
138           {
139             const int anchor_idx = (h * W + w) * anchors_per_grid + anchor;
140             double delta_x = this->delta_bbox.at<float>(h, w, anchor * 4 + 0);
141             double delta_y = this->delta_bbox.at<float>(h, w, anchor * 4 + 1);
142             double delta_h = this->delta_bbox.at<float>(h, w, anchor * 4 + 2);
143             double delta_w = this->delta_bbox.at<float>(h, w, anchor * 4 + 3);
144 
145             (*bboxes)[anchor_idx][0] = this->anchors_values[anchor_idx][0] +
146                           this->anchors_values[anchor_idx][3] * delta_x;
147             (*bboxes)[anchor_idx][1] = this->anchors_values[anchor_idx][1] +
148                           this->anchors_values[anchor_idx][2] * delta_y;;
149             (*bboxes)[anchor_idx][2] =
150                           this->anchors_values[anchor_idx][2] * exp(delta_h);
151             (*bboxes)[anchor_idx][3] =
152                           this->anchors_values[anchor_idx][3] * exp(delta_w);
153           }
154         }
155       }
156     }
157 
final_probability_dist(std::vector<std::vector<double>> * final_probs)158     void InferBbox::final_probability_dist(
159         std::vector<std::vector<double> > *final_probs)
160     {
161       for (unsigned int h = 0; h < H; ++h)
162       {
163         for (unsigned int w = 0; w < W; ++w)
164         {
165           for (unsigned int ch = 0; ch < anchors_per_grid * num_classes; ++ch)
166           {
167             const int anchor_idx =
168               (h * W + w) * anchors_per_grid + ch / num_classes;
169             double pr_object =
170               conf_scores.at<float>(h, w, ch / num_classes);
171             double pr_class_idx =
172               class_scores.at<float>(anchor_idx, ch % num_classes);
173             (*final_probs)[anchor_idx][ch % num_classes] =
174               pr_object * pr_class_idx;
175           }
176         }
177       }
178     }
179 
transform_bboxes_inv(std::vector<std::vector<double>> * pre,std::vector<std::vector<double>> * post)180     void InferBbox::transform_bboxes_inv(
181       std::vector<std::vector<double> > *pre,
182       std::vector<std::vector<double> > *post)
183     {
184       for (size_t anchor = 0; anchor < anchors; ++anchor)
185       {
186         double c_x = (*pre)[anchor][0];
187         double c_y = (*pre)[anchor][1];
188         double b_h = (*pre)[anchor][2];
189         double b_w = (*pre)[anchor][3];
190 
191         (*post)[anchor][0] = c_x - b_w / 2.0;
192         (*post)[anchor][1] = c_y - b_h / 2.0;
193         (*post)[anchor][2] = c_x + b_w / 2.0;
194         (*post)[anchor][3] = c_y + b_h / 2.0;
195       }
196     }
197 
assert_predictions(std::vector<std::vector<double>> * min_max_boxes)198     void InferBbox::assert_predictions(std::vector<std::vector<double> >
199          *min_max_boxes)
200     {
201       for (size_t anchor = 0; anchor < anchors; ++anchor)
202       {
203         double p_xmin = (*min_max_boxes)[anchor][0];
204         double p_ymin = (*min_max_boxes)[anchor][1];
205         double p_xmax = (*min_max_boxes)[anchor][2];
206         double p_ymax = (*min_max_boxes)[anchor][3];
207 
208         (*min_max_boxes)[anchor][0] = std::min(std::max(
209           static_cast<double>(0.0), p_xmin), image_width -
210           static_cast<double>(1.0));
211         (*min_max_boxes)[anchor][1] = std::min(std::max(
212           static_cast<double>(0.0), p_ymin), image_height -
213           static_cast<double>(1.0));
214         (*min_max_boxes)[anchor][2] = std::max(std::min(
215           image_width - static_cast<double>(1.0), p_xmax),
216           static_cast<double>(0.0));
217         (*min_max_boxes)[anchor][3] = std::max(std::min(
218           image_height - static_cast<double>(1.0), p_ymax),
219           static_cast<double>(0.0));
220       }
221     }
222 
filter_top_n(std::vector<std::vector<double>> * probs,std::vector<std::vector<double>> * boxes,std::vector<std::vector<double>> & top_n_boxes,std::vector<size_t> & top_n_idxs,std::vector<double> & top_n_probs)223     void InferBbox::filter_top_n(std::vector<std::vector<double> >
224       *probs, std::vector<std::vector<double> > *boxes,
225       std::vector<std::vector<double> > &top_n_boxes,
226       std::vector<size_t> &top_n_idxs,
227       std::vector<double> &top_n_probs)
228     {
229       std::vector<double> max_class_probs((*probs).size());
230       std::vector<size_t> args((*probs).size());
231 
232       for (unsigned int box = 0; box < (*boxes).size(); ++box)
233       {
234         size_t _prob_idx =
235             std::max_element((*probs)[box].begin(),
236             (*probs)[box].end()) - (*probs)[box].begin();
237         max_class_probs[box] = (*probs)[box][_prob_idx];
238       }
239 
240       std::vector<std::pair<double, size_t> > temp_sort(max_class_probs.size());
241       for (size_t tidx = 0; tidx < max_class_probs.size(); ++tidx)
242       {
243         temp_sort[tidx] = std::make_pair(max_class_probs[tidx],
244           static_cast<size_t>(tidx));
245       }
246       std::sort(temp_sort.begin(), temp_sort.end(), InferBbox::comparator);
247 
248       for (size_t idx = 0; idx < temp_sort.size(); ++idx)
249       {
250         args[idx] = temp_sort[idx].second;
251       }
252 
253       // Get n_top_detections
254       std::vector<size_t> top_n_order(args.begin(),
255         args.begin() + n_top_detections);
256 
257       // Have a separate copy of all the n_top_detections
258       for (size_t n = 0; n < n_top_detections; ++n)
259       {
260         top_n_probs[n] = max_class_probs[top_n_order[n]];
261         top_n_idxs[n]  =
262             std::max_element((*probs)[top_n_order[n]].begin(),
263             (*probs)[top_n_order[n]].end()) -
264             (*probs)[top_n_order[n]].begin();
265         for (size_t i = 0; i < 4; ++i)
266         {
267           top_n_boxes[n][i] = (*boxes)[top_n_order[n]][i];
268         }
269       }
270     }
271 
nms_wrapper(std::vector<std::vector<double>> & top_n_boxes,std::vector<size_t> & top_n_idxs,std::vector<double> & top_n_probs)272     void InferBbox::nms_wrapper(std::vector<std::vector<double> >
273       &top_n_boxes, std::vector<size_t> &top_n_idxs,
274       std::vector<double> &top_n_probs)
275     {
276       for (size_t c = 0; c < this->num_classes; ++c)
277       {
278         std::vector<size_t> idxs_per_class;
279         for (size_t n = 0; n < n_top_detections; ++n)
280         {
281           if (top_n_idxs[n] == c)
282           {
283             idxs_per_class.push_back(n);
284           }
285         }
286 
287         // Just continue in case there are no objects of this class
288         if (idxs_per_class.size() == 0)
289         {
290           continue;
291         }
292 
293         // Process per class detections
294         std::vector<std::vector<double> > boxes_per_class(idxs_per_class.size());
295         std::vector<double> probs_per_class(idxs_per_class.size());
296         std::vector<bool> keep_per_class;
297         for (std::vector<size_t>::iterator itr = idxs_per_class.begin();
298             itr != idxs_per_class.end(); ++itr)
299         {
300           size_t idx = itr - idxs_per_class.begin();
301           probs_per_class[idx] = top_n_probs[*itr];
302           for (size_t b = 0; b < 4; ++b)
303           {
304             boxes_per_class[idx].push_back(top_n_boxes[*itr][b]);
305           }
306         }
307         keep_per_class =
308             non_maximal_suppression(&boxes_per_class, &probs_per_class);
309         for (std::vector<bool>::iterator itr = keep_per_class.begin();
310             itr != keep_per_class.end(); ++itr)
311         {
312           size_t idx = itr - keep_per_class.begin();
313           if (*itr && probs_per_class[idx] > this->intersection_thresh)
314           {
315             dnn_objdetect::object new_detection;
316 
317             new_detection.class_idx = c;
318             new_detection.label_name = this->label_map[c];
319             new_detection.xmin = (int)boxes_per_class[idx][0];
320             new_detection.ymin = (int)boxes_per_class[idx][1];
321             new_detection.xmax = (int)boxes_per_class[idx][2];
322             new_detection.ymax = (int)boxes_per_class[idx][3];
323             new_detection.class_prob = probs_per_class[idx];
324 
325             this->detections.push_back(new_detection);
326           }
327         }
328       }
329     }
330 
non_maximal_suppression(std::vector<std::vector<double>> * boxes,std::vector<double> * probs)331     std::vector<bool> InferBbox::non_maximal_suppression(
332       std::vector<std::vector<double> > *boxes, std::vector<double>
333       *probs)
334     {
335       std::vector<bool> keep(((*probs).size()));
336       std::fill(keep.begin(), keep.end(), true);
337       std::vector<size_t> prob_args_sorted((*probs).size());
338 
339       std::vector<std::pair<double, size_t> > temp_sort((*probs).size());
340       for (size_t tidx = 0; tidx < (*probs).size(); ++tidx)
341       {
342         temp_sort[tidx] = std::make_pair((*probs)[tidx],
343           static_cast<size_t>(tidx));
344       }
345       std::sort(temp_sort.begin(), temp_sort.end(), InferBbox::comparator);
346 
347       for (size_t idx = 0; idx < temp_sort.size(); ++idx)
348       {
349         prob_args_sorted[idx] = temp_sort[idx].second;
350       }
351 
352       for (std::vector<size_t>::iterator itr = prob_args_sorted.begin();
353           itr != prob_args_sorted.end()-1; ++itr)
354       {
355         size_t idx = itr - prob_args_sorted.begin();
356         std::vector<double> iou_(prob_args_sorted.size() - idx - 1);
357         std::vector<std::vector<double> > temp_boxes(iou_.size());
358         for (size_t bb = 0; bb < temp_boxes.size(); ++bb)
359         {
360           std::vector<double> temp_box(4);
361           for (size_t b = 0; b < 4; ++b)
362           {
363             temp_box[b] = (*boxes)[prob_args_sorted[idx + bb + 1]][b];
364           }
365           temp_boxes[bb] = temp_box;
366         }
367         intersection_over_union(&temp_boxes,
368             &(*boxes)[prob_args_sorted[idx]], &iou_);
369         for (std::vector<double>::iterator _itr = iou_.begin();
370             _itr != iou_.end(); ++_itr)
371         {
372           size_t iou_idx = _itr - iou_.begin();
373           if (*_itr > nms_intersection_thresh)
374           {
375             keep[prob_args_sorted[idx+iou_idx+1]] = false;
376           }
377         }
378       }
379       return keep;
380     }
381 
intersection_over_union(std::vector<std::vector<double>> * boxes,std::vector<double> * base_box,std::vector<double> * iou)382     void InferBbox::intersection_over_union(std::vector<std::vector<double> >
383       *boxes, std::vector<double> *base_box, std::vector<double> *iou)
384     {
385       double g_xmin = (*base_box)[0];
386       double g_ymin = (*base_box)[1];
387       double g_xmax = (*base_box)[2];
388       double g_ymax = (*base_box)[3];
389       double base_box_w = g_xmax - g_xmin;
390       double base_box_h = g_ymax - g_ymin;
391       for (size_t b = 0; b < (*boxes).size(); ++b)
392       {
393         double xmin = std::max((*boxes)[b][0], g_xmin);
394         double ymin = std::max((*boxes)[b][1], g_ymin);
395         double xmax = std::min((*boxes)[b][2], g_xmax);
396         double ymax = std::min((*boxes)[b][3], g_ymax);
397 
398         // Intersection
399         double w = std::max(static_cast<double>(0.0), xmax - xmin);
400         double h = std::max(static_cast<double>(0.0), ymax - ymin);
401         // Union
402         double test_box_w = (*boxes)[b][2] - (*boxes)[b][0];
403         double test_box_h = (*boxes)[b][3] - (*boxes)[b][1];
404 
405         double inter_ = w * h;
406         double union_ = test_box_h * test_box_w + base_box_h * base_box_w - inter_;
407         (*iou)[b] = inter_ / (union_ + epsilon);
408       }
409     }
410 
411   }
412 
413 }
414