1 // This file is part of OpenCV project. 2 // It is subject to the license terms in the LICENSE file found in the top-level directory 3 // of this distribution and at http://opencv.org/license.html. 4 #include "precomp.hpp" 5 6 #include "opencv2/core_detect.hpp" 7 8 9 namespace cv 10 { 11 namespace dnn_objdetect 12 { InferBbox(Mat _delta_bbox,Mat _class_scores,Mat _conf_scores)13 InferBbox::InferBbox(Mat _delta_bbox, Mat _class_scores, Mat _conf_scores) 14 { 15 this->delta_bbox = _delta_bbox; 16 this->class_scores = _class_scores; 17 this->conf_scores = _conf_scores; 18 19 image_width = 416; 20 image_height = 416; 21 22 W = 23; 23 H = 23; 24 num_classes = 20; 25 anchors_per_grid = 9; 26 anchors = W * H * anchors_per_grid; 27 28 intersection_thresh = 0.65; 29 nms_intersection_thresh = 0.1; 30 n_top_detections = 64; 31 epsilon = 1e-7; 32 33 anchors_values.resize(anchors); 34 for (size_t i = 0; i < anchors; ++i) 35 { 36 anchors_values[i].resize(4); 37 } 38 39 // Anchor shapes predicted from kmeans clustering 40 double arr[9][2] = {{377, 371}, {64, 118}, {129, 326}, 41 {172, 126}, {34, 46}, {353, 204}, 42 {89, 214}, {249, 361}, {209, 239}}; 43 for (size_t i = 0; i < anchors_per_grid; ++i) 44 { 45 anchor_shapes.push_back(std::make_pair(arr[i][1], arr[i][0])); 46 } 47 // Generate the anchor centers 48 for (size_t x = 1; x < W + 1; ++x) { 49 double c_x = (x * static_cast<double>(image_width)) / (W+1.0); 50 for (size_t y = 1; y < H + 1; ++y) { 51 double c_y = (y * static_cast<double>(image_height)) / (H+1.0); 52 anchor_center.push_back(std::make_pair(c_x, c_y)); 53 } 54 } 55 56 // Generate the final anchor values 57 for (size_t i = 0, anchor = 0, j = 0; anchor < anchors; ++anchor) 58 { 59 anchors_values[anchor][0] = anchor_center.at(i).first; 60 anchors_values[anchor][1] = anchor_center.at(i).second; 61 anchors_values[anchor][2] = anchor_shapes.at(j).first; 62 anchors_values[anchor][3] = anchor_shapes.at(j).second; 63 if ((anchor+1) % anchors_per_grid == 0) 64 { 65 i += 1; 66 j = 0; 67 } 68 else 69 { 70 ++j; 71 } 72 } 73 74 // Map the class index to the corresponding labels 75 std::string arrs[20] = {"aeroplane", "bicycle", "bird", "boat", 76 "bottle", "bus", "car", "cat", "chair", 77 "cow", "diningtable", "dog", "horse", 78 "motorbike", "person", "pottedplant", 79 "sheep", "sofa", "train", "tvmonitor"}; 80 for (size_t idx = 0; idx < num_classes; ++idx) 81 { 82 label_map.push_back(arrs[idx]); 83 } 84 } 85 filter(double thresh)86 void InferBbox::filter(double thresh) 87 { 88 this->intersection_thresh = thresh; 89 // Some containers 90 std::vector<std::vector<double> > transformed_bbox_preds(this->anchors); 91 std::vector<std::vector<double> > min_max_bboxes(this->anchors); 92 std::vector<std::vector<double> > final_probs(this->anchors); 93 94 for (size_t i = 0; i < this->anchors; ++i) 95 { 96 transformed_bbox_preds[i].resize(4); 97 final_probs[i].resize(num_classes); 98 min_max_bboxes[i].resize(4); 99 } 100 101 // Transform relative coordinates from ConvDet to bounding box coordinates 102 transform_bboxes(&transformed_bbox_preds); 103 104 // Do the inverse transformation of the predicted bboxes 105 transform_bboxes_inv(&transformed_bbox_preds, &min_max_bboxes); 106 107 // Ensure that the predicted bounding boxes are well within the image 108 // dimensions 109 assert_predictions(&min_max_bboxes); 110 111 // Compute the final probability values 112 final_probability_dist(&final_probs); 113 114 // Filter the classes of n_top_detections 115 std::vector<std::vector<double> > top_n_boxes(n_top_detections); 116 std::vector<size_t> top_n_idxs(n_top_detections); 117 std::vector<double> top_n_probs(n_top_detections); 118 for (size_t i = 0; i < n_top_detections; ++i) 119 { 120 top_n_boxes[i].resize(4); 121 } 122 123 filter_top_n(&final_probs, &min_max_bboxes, top_n_boxes, 124 top_n_idxs, top_n_probs); 125 126 // Apply Non-Maximal-Supression to the n_top_detections 127 nms_wrapper(top_n_boxes, top_n_idxs, top_n_probs); 128 129 } 130 transform_bboxes(std::vector<std::vector<double>> * bboxes)131 void InferBbox::transform_bboxes(std::vector<std::vector<double> > *bboxes) 132 { 133 for (unsigned int h = 0; h < H; ++h) 134 { 135 for (unsigned int w = 0; w < W; ++w) 136 { 137 for (unsigned int anchor = 0; anchor < anchors_per_grid; ++anchor) 138 { 139 const int anchor_idx = (h * W + w) * anchors_per_grid + anchor; 140 double delta_x = this->delta_bbox.at<float>(h, w, anchor * 4 + 0); 141 double delta_y = this->delta_bbox.at<float>(h, w, anchor * 4 + 1); 142 double delta_h = this->delta_bbox.at<float>(h, w, anchor * 4 + 2); 143 double delta_w = this->delta_bbox.at<float>(h, w, anchor * 4 + 3); 144 145 (*bboxes)[anchor_idx][0] = this->anchors_values[anchor_idx][0] + 146 this->anchors_values[anchor_idx][3] * delta_x; 147 (*bboxes)[anchor_idx][1] = this->anchors_values[anchor_idx][1] + 148 this->anchors_values[anchor_idx][2] * delta_y;; 149 (*bboxes)[anchor_idx][2] = 150 this->anchors_values[anchor_idx][2] * exp(delta_h); 151 (*bboxes)[anchor_idx][3] = 152 this->anchors_values[anchor_idx][3] * exp(delta_w); 153 } 154 } 155 } 156 } 157 final_probability_dist(std::vector<std::vector<double>> * final_probs)158 void InferBbox::final_probability_dist( 159 std::vector<std::vector<double> > *final_probs) 160 { 161 for (unsigned int h = 0; h < H; ++h) 162 { 163 for (unsigned int w = 0; w < W; ++w) 164 { 165 for (unsigned int ch = 0; ch < anchors_per_grid * num_classes; ++ch) 166 { 167 const int anchor_idx = 168 (h * W + w) * anchors_per_grid + ch / num_classes; 169 double pr_object = 170 conf_scores.at<float>(h, w, ch / num_classes); 171 double pr_class_idx = 172 class_scores.at<float>(anchor_idx, ch % num_classes); 173 (*final_probs)[anchor_idx][ch % num_classes] = 174 pr_object * pr_class_idx; 175 } 176 } 177 } 178 } 179 transform_bboxes_inv(std::vector<std::vector<double>> * pre,std::vector<std::vector<double>> * post)180 void InferBbox::transform_bboxes_inv( 181 std::vector<std::vector<double> > *pre, 182 std::vector<std::vector<double> > *post) 183 { 184 for (size_t anchor = 0; anchor < anchors; ++anchor) 185 { 186 double c_x = (*pre)[anchor][0]; 187 double c_y = (*pre)[anchor][1]; 188 double b_h = (*pre)[anchor][2]; 189 double b_w = (*pre)[anchor][3]; 190 191 (*post)[anchor][0] = c_x - b_w / 2.0; 192 (*post)[anchor][1] = c_y - b_h / 2.0; 193 (*post)[anchor][2] = c_x + b_w / 2.0; 194 (*post)[anchor][3] = c_y + b_h / 2.0; 195 } 196 } 197 assert_predictions(std::vector<std::vector<double>> * min_max_boxes)198 void InferBbox::assert_predictions(std::vector<std::vector<double> > 199 *min_max_boxes) 200 { 201 for (size_t anchor = 0; anchor < anchors; ++anchor) 202 { 203 double p_xmin = (*min_max_boxes)[anchor][0]; 204 double p_ymin = (*min_max_boxes)[anchor][1]; 205 double p_xmax = (*min_max_boxes)[anchor][2]; 206 double p_ymax = (*min_max_boxes)[anchor][3]; 207 208 (*min_max_boxes)[anchor][0] = std::min(std::max( 209 static_cast<double>(0.0), p_xmin), image_width - 210 static_cast<double>(1.0)); 211 (*min_max_boxes)[anchor][1] = std::min(std::max( 212 static_cast<double>(0.0), p_ymin), image_height - 213 static_cast<double>(1.0)); 214 (*min_max_boxes)[anchor][2] = std::max(std::min( 215 image_width - static_cast<double>(1.0), p_xmax), 216 static_cast<double>(0.0)); 217 (*min_max_boxes)[anchor][3] = std::max(std::min( 218 image_height - static_cast<double>(1.0), p_ymax), 219 static_cast<double>(0.0)); 220 } 221 } 222 filter_top_n(std::vector<std::vector<double>> * probs,std::vector<std::vector<double>> * boxes,std::vector<std::vector<double>> & top_n_boxes,std::vector<size_t> & top_n_idxs,std::vector<double> & top_n_probs)223 void InferBbox::filter_top_n(std::vector<std::vector<double> > 224 *probs, std::vector<std::vector<double> > *boxes, 225 std::vector<std::vector<double> > &top_n_boxes, 226 std::vector<size_t> &top_n_idxs, 227 std::vector<double> &top_n_probs) 228 { 229 std::vector<double> max_class_probs((*probs).size()); 230 std::vector<size_t> args((*probs).size()); 231 232 for (unsigned int box = 0; box < (*boxes).size(); ++box) 233 { 234 size_t _prob_idx = 235 std::max_element((*probs)[box].begin(), 236 (*probs)[box].end()) - (*probs)[box].begin(); 237 max_class_probs[box] = (*probs)[box][_prob_idx]; 238 } 239 240 std::vector<std::pair<double, size_t> > temp_sort(max_class_probs.size()); 241 for (size_t tidx = 0; tidx < max_class_probs.size(); ++tidx) 242 { 243 temp_sort[tidx] = std::make_pair(max_class_probs[tidx], 244 static_cast<size_t>(tidx)); 245 } 246 std::sort(temp_sort.begin(), temp_sort.end(), InferBbox::comparator); 247 248 for (size_t idx = 0; idx < temp_sort.size(); ++idx) 249 { 250 args[idx] = temp_sort[idx].second; 251 } 252 253 // Get n_top_detections 254 std::vector<size_t> top_n_order(args.begin(), 255 args.begin() + n_top_detections); 256 257 // Have a separate copy of all the n_top_detections 258 for (size_t n = 0; n < n_top_detections; ++n) 259 { 260 top_n_probs[n] = max_class_probs[top_n_order[n]]; 261 top_n_idxs[n] = 262 std::max_element((*probs)[top_n_order[n]].begin(), 263 (*probs)[top_n_order[n]].end()) - 264 (*probs)[top_n_order[n]].begin(); 265 for (size_t i = 0; i < 4; ++i) 266 { 267 top_n_boxes[n][i] = (*boxes)[top_n_order[n]][i]; 268 } 269 } 270 } 271 nms_wrapper(std::vector<std::vector<double>> & top_n_boxes,std::vector<size_t> & top_n_idxs,std::vector<double> & top_n_probs)272 void InferBbox::nms_wrapper(std::vector<std::vector<double> > 273 &top_n_boxes, std::vector<size_t> &top_n_idxs, 274 std::vector<double> &top_n_probs) 275 { 276 for (size_t c = 0; c < this->num_classes; ++c) 277 { 278 std::vector<size_t> idxs_per_class; 279 for (size_t n = 0; n < n_top_detections; ++n) 280 { 281 if (top_n_idxs[n] == c) 282 { 283 idxs_per_class.push_back(n); 284 } 285 } 286 287 // Just continue in case there are no objects of this class 288 if (idxs_per_class.size() == 0) 289 { 290 continue; 291 } 292 293 // Process per class detections 294 std::vector<std::vector<double> > boxes_per_class(idxs_per_class.size()); 295 std::vector<double> probs_per_class(idxs_per_class.size()); 296 std::vector<bool> keep_per_class; 297 for (std::vector<size_t>::iterator itr = idxs_per_class.begin(); 298 itr != idxs_per_class.end(); ++itr) 299 { 300 size_t idx = itr - idxs_per_class.begin(); 301 probs_per_class[idx] = top_n_probs[*itr]; 302 for (size_t b = 0; b < 4; ++b) 303 { 304 boxes_per_class[idx].push_back(top_n_boxes[*itr][b]); 305 } 306 } 307 keep_per_class = 308 non_maximal_suppression(&boxes_per_class, &probs_per_class); 309 for (std::vector<bool>::iterator itr = keep_per_class.begin(); 310 itr != keep_per_class.end(); ++itr) 311 { 312 size_t idx = itr - keep_per_class.begin(); 313 if (*itr && probs_per_class[idx] > this->intersection_thresh) 314 { 315 dnn_objdetect::object new_detection; 316 317 new_detection.class_idx = c; 318 new_detection.label_name = this->label_map[c]; 319 new_detection.xmin = (int)boxes_per_class[idx][0]; 320 new_detection.ymin = (int)boxes_per_class[idx][1]; 321 new_detection.xmax = (int)boxes_per_class[idx][2]; 322 new_detection.ymax = (int)boxes_per_class[idx][3]; 323 new_detection.class_prob = probs_per_class[idx]; 324 325 this->detections.push_back(new_detection); 326 } 327 } 328 } 329 } 330 non_maximal_suppression(std::vector<std::vector<double>> * boxes,std::vector<double> * probs)331 std::vector<bool> InferBbox::non_maximal_suppression( 332 std::vector<std::vector<double> > *boxes, std::vector<double> 333 *probs) 334 { 335 std::vector<bool> keep(((*probs).size())); 336 std::fill(keep.begin(), keep.end(), true); 337 std::vector<size_t> prob_args_sorted((*probs).size()); 338 339 std::vector<std::pair<double, size_t> > temp_sort((*probs).size()); 340 for (size_t tidx = 0; tidx < (*probs).size(); ++tidx) 341 { 342 temp_sort[tidx] = std::make_pair((*probs)[tidx], 343 static_cast<size_t>(tidx)); 344 } 345 std::sort(temp_sort.begin(), temp_sort.end(), InferBbox::comparator); 346 347 for (size_t idx = 0; idx < temp_sort.size(); ++idx) 348 { 349 prob_args_sorted[idx] = temp_sort[idx].second; 350 } 351 352 for (std::vector<size_t>::iterator itr = prob_args_sorted.begin(); 353 itr != prob_args_sorted.end()-1; ++itr) 354 { 355 size_t idx = itr - prob_args_sorted.begin(); 356 std::vector<double> iou_(prob_args_sorted.size() - idx - 1); 357 std::vector<std::vector<double> > temp_boxes(iou_.size()); 358 for (size_t bb = 0; bb < temp_boxes.size(); ++bb) 359 { 360 std::vector<double> temp_box(4); 361 for (size_t b = 0; b < 4; ++b) 362 { 363 temp_box[b] = (*boxes)[prob_args_sorted[idx + bb + 1]][b]; 364 } 365 temp_boxes[bb] = temp_box; 366 } 367 intersection_over_union(&temp_boxes, 368 &(*boxes)[prob_args_sorted[idx]], &iou_); 369 for (std::vector<double>::iterator _itr = iou_.begin(); 370 _itr != iou_.end(); ++_itr) 371 { 372 size_t iou_idx = _itr - iou_.begin(); 373 if (*_itr > nms_intersection_thresh) 374 { 375 keep[prob_args_sorted[idx+iou_idx+1]] = false; 376 } 377 } 378 } 379 return keep; 380 } 381 intersection_over_union(std::vector<std::vector<double>> * boxes,std::vector<double> * base_box,std::vector<double> * iou)382 void InferBbox::intersection_over_union(std::vector<std::vector<double> > 383 *boxes, std::vector<double> *base_box, std::vector<double> *iou) 384 { 385 double g_xmin = (*base_box)[0]; 386 double g_ymin = (*base_box)[1]; 387 double g_xmax = (*base_box)[2]; 388 double g_ymax = (*base_box)[3]; 389 double base_box_w = g_xmax - g_xmin; 390 double base_box_h = g_ymax - g_ymin; 391 for (size_t b = 0; b < (*boxes).size(); ++b) 392 { 393 double xmin = std::max((*boxes)[b][0], g_xmin); 394 double ymin = std::max((*boxes)[b][1], g_ymin); 395 double xmax = std::min((*boxes)[b][2], g_xmax); 396 double ymax = std::min((*boxes)[b][3], g_ymax); 397 398 // Intersection 399 double w = std::max(static_cast<double>(0.0), xmax - xmin); 400 double h = std::max(static_cast<double>(0.0), ymax - ymin); 401 // Union 402 double test_box_w = (*boxes)[b][2] - (*boxes)[b][0]; 403 double test_box_h = (*boxes)[b][3] - (*boxes)[b][1]; 404 405 double inter_ = w * h; 406 double union_ = test_box_h * test_box_w + base_box_h * base_box_w - inter_; 407 (*iou)[b] = inter_ / (union_ + epsilon); 408 } 409 } 410 411 } 412 413 } 414