1 // This file is part of OpenCV project. 2 // It is subject to the license terms in the LICENSE file found in the top-level directory 3 // of this distribution and at http://opencv.org/license.html. 4 5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP 6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP 7 8 #include "../../op_cuda.hpp" 9 10 #include "../csl/stream.hpp" 11 #include "../csl/cudnn.hpp" 12 #include "../csl/tensor_ops.hpp" 13 14 #include "../kernels/region.hpp" 15 16 #include "../../nms.inl.hpp" 17 18 #include <opencv2/core.hpp> 19 20 #include <cstddef> 21 #include <utility> 22 #include <vector> 23 24 namespace cv { namespace dnn { namespace cuda4dnn { 25 26 enum class SquashMethod { 27 SOFTMAX, 28 SIGMOID 29 }; 30 31 template <class T> 32 struct RegionConfiguration { 33 /* The image is divided into (H, W) cells. 34 * 35 * Each cell is interested in exactly one object and predicts `boxes_per_cell` bounding boxes 36 * for that object. 37 * 38 * Each bounding box contains: 39 * - 4 box coordinates 40 * - objectness confidence score 41 * - `classes` number of class scores 42 * 43 * The object score is reduced to a probability using sigmoid and the class scores are reduced to 44 * probabilities by either applying sigmoid or softmax (which is a configuration option). 45 * 46 * object_prob = sigmoid(object_score) 47 * conditional_class_prob = sigmoid, softmax across all classes 48 * 49 * actual class probability = conditional_class_prob * object_prob 50 */ 51 std::size_t classes, boxes_per_cell; 52 std::size_t width_norm, height_norm; 53 T scale_x_y; 54 55 /* method for reducing class scores to probabilities */ 56 SquashMethod squash_method; 57 58 /* prob cutoffs below which the prediction is nulled */ 59 T object_prob_cutoff; 60 T class_prob_cutoff; 61 62 T nms_iou_threshold; 63 }; 64 65 template <class T> 66 class RegionOp final : public CUDABackendNode { 67 public: 68 using wrapper_type = GetCUDABackendWrapperType<T>; 69 70 template <class V> RegionOp(csl::Stream stream_,const cv::Mat & bias,const RegionConfiguration<V> & config)71 RegionOp(csl::Stream stream_, const cv::Mat& bias, const RegionConfiguration<V>& config) 72 : stream(std::move(stream_)) 73 { 74 biasTensor = csl::makeTensorHeader<T>(bias); 75 csl::copyMatToTensor<T>(bias, biasTensor, stream); 76 77 classes = config.classes; 78 boxes_per_cell = config.boxes_per_cell; 79 80 width_norm = config.width_norm; 81 height_norm = config.height_norm; 82 83 scale_x_y = config.scale_x_y; 84 85 squash_type = config.squash_method; 86 object_prob_cutoff = config.object_prob_cutoff; 87 class_prob_cutoff = config.class_prob_cutoff; 88 89 nms_iou_threshold = config.nms_iou_threshold; 90 } 91 forward(const std::vector<cv::Ptr<BackendWrapper>> & inputs,const std::vector<cv::Ptr<BackendWrapper>> & outputs,csl::Workspace & workspace)92 void forward( 93 const std::vector<cv::Ptr<BackendWrapper>>& inputs, 94 const std::vector<cv::Ptr<BackendWrapper>>& outputs, 95 csl::Workspace& workspace) override 96 { 97 CV_Assert(outputs.size() == 1); 98 99 auto input_wrapper = inputs[0].dynamicCast<wrapper_type>(); 100 auto input = input_wrapper->getView(); 101 102 auto output_wrapper = outputs[0].dynamicCast<wrapper_type>(); 103 auto output = output_wrapper->getSpan(); 104 105 auto rows = input.get_axis_size(1); 106 auto cols = input.get_axis_size(2); 107 108 auto cell_box_size = classes + 4 + 1; 109 110 /* we squash class scores into probabilities using softmax or sigmoid */ 111 bool if_true_sigmoid_else_softmax = (squash_type == SquashMethod::SIGMOID); 112 113 kernels::region<T>(stream, output, input, biasTensor, 114 object_prob_cutoff, class_prob_cutoff, 115 boxes_per_cell, cell_box_size, 116 rows, cols, scale_x_y, 117 height_norm, width_norm, 118 if_true_sigmoid_else_softmax 119 ); 120 121 if (nms_iou_threshold > 0) { 122 auto output_mat = output_wrapper->getMutableHostMat(); 123 CV_Assert(output_mat.type() == CV_32F); 124 for (int i = 0; i < input.get_axis_size(0); i++) { 125 auto sample_size = rows * cols * boxes_per_cell * cell_box_size; 126 do_nms_sort(reinterpret_cast<float*>(output_mat.data) + i * sample_size, rows * cols * boxes_per_cell, class_prob_cutoff, nms_iou_threshold); 127 } 128 } 129 } 130 131 private: do_nms_sort(float * detections,int total,float score_thresh,float nms_thresh)132 void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh) 133 { 134 std::vector<Rect2d> boxes(total); 135 std::vector<float> scores(total); 136 137 for (int i = 0; i < total; ++i) 138 { 139 Rect2d &b = boxes[i]; 140 int box_index = i * (classes + 4 + 1); 141 b.width = detections[box_index + 2]; 142 b.height = detections[box_index + 3]; 143 b.x = detections[box_index + 0] - b.width / 2; 144 b.y = detections[box_index + 1] - b.height / 2; 145 } 146 147 std::vector<int> indices; 148 for (int k = 0; k < classes; ++k) 149 { 150 for (int i = 0; i < total; ++i) 151 { 152 int box_index = i * (classes + 4 + 1); 153 int class_index = box_index + 5; 154 scores[i] = detections[class_index + k]; 155 detections[class_index + k] = 0; 156 } 157 NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices); 158 for (int i = 0, n = indices.size(); i < n; ++i) 159 { 160 int box_index = indices[i] * (classes + 4 + 1); 161 int class_index = box_index + 5; 162 detections[class_index + k] = scores[indices[i]]; 163 } 164 } 165 } 166 167 private: 168 csl::Stream stream; 169 170 csl::Tensor<T> biasTensor; 171 std::size_t classes, boxes_per_cell; 172 std::size_t width_norm, height_norm; 173 T scale_x_y; 174 175 SquashMethod squash_type; 176 T object_prob_cutoff, class_prob_cutoff; 177 178 T nms_iou_threshold; 179 }; 180 181 }}} /* namespace cv::dnn::cuda4dnn */ 182 183 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP */ 184