1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4 
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP
7 
8 #include "../../op_cuda.hpp"
9 
10 #include "../csl/stream.hpp"
11 #include "../csl/cudnn.hpp"
12 #include "../csl/tensor_ops.hpp"
13 
14 #include "../kernels/region.hpp"
15 
16 #include "../../nms.inl.hpp"
17 
18 #include <opencv2/core.hpp>
19 
20 #include <cstddef>
21 #include <utility>
22 #include <vector>
23 
24 namespace cv { namespace dnn { namespace cuda4dnn {
25 
26     enum class SquashMethod {
27         SOFTMAX,
28         SIGMOID
29     };
30 
31     template <class T>
32     struct RegionConfiguration {
33         /* The image is divided into (H, W) cells.
34          *
35          * Each cell is interested in exactly one object and predicts `boxes_per_cell` bounding boxes
36          * for that object.
37          *
38          * Each bounding box contains:
39          * - 4 box coordinates
40          * - objectness confidence score
41          * - `classes` number of class scores
42          *
43          * The object score is reduced to a probability using sigmoid and the class scores are reduced to
44          * probabilities by either applying sigmoid or softmax (which is a configuration option).
45          *
46          * object_prob = sigmoid(object_score)
47          * conditional_class_prob = sigmoid, softmax across all classes
48          *
49          * actual class probability = conditional_class_prob * object_prob
50          */
51         std::size_t classes, boxes_per_cell;
52         std::size_t width_norm, height_norm;
53         T scale_x_y;
54 
55         /* method for reducing class scores to probabilities */
56         SquashMethod squash_method;
57 
58         /* prob cutoffs below which the prediction is nulled */
59         T object_prob_cutoff;
60         T class_prob_cutoff;
61 
62         T nms_iou_threshold;
63     };
64 
65     template <class T>
66     class RegionOp final : public CUDABackendNode {
67     public:
68         using wrapper_type = GetCUDABackendWrapperType<T>;
69 
70         template <class V>
RegionOp(csl::Stream stream_,const cv::Mat & bias,const RegionConfiguration<V> & config)71         RegionOp(csl::Stream stream_, const cv::Mat& bias, const RegionConfiguration<V>& config)
72             : stream(std::move(stream_))
73         {
74             biasTensor = csl::makeTensorHeader<T>(bias);
75             csl::copyMatToTensor<T>(bias, biasTensor, stream);
76 
77             classes = config.classes;
78             boxes_per_cell = config.boxes_per_cell;
79 
80             width_norm = config.width_norm;
81             height_norm = config.height_norm;
82 
83             scale_x_y = config.scale_x_y;
84 
85             squash_type = config.squash_method;
86             object_prob_cutoff = config.object_prob_cutoff;
87             class_prob_cutoff = config.class_prob_cutoff;
88 
89             nms_iou_threshold = config.nms_iou_threshold;
90         }
91 
forward(const std::vector<cv::Ptr<BackendWrapper>> & inputs,const std::vector<cv::Ptr<BackendWrapper>> & outputs,csl::Workspace & workspace)92         void forward(
93             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
94             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
95             csl::Workspace& workspace) override
96         {
97             CV_Assert(outputs.size() == 1);
98 
99             auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
100             auto input = input_wrapper->getView();
101 
102             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
103             auto output = output_wrapper->getSpan();
104 
105             auto rows = input.get_axis_size(1);
106             auto cols = input.get_axis_size(2);
107 
108             auto cell_box_size = classes + 4 + 1;
109 
110             /* we squash class scores into probabilities using softmax or sigmoid */
111             bool if_true_sigmoid_else_softmax = (squash_type == SquashMethod::SIGMOID);
112 
113             kernels::region<T>(stream, output, input, biasTensor,
114                 object_prob_cutoff, class_prob_cutoff,
115                 boxes_per_cell, cell_box_size,
116                 rows, cols, scale_x_y,
117                 height_norm, width_norm,
118                 if_true_sigmoid_else_softmax
119             );
120 
121             if (nms_iou_threshold > 0) {
122                 auto output_mat = output_wrapper->getMutableHostMat();
123                 CV_Assert(output_mat.type() == CV_32F);
124                 for (int i = 0; i < input.get_axis_size(0); i++) {
125                     auto sample_size = rows * cols * boxes_per_cell * cell_box_size;
126                     do_nms_sort(reinterpret_cast<float*>(output_mat.data) + i * sample_size, rows * cols * boxes_per_cell, class_prob_cutoff, nms_iou_threshold);
127                 }
128             }
129         }
130 
131     private:
do_nms_sort(float * detections,int total,float score_thresh,float nms_thresh)132         void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
133         {
134             std::vector<Rect2d> boxes(total);
135             std::vector<float> scores(total);
136 
137             for (int i = 0; i < total; ++i)
138             {
139                 Rect2d &b = boxes[i];
140                 int box_index = i * (classes + 4 + 1);
141                 b.width = detections[box_index + 2];
142                 b.height = detections[box_index + 3];
143                 b.x = detections[box_index + 0] - b.width / 2;
144                 b.y = detections[box_index + 1] - b.height / 2;
145             }
146 
147             std::vector<int> indices;
148             for (int k = 0; k < classes; ++k)
149             {
150                 for (int i = 0; i < total; ++i)
151                 {
152                     int box_index = i * (classes + 4 + 1);
153                     int class_index = box_index + 5;
154                     scores[i] = detections[class_index + k];
155                     detections[class_index + k] = 0;
156                 }
157                 NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
158                 for (int i = 0, n = indices.size(); i < n; ++i)
159                 {
160                     int box_index = indices[i] * (classes + 4 + 1);
161                     int class_index = box_index + 5;
162                     detections[class_index + k] = scores[indices[i]];
163                 }
164             }
165         }
166 
167     private:
168         csl::Stream stream;
169 
170         csl::Tensor<T> biasTensor;
171         std::size_t classes, boxes_per_cell;
172         std::size_t width_norm, height_norm;
173         T scale_x_y;
174 
175         SquashMethod squash_type;
176         T object_prob_cutoff, class_prob_cutoff;
177 
178         T nms_iou_threshold;
179     };
180 
181 }}} /* namespace cv::dnn::cuda4dnn */
182 
183 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP */
184