1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #include "yolov3detectionoutput.h"
16 
17 #include "layer_type.h"
18 
19 #include <float.h>
20 #include <math.h>
21 
22 namespace ncnn {
23 
Yolov3DetectionOutput()24 Yolov3DetectionOutput::Yolov3DetectionOutput()
25 {
26     one_blob_only = false;
27     support_inplace = false;
28 
29     //softmax = ncnn::create_layer(ncnn::LayerType::Softmax);
30 
31     // set param
32     ncnn::ParamDict pd;
33     pd.set(0, 0); // axis
34 
35     //softmax->load_param(pd);
36 }
37 
~Yolov3DetectionOutput()38 Yolov3DetectionOutput::~Yolov3DetectionOutput()
39 {
40     //delete softmax;
41 }
42 
load_param(const ParamDict & pd)43 int Yolov3DetectionOutput::load_param(const ParamDict& pd)
44 {
45     num_class = pd.get(0, 20);
46     num_box = pd.get(1, 5);
47     confidence_threshold = pd.get(2, 0.01f);
48     nms_threshold = pd.get(3, 0.45f);
49     biases = pd.get(4, Mat());
50     mask = pd.get(5, Mat());
51     anchors_scale = pd.get(6, Mat());
52     return 0;
53 }
54 
intersection_area(const Yolov3DetectionOutput::BBoxRect & a,const Yolov3DetectionOutput::BBoxRect & b)55 static inline float intersection_area(const Yolov3DetectionOutput::BBoxRect& a, const Yolov3DetectionOutput::BBoxRect& b)
56 {
57     if (a.xmin > b.xmax || a.xmax < b.xmin || a.ymin > b.ymax || a.ymax < b.ymin)
58     {
59         // no intersection
60         return 0.f;
61     }
62 
63     float inter_width = std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin);
64     float inter_height = std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin);
65 
66     return inter_width * inter_height;
67 }
68 
qsort_descent_inplace(std::vector<BBoxRect> & datas,int left,int right) const69 void Yolov3DetectionOutput::qsort_descent_inplace(std::vector<BBoxRect>& datas, int left, int right) const
70 {
71     int i = left;
72     int j = right;
73     float p = datas[(left + right) / 2].score;
74 
75     while (i <= j)
76     {
77         while (datas[i].score > p)
78             i++;
79 
80         while (datas[j].score < p)
81             j--;
82 
83         if (i <= j)
84         {
85             // swap
86             std::swap(datas[i], datas[j]);
87 
88             i++;
89             j--;
90         }
91     }
92 
93     if (left < j)
94         qsort_descent_inplace(datas, left, j);
95 
96     if (i < right)
97         qsort_descent_inplace(datas, i, right);
98 }
99 
qsort_descent_inplace(std::vector<BBoxRect> & datas) const100 void Yolov3DetectionOutput::qsort_descent_inplace(std::vector<BBoxRect>& datas) const
101 {
102     if (datas.empty())
103         return;
104 
105     qsort_descent_inplace(datas, 0, static_cast<int>(datas.size() - 1));
106 }
107 
nms_sorted_bboxes(std::vector<BBoxRect> & bboxes,std::vector<size_t> & picked,float nms_threshold) const108 void Yolov3DetectionOutput::nms_sorted_bboxes(std::vector<BBoxRect>& bboxes, std::vector<size_t>& picked, float nms_threshold) const
109 {
110     picked.clear();
111 
112     const size_t n = bboxes.size();
113 
114     for (size_t i = 0; i < n; i++)
115     {
116         const BBoxRect& a = bboxes[i];
117 
118         int keep = 1;
119         for (int j = 0; j < (int)picked.size(); j++)
120         {
121             const BBoxRect& b = bboxes[picked[j]];
122 
123             // intersection over union
124             float inter_area = intersection_area(a, b);
125             float union_area = a.area + b.area - inter_area;
126             // float IoU = inter_area / union_area
127             if (inter_area > nms_threshold * union_area)
128             {
129                 keep = 0;
130                 break;
131             }
132         }
133 
134         if (keep)
135             picked.push_back(i);
136     }
137 }
138 
sigmoid(float x)139 static inline float sigmoid(float x)
140 {
141     return static_cast<float>(1.f / (1.f + exp(-x)));
142 }
143 
forward(const std::vector<Mat> & bottom_blobs,std::vector<Mat> & top_blobs,const Option & opt) const144 int Yolov3DetectionOutput::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
145 {
146     // gather all box
147     std::vector<BBoxRect> all_bbox_rects;
148 
149     for (size_t b = 0; b < bottom_blobs.size(); b++)
150     {
151         std::vector<std::vector<BBoxRect> > all_box_bbox_rects;
152         all_box_bbox_rects.resize(num_box);
153         const Mat& bottom_top_blobs = bottom_blobs[b];
154 
155         int w = bottom_top_blobs.w;
156         int h = bottom_top_blobs.h;
157         int channels = bottom_top_blobs.c;
158         //printf("%d %d %d\n", w, h, channels);
159         const int channels_per_box = channels / num_box;
160 
161         // anchor coord + box score + num_class
162         if (channels_per_box != 4 + 1 + num_class)
163             return -1;
164         size_t mask_offset = b * num_box;
165         int net_w = (int)(anchors_scale[b] * w);
166         int net_h = (int)(anchors_scale[b] * h);
167         //printf("%d %d\n", net_w, net_h);
168 
169         //printf("%d %d %d\n", w, h, channels);
170         #pragma omp parallel for num_threads(opt.num_threads)
171         for (int pp = 0; pp < num_box; pp++)
172         {
173             int p = pp * channels_per_box;
174             int biases_index = static_cast<int>(mask[pp + mask_offset]);
175             //printf("%d\n", biases_index);
176             const float bias_w = biases[biases_index * 2];
177             const float bias_h = biases[biases_index * 2 + 1];
178             //printf("%f %f\n", bias_w, bias_h);
179             const float* xptr = bottom_top_blobs.channel(p);
180             const float* yptr = bottom_top_blobs.channel(p + 1);
181             const float* wptr = bottom_top_blobs.channel(p + 2);
182             const float* hptr = bottom_top_blobs.channel(p + 3);
183 
184             const float* box_score_ptr = bottom_top_blobs.channel(p + 4);
185 
186             // softmax class scores
187             Mat scores = bottom_top_blobs.channel_range(p + 5, num_class);
188             //softmax->forward_inplace(scores, opt);
189 
190             for (int i = 0; i < h; i++)
191             {
192                 for (int j = 0; j < w; j++)
193                 {
194                     // find class index with max class score
195                     int class_index = 0;
196                     float class_score = -FLT_MAX;
197                     for (int q = 0; q < num_class; q++)
198                     {
199                         float score = scores.channel(q).row(i)[j];
200                         if (score > class_score)
201                         {
202                             class_index = q;
203                             class_score = score;
204                         }
205                     }
206 
207                     //sigmoid(box_score) * sigmoid(class_score)
208                     float confidence = 1.f / ((1.f + exp(-box_score_ptr[0]) * (1.f + exp(-class_score))));
209                     if (confidence >= confidence_threshold)
210                     {
211                         // region box
212                         float bbox_cx = (j + sigmoid(xptr[0])) / w;
213                         float bbox_cy = (i + sigmoid(yptr[0])) / h;
214                         float bbox_w = static_cast<float>(exp(wptr[0]) * bias_w / net_w);
215                         float bbox_h = static_cast<float>(exp(hptr[0]) * bias_h / net_h);
216 
217                         float bbox_xmin = bbox_cx - bbox_w * 0.5f;
218                         float bbox_ymin = bbox_cy - bbox_h * 0.5f;
219                         float bbox_xmax = bbox_cx + bbox_w * 0.5f;
220                         float bbox_ymax = bbox_cy + bbox_h * 0.5f;
221 
222                         float area = bbox_w * bbox_h;
223 
224                         BBoxRect c = {confidence, bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, area, class_index};
225                         all_box_bbox_rects[pp].push_back(c);
226                     }
227 
228                     xptr++;
229                     yptr++;
230                     wptr++;
231                     hptr++;
232 
233                     box_score_ptr++;
234                 }
235             }
236         }
237 
238         for (int i = 0; i < num_box; i++)
239         {
240             const std::vector<BBoxRect>& box_bbox_rects = all_box_bbox_rects[i];
241 
242             all_bbox_rects.insert(all_bbox_rects.end(), box_bbox_rects.begin(), box_bbox_rects.end());
243         }
244     }
245 
246     // global sort inplace
247     qsort_descent_inplace(all_bbox_rects);
248 
249     // apply nms
250     std::vector<size_t> picked;
251     nms_sorted_bboxes(all_bbox_rects, picked, nms_threshold);
252 
253     // select
254     std::vector<BBoxRect> bbox_rects;
255 
256     for (size_t i = 0; i < picked.size(); i++)
257     {
258         size_t z = picked[i];
259         bbox_rects.push_back(all_bbox_rects[z]);
260     }
261 
262     // fill result
263     int num_detected = static_cast<int>(bbox_rects.size());
264     if (num_detected == 0)
265         return 0;
266 
267     Mat& top_blob = top_blobs[0];
268     top_blob.create(6, num_detected, 4u, opt.blob_allocator);
269     if (top_blob.empty())
270         return -100;
271 
272     for (int i = 0; i < num_detected; i++)
273     {
274         const BBoxRect& r = bbox_rects[i];
275         float score = r.score;
276         float* outptr = top_blob.row(i);
277 
278         outptr[0] = static_cast<float>(r.label + 1); // +1 for prepend background class
279         outptr[1] = score;
280         outptr[2] = r.xmin;
281         outptr[3] = r.ymin;
282         outptr[4] = r.xmax;
283         outptr[5] = r.ymax;
284     }
285 
286     return 0;
287 }
288 
289 } // namespace ncnn
290