1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #include "yolodetectionoutput.h"
16
17 #include "layer_type.h"
18
19 #include <math.h>
20
21 namespace ncnn {
22
YoloDetectionOutput()23 YoloDetectionOutput::YoloDetectionOutput()
24 {
25 one_blob_only = false;
26 support_inplace = true;
27 }
28
load_param(const ParamDict & pd)29 int YoloDetectionOutput::load_param(const ParamDict& pd)
30 {
31 num_class = pd.get(0, 20);
32 num_box = pd.get(1, 5);
33 confidence_threshold = pd.get(2, 0.01f);
34 nms_threshold = pd.get(3, 0.45f);
35 biases = pd.get(4, Mat());
36
37 return 0;
38 }
39
create_pipeline(const Option & opt)40 int YoloDetectionOutput::create_pipeline(const Option& opt)
41 {
42 {
43 softmax = ncnn::create_layer(ncnn::LayerType::Softmax);
44
45 ncnn::ParamDict pd;
46 pd.set(0, 0); // axis
47
48 softmax->load_param(pd);
49
50 softmax->create_pipeline(opt);
51 }
52
53 return 0;
54 }
55
destroy_pipeline(const Option & opt)56 int YoloDetectionOutput::destroy_pipeline(const Option& opt)
57 {
58 if (softmax)
59 {
60 softmax->destroy_pipeline(opt);
61 delete softmax;
62 softmax = 0;
63 }
64
65 return 0;
66 }
67
// One candidate detection: an axis-aligned box plus its class index.
// Kept as a plain aggregate; forward_inplace() brace-initialises it in the
// field order below.
struct BBoxRect
{
    // left / top edge
    float xmin;
    float ymin;
    // right / bottom edge
    float xmax;
    float ymax;
    // zero-based class index (forward_inplace() adds 1 when writing the output)
    int label;
};
76
intersection_area(const BBoxRect & a,const BBoxRect & b)77 static inline float intersection_area(const BBoxRect& a, const BBoxRect& b)
78 {
79 if (a.xmin > b.xmax || a.xmax < b.xmin || a.ymin > b.ymax || a.ymax < b.ymin)
80 {
81 // no intersection
82 return 0.f;
83 }
84
85 float inter_width = std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin);
86 float inter_height = std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin);
87
88 return inter_width * inter_height;
89 }
90
// Quicksort the inclusive index range [left, right] of scores into descending
// order, applying every swap to datas as well so both vectors stay aligned.
template<typename T>
static void qsort_descent_inplace(std::vector<T>& datas, std::vector<float>& scores, int left, int right)
{
    // pivot is the score sitting in the middle of the range
    const float pivot = scores[(left + right) / 2];

    int lo = left;
    int hi = right;

    while (lo <= hi)
    {
        // skip entries that are already on the correct side of the pivot
        for (; scores[lo] > pivot; ++lo)
        {
        }
        for (; scores[hi] < pivot; --hi)
        {
        }

        // cursors crossed: partitioning is complete
        if (lo > hi)
            break;

        std::swap(datas[lo], datas[hi]);
        std::swap(scores[lo], scores[hi]);

        ++lo;
        --hi;
    }

    // recurse into whichever partitions still hold more than one element
    if (left < hi)
        qsort_descent_inplace(datas, scores, left, hi);

    if (lo < right)
        qsort_descent_inplace(datas, scores, lo, right);
}
123
// Sort scores in descending order over its whole length, reordering datas in
// step. Does nothing when either vector is empty.
template<typename T>
static void qsort_descent_inplace(std::vector<T>& datas, std::vector<float>& scores)
{
    const bool has_items = !datas.empty() && !scores.empty();
    if (has_items)
    {
        // the range overload takes inclusive bounds
        const int last = static_cast<int>(scores.size() - 1);
        qsort_descent_inplace(datas, scores, 0, last);
    }
}
132
nms_sorted_bboxes(const std::vector<BBoxRect> & bboxes,std::vector<size_t> & picked,float nms_threshold)133 static void nms_sorted_bboxes(const std::vector<BBoxRect>& bboxes, std::vector<size_t>& picked, float nms_threshold)
134 {
135 picked.clear();
136
137 const size_t n = bboxes.size();
138
139 std::vector<float> areas(n);
140 for (size_t i = 0; i < n; i++)
141 {
142 const BBoxRect& r = bboxes[i];
143
144 float width = r.xmax - r.xmin;
145 float height = r.ymax - r.ymin;
146
147 areas[i] = width * height;
148 }
149
150 for (size_t i = 0; i < n; i++)
151 {
152 const BBoxRect& a = bboxes[i];
153
154 int keep = 1;
155 for (int j = 0; j < (int)picked.size(); j++)
156 {
157 const BBoxRect& b = bboxes[picked[j]];
158
159 // intersection over union
160 float inter_area = intersection_area(a, b);
161 float union_area = areas[i] + areas[picked[j]] - inter_area;
162 // float IoU = inter_area / union_area
163 if (inter_area / union_area > nms_threshold)
164 keep = 0;
165 }
166
167 if (keep)
168 picked.push_back(i);
169 }
170 }
171
// Logistic function: 1 / (1 + e^-x).
static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (exp(-x) + 1.f));
}
176
forward_inplace(std::vector<Mat> & bottom_top_blobs,const Option & opt) const177 int YoloDetectionOutput::forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const
178 {
179 // gather all box
180 std::vector<BBoxRect> all_bbox_rects;
181 std::vector<float> all_bbox_scores;
182
183 for (size_t b = 0; b < bottom_top_blobs.size(); b++)
184 {
185 Mat& bottom_top_blob = bottom_top_blobs[b];
186
187 int w = bottom_top_blob.w;
188 int h = bottom_top_blob.h;
189 int channels = bottom_top_blob.c;
190
191 const int channels_per_box = channels / num_box;
192
193 // anchor coord + box score + num_class
194 if (channels_per_box != 4 + 1 + num_class)
195 return -1;
196
197 std::vector<std::vector<BBoxRect> > all_box_bbox_rects;
198 std::vector<std::vector<float> > all_box_bbox_scores;
199 all_box_bbox_rects.resize(num_box);
200 all_box_bbox_scores.resize(num_box);
201
202 #pragma omp parallel for num_threads(opt.num_threads)
203 for (int pp = 0; pp < num_box; pp++)
204 {
205 int p = pp * channels_per_box;
206
207 const float bias_w = biases[pp * 2];
208 const float bias_h = biases[pp * 2 + 1];
209
210 const float* xptr = bottom_top_blob.channel(p);
211 const float* yptr = bottom_top_blob.channel(p + 1);
212 const float* wptr = bottom_top_blob.channel(p + 2);
213 const float* hptr = bottom_top_blob.channel(p + 3);
214
215 const float* box_score_ptr = bottom_top_blob.channel(p + 4);
216
217 // softmax class scores
218 Mat scores = bottom_top_blob.channel_range(p + 5, num_class);
219 softmax->forward_inplace(scores, opt);
220
221 for (int i = 0; i < h; i++)
222 {
223 for (int j = 0; j < w; j++)
224 {
225 // region box
226 float bbox_cx = (j + sigmoid(xptr[0])) / w;
227 float bbox_cy = (i + sigmoid(yptr[0])) / h;
228 float bbox_w = static_cast<float>(exp(wptr[0]) * bias_w / w);
229 float bbox_h = static_cast<float>(exp(hptr[0]) * bias_h / h);
230
231 float bbox_xmin = bbox_cx - bbox_w * 0.5f;
232 float bbox_ymin = bbox_cy - bbox_h * 0.5f;
233 float bbox_xmax = bbox_cx + bbox_w * 0.5f;
234 float bbox_ymax = bbox_cy + bbox_h * 0.5f;
235
236 // box score
237 float box_score = sigmoid(box_score_ptr[0]);
238
239 // find class index with max class score
240 int class_index = 0;
241 float class_score = 0.f;
242 for (int q = 0; q < num_class; q++)
243 {
244 float score = scores.channel(q).row(i)[j];
245 if (score > class_score)
246 {
247 class_index = q;
248 class_score = score;
249 }
250 }
251
252 // NCNN_LOGE("%d %f %f", class_index, box_score, class_score);
253
254 float confidence = box_score * class_score;
255 if (confidence >= confidence_threshold)
256 {
257 BBoxRect c = {bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, class_index};
258 all_box_bbox_rects[pp].push_back(c);
259 all_box_bbox_scores[pp].push_back(confidence);
260 }
261
262 xptr++;
263 yptr++;
264 wptr++;
265 hptr++;
266
267 box_score_ptr++;
268 }
269 }
270 }
271
272 for (int i = 0; i < num_box; i++)
273 {
274 const std::vector<BBoxRect>& box_bbox_rects = all_box_bbox_rects[i];
275 const std::vector<float>& box_bbox_scores = all_box_bbox_scores[i];
276
277 all_bbox_rects.insert(all_bbox_rects.end(), box_bbox_rects.begin(), box_bbox_rects.end());
278 all_bbox_scores.insert(all_bbox_scores.end(), box_bbox_scores.begin(), box_bbox_scores.end());
279 }
280 }
281
282 // global sort inplace
283 qsort_descent_inplace(all_bbox_rects, all_bbox_scores);
284
285 // apply nms
286 std::vector<size_t> picked;
287 nms_sorted_bboxes(all_bbox_rects, picked, nms_threshold);
288
289 // select
290 std::vector<BBoxRect> bbox_rects;
291 std::vector<float> bbox_scores;
292
293 for (size_t i = 0; i < picked.size(); i++)
294 {
295 size_t z = picked[i];
296 bbox_rects.push_back(all_bbox_rects[z]);
297 bbox_scores.push_back(all_bbox_scores[z]);
298 }
299
300 // fill result
301 int num_detected = static_cast<int>(bbox_rects.size());
302 if (num_detected == 0)
303 return 0;
304
305 Mat& top_blob = bottom_top_blobs[0];
306 top_blob.create(6, num_detected, 4u, opt.blob_allocator);
307 if (top_blob.empty())
308 return -100;
309
310 for (int i = 0; i < num_detected; i++)
311 {
312 const BBoxRect& r = bbox_rects[i];
313 float score = bbox_scores[i];
314 float* outptr = top_blob.row(i);
315
316 outptr[0] = static_cast<float>(r.label + 1); // +1 for prepend background class
317 outptr[1] = score;
318 outptr[2] = r.xmin;
319 outptr[3] = r.ymin;
320 outptr[4] = r.xmax;
321 outptr[5] = r.ymax;
322 }
323
324 return 0;
325 }
326
327 } // namespace ncnn
328