1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #include "yolov3detectionoutput.h"
16
17 #include "layer_type.h"
18
19 #include <float.h>
20 #include <math.h>
21
22 namespace ncnn {
23
Yolov3DetectionOutput()24 Yolov3DetectionOutput::Yolov3DetectionOutput()
25 {
26 one_blob_only = false;
27 support_inplace = false;
28
29 //softmax = ncnn::create_layer(ncnn::LayerType::Softmax);
30
31 // set param
32 ncnn::ParamDict pd;
33 pd.set(0, 0); // axis
34
35 //softmax->load_param(pd);
36 }
37
~Yolov3DetectionOutput()38 Yolov3DetectionOutput::~Yolov3DetectionOutput()
39 {
40 //delete softmax;
41 }
42
load_param(const ParamDict & pd)43 int Yolov3DetectionOutput::load_param(const ParamDict& pd)
44 {
45 num_class = pd.get(0, 20);
46 num_box = pd.get(1, 5);
47 confidence_threshold = pd.get(2, 0.01f);
48 nms_threshold = pd.get(3, 0.45f);
49 biases = pd.get(4, Mat());
50 mask = pd.get(5, Mat());
51 anchors_scale = pd.get(6, Mat());
52 return 0;
53 }
54
intersection_area(const Yolov3DetectionOutput::BBoxRect & a,const Yolov3DetectionOutput::BBoxRect & b)55 static inline float intersection_area(const Yolov3DetectionOutput::BBoxRect& a, const Yolov3DetectionOutput::BBoxRect& b)
56 {
57 if (a.xmin > b.xmax || a.xmax < b.xmin || a.ymin > b.ymax || a.ymax < b.ymin)
58 {
59 // no intersection
60 return 0.f;
61 }
62
63 float inter_width = std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin);
64 float inter_height = std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin);
65
66 return inter_width * inter_height;
67 }
68
qsort_descent_inplace(std::vector<BBoxRect> & datas,int left,int right) const69 void Yolov3DetectionOutput::qsort_descent_inplace(std::vector<BBoxRect>& datas, int left, int right) const
70 {
71 int i = left;
72 int j = right;
73 float p = datas[(left + right) / 2].score;
74
75 while (i <= j)
76 {
77 while (datas[i].score > p)
78 i++;
79
80 while (datas[j].score < p)
81 j--;
82
83 if (i <= j)
84 {
85 // swap
86 std::swap(datas[i], datas[j]);
87
88 i++;
89 j--;
90 }
91 }
92
93 if (left < j)
94 qsort_descent_inplace(datas, left, j);
95
96 if (i < right)
97 qsort_descent_inplace(datas, i, right);
98 }
99
qsort_descent_inplace(std::vector<BBoxRect> & datas) const100 void Yolov3DetectionOutput::qsort_descent_inplace(std::vector<BBoxRect>& datas) const
101 {
102 if (datas.empty())
103 return;
104
105 qsort_descent_inplace(datas, 0, static_cast<int>(datas.size() - 1));
106 }
107
nms_sorted_bboxes(std::vector<BBoxRect> & bboxes,std::vector<size_t> & picked,float nms_threshold) const108 void Yolov3DetectionOutput::nms_sorted_bboxes(std::vector<BBoxRect>& bboxes, std::vector<size_t>& picked, float nms_threshold) const
109 {
110 picked.clear();
111
112 const size_t n = bboxes.size();
113
114 for (size_t i = 0; i < n; i++)
115 {
116 const BBoxRect& a = bboxes[i];
117
118 int keep = 1;
119 for (int j = 0; j < (int)picked.size(); j++)
120 {
121 const BBoxRect& b = bboxes[picked[j]];
122
123 // intersection over union
124 float inter_area = intersection_area(a, b);
125 float union_area = a.area + b.area - inter_area;
126 // float IoU = inter_area / union_area
127 if (inter_area > nms_threshold * union_area)
128 {
129 keep = 0;
130 break;
131 }
132 }
133
134 if (keep)
135 picked.push_back(i);
136 }
137 }
138
sigmoid(float x)139 static inline float sigmoid(float x)
140 {
141 return static_cast<float>(1.f / (1.f + exp(-x)));
142 }
143
forward(const std::vector<Mat> & bottom_blobs,std::vector<Mat> & top_blobs,const Option & opt) const144 int Yolov3DetectionOutput::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
145 {
146 // gather all box
147 std::vector<BBoxRect> all_bbox_rects;
148
149 for (size_t b = 0; b < bottom_blobs.size(); b++)
150 {
151 std::vector<std::vector<BBoxRect> > all_box_bbox_rects;
152 all_box_bbox_rects.resize(num_box);
153 const Mat& bottom_top_blobs = bottom_blobs[b];
154
155 int w = bottom_top_blobs.w;
156 int h = bottom_top_blobs.h;
157 int channels = bottom_top_blobs.c;
158 //printf("%d %d %d\n", w, h, channels);
159 const int channels_per_box = channels / num_box;
160
161 // anchor coord + box score + num_class
162 if (channels_per_box != 4 + 1 + num_class)
163 return -1;
164 size_t mask_offset = b * num_box;
165 int net_w = (int)(anchors_scale[b] * w);
166 int net_h = (int)(anchors_scale[b] * h);
167 //printf("%d %d\n", net_w, net_h);
168
169 //printf("%d %d %d\n", w, h, channels);
170 #pragma omp parallel for num_threads(opt.num_threads)
171 for (int pp = 0; pp < num_box; pp++)
172 {
173 int p = pp * channels_per_box;
174 int biases_index = static_cast<int>(mask[pp + mask_offset]);
175 //printf("%d\n", biases_index);
176 const float bias_w = biases[biases_index * 2];
177 const float bias_h = biases[biases_index * 2 + 1];
178 //printf("%f %f\n", bias_w, bias_h);
179 const float* xptr = bottom_top_blobs.channel(p);
180 const float* yptr = bottom_top_blobs.channel(p + 1);
181 const float* wptr = bottom_top_blobs.channel(p + 2);
182 const float* hptr = bottom_top_blobs.channel(p + 3);
183
184 const float* box_score_ptr = bottom_top_blobs.channel(p + 4);
185
186 // softmax class scores
187 Mat scores = bottom_top_blobs.channel_range(p + 5, num_class);
188 //softmax->forward_inplace(scores, opt);
189
190 for (int i = 0; i < h; i++)
191 {
192 for (int j = 0; j < w; j++)
193 {
194 // find class index with max class score
195 int class_index = 0;
196 float class_score = -FLT_MAX;
197 for (int q = 0; q < num_class; q++)
198 {
199 float score = scores.channel(q).row(i)[j];
200 if (score > class_score)
201 {
202 class_index = q;
203 class_score = score;
204 }
205 }
206
207 //sigmoid(box_score) * sigmoid(class_score)
208 float confidence = 1.f / ((1.f + exp(-box_score_ptr[0]) * (1.f + exp(-class_score))));
209 if (confidence >= confidence_threshold)
210 {
211 // region box
212 float bbox_cx = (j + sigmoid(xptr[0])) / w;
213 float bbox_cy = (i + sigmoid(yptr[0])) / h;
214 float bbox_w = static_cast<float>(exp(wptr[0]) * bias_w / net_w);
215 float bbox_h = static_cast<float>(exp(hptr[0]) * bias_h / net_h);
216
217 float bbox_xmin = bbox_cx - bbox_w * 0.5f;
218 float bbox_ymin = bbox_cy - bbox_h * 0.5f;
219 float bbox_xmax = bbox_cx + bbox_w * 0.5f;
220 float bbox_ymax = bbox_cy + bbox_h * 0.5f;
221
222 float area = bbox_w * bbox_h;
223
224 BBoxRect c = {confidence, bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, area, class_index};
225 all_box_bbox_rects[pp].push_back(c);
226 }
227
228 xptr++;
229 yptr++;
230 wptr++;
231 hptr++;
232
233 box_score_ptr++;
234 }
235 }
236 }
237
238 for (int i = 0; i < num_box; i++)
239 {
240 const std::vector<BBoxRect>& box_bbox_rects = all_box_bbox_rects[i];
241
242 all_bbox_rects.insert(all_bbox_rects.end(), box_bbox_rects.begin(), box_bbox_rects.end());
243 }
244 }
245
246 // global sort inplace
247 qsort_descent_inplace(all_bbox_rects);
248
249 // apply nms
250 std::vector<size_t> picked;
251 nms_sorted_bboxes(all_bbox_rects, picked, nms_threshold);
252
253 // select
254 std::vector<BBoxRect> bbox_rects;
255
256 for (size_t i = 0; i < picked.size(); i++)
257 {
258 size_t z = picked[i];
259 bbox_rects.push_back(all_bbox_rects[z]);
260 }
261
262 // fill result
263 int num_detected = static_cast<int>(bbox_rects.size());
264 if (num_detected == 0)
265 return 0;
266
267 Mat& top_blob = top_blobs[0];
268 top_blob.create(6, num_detected, 4u, opt.blob_allocator);
269 if (top_blob.empty())
270 return -100;
271
272 for (int i = 0; i < num_detected; i++)
273 {
274 const BBoxRect& r = bbox_rects[i];
275 float score = r.score;
276 float* outptr = top_blob.row(i);
277
278 outptr[0] = static_cast<float>(r.label + 1); // +1 for prepend background class
279 outptr[1] = score;
280 outptr[2] = r.xmin;
281 outptr[3] = r.ymin;
282 outptr[4] = r.xmax;
283 outptr[5] = r.ymax;
284 }
285
286 return 0;
287 }
288
289 } // namespace ncnn
290