1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #include "yolodetectionoutput.h"
16 
17 #include "layer_type.h"
18 
19 #include <math.h>
20 
21 namespace ncnn {
22 
YoloDetectionOutput()23 YoloDetectionOutput::YoloDetectionOutput()
24 {
25     one_blob_only = false;
26     support_inplace = true;
27 }
28 
load_param(const ParamDict & pd)29 int YoloDetectionOutput::load_param(const ParamDict& pd)
30 {
31     num_class = pd.get(0, 20);
32     num_box = pd.get(1, 5);
33     confidence_threshold = pd.get(2, 0.01f);
34     nms_threshold = pd.get(3, 0.45f);
35     biases = pd.get(4, Mat());
36 
37     return 0;
38 }
39 
create_pipeline(const Option & opt)40 int YoloDetectionOutput::create_pipeline(const Option& opt)
41 {
42     {
43         softmax = ncnn::create_layer(ncnn::LayerType::Softmax);
44 
45         ncnn::ParamDict pd;
46         pd.set(0, 0); // axis
47 
48         softmax->load_param(pd);
49 
50         softmax->create_pipeline(opt);
51     }
52 
53     return 0;
54 }
55 
destroy_pipeline(const Option & opt)56 int YoloDetectionOutput::destroy_pipeline(const Option& opt)
57 {
58     if (softmax)
59     {
60         softmax->destroy_pipeline(opt);
61         delete softmax;
62         softmax = 0;
63     }
64 
65     return 0;
66 }
67 
// One candidate detection: an axis-aligned box given by its two corners plus
// the index of the winning class.
// Kept as a plain aggregate: forward_inplace() brace-initialises it in member
// order (xmin, ymin, xmax, ymax, label).
struct BBoxRect
{
    float xmin; // left edge
    float ymin; // top edge
    float xmax; // right edge
    float ymax; // bottom edge
    int label;  // zero-based class index; the +1 background offset is applied only when writing the output blob
};
76 
intersection_area(const BBoxRect & a,const BBoxRect & b)77 static inline float intersection_area(const BBoxRect& a, const BBoxRect& b)
78 {
79     if (a.xmin > b.xmax || a.xmax < b.xmin || a.ymin > b.ymax || a.ymax < b.ymin)
80     {
81         // no intersection
82         return 0.f;
83     }
84 
85     float inter_width = std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin);
86     float inter_height = std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin);
87 
88     return inter_width * inter_height;
89 }
90 
// Quicksort the inclusive index range [left, right] of `scores` into
// descending order, applying every swap to `datas` as well so the two vectors
// stay paired. The pivot is the score at the middle index. The sort is not
// stable.
template<typename T>
static void qsort_descent_inplace(std::vector<T>& datas, std::vector<float>& scores, int left, int right)
{
    const float pivot = scores[(left + right) / 2];

    int lo = left;
    int hi = right;

    while (lo <= hi)
    {
        // skip entries that already sit on the correct side of the pivot
        for (; scores[lo] > pivot; lo++)
        {
        }

        for (; scores[hi] < pivot; hi--)
        {
        }

        if (lo > hi)
            break;

        // exchange the misplaced pair, payload and key together
        std::swap(datas[lo], datas[hi]);
        std::swap(scores[lo], scores[hi]);

        lo++;
        hi--;
    }

    // recurse into whichever partitions still hold more than one element
    if (left < hi)
        qsort_descent_inplace(datas, scores, left, hi);

    if (lo < right)
        qsort_descent_inplace(datas, scores, lo, right);
}
123 
// Sort `scores` in descending order over its whole length, permuting `datas`
// identically. Does nothing when either vector is empty.
template<typename T>
static void qsort_descent_inplace(std::vector<T>& datas, std::vector<float>& scores)
{
    const bool nothing_to_sort = datas.empty() || scores.empty();
    if (!nothing_to_sort)
    {
        const int last_index = static_cast<int>(scores.size() - 1);
        qsort_descent_inplace(datas, scores, 0, last_index);
    }
}
132 
nms_sorted_bboxes(const std::vector<BBoxRect> & bboxes,std::vector<size_t> & picked,float nms_threshold)133 static void nms_sorted_bboxes(const std::vector<BBoxRect>& bboxes, std::vector<size_t>& picked, float nms_threshold)
134 {
135     picked.clear();
136 
137     const size_t n = bboxes.size();
138 
139     std::vector<float> areas(n);
140     for (size_t i = 0; i < n; i++)
141     {
142         const BBoxRect& r = bboxes[i];
143 
144         float width = r.xmax - r.xmin;
145         float height = r.ymax - r.ymin;
146 
147         areas[i] = width * height;
148     }
149 
150     for (size_t i = 0; i < n; i++)
151     {
152         const BBoxRect& a = bboxes[i];
153 
154         int keep = 1;
155         for (int j = 0; j < (int)picked.size(); j++)
156         {
157             const BBoxRect& b = bboxes[picked[j]];
158 
159             // intersection over union
160             float inter_area = intersection_area(a, b);
161             float union_area = areas[i] + areas[picked[j]] - inter_area;
162             //             float IoU = inter_area / union_area
163             if (inter_area / union_area > nms_threshold)
164                 keep = 0;
165         }
166 
167         if (keep)
168             picked.push_back(i);
169     }
170 }
171 
// Logistic function 1 / (1 + e^(-x)); the result is narrowed back to float.
static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (exp(-x) + 1.f));
}
176 
forward_inplace(std::vector<Mat> & bottom_top_blobs,const Option & opt) const177 int YoloDetectionOutput::forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const
178 {
179     // gather all box
180     std::vector<BBoxRect> all_bbox_rects;
181     std::vector<float> all_bbox_scores;
182 
183     for (size_t b = 0; b < bottom_top_blobs.size(); b++)
184     {
185         Mat& bottom_top_blob = bottom_top_blobs[b];
186 
187         int w = bottom_top_blob.w;
188         int h = bottom_top_blob.h;
189         int channels = bottom_top_blob.c;
190 
191         const int channels_per_box = channels / num_box;
192 
193         // anchor coord + box score + num_class
194         if (channels_per_box != 4 + 1 + num_class)
195             return -1;
196 
197         std::vector<std::vector<BBoxRect> > all_box_bbox_rects;
198         std::vector<std::vector<float> > all_box_bbox_scores;
199         all_box_bbox_rects.resize(num_box);
200         all_box_bbox_scores.resize(num_box);
201 
202         #pragma omp parallel for num_threads(opt.num_threads)
203         for (int pp = 0; pp < num_box; pp++)
204         {
205             int p = pp * channels_per_box;
206 
207             const float bias_w = biases[pp * 2];
208             const float bias_h = biases[pp * 2 + 1];
209 
210             const float* xptr = bottom_top_blob.channel(p);
211             const float* yptr = bottom_top_blob.channel(p + 1);
212             const float* wptr = bottom_top_blob.channel(p + 2);
213             const float* hptr = bottom_top_blob.channel(p + 3);
214 
215             const float* box_score_ptr = bottom_top_blob.channel(p + 4);
216 
217             // softmax class scores
218             Mat scores = bottom_top_blob.channel_range(p + 5, num_class);
219             softmax->forward_inplace(scores, opt);
220 
221             for (int i = 0; i < h; i++)
222             {
223                 for (int j = 0; j < w; j++)
224                 {
225                     // region box
226                     float bbox_cx = (j + sigmoid(xptr[0])) / w;
227                     float bbox_cy = (i + sigmoid(yptr[0])) / h;
228                     float bbox_w = static_cast<float>(exp(wptr[0]) * bias_w / w);
229                     float bbox_h = static_cast<float>(exp(hptr[0]) * bias_h / h);
230 
231                     float bbox_xmin = bbox_cx - bbox_w * 0.5f;
232                     float bbox_ymin = bbox_cy - bbox_h * 0.5f;
233                     float bbox_xmax = bbox_cx + bbox_w * 0.5f;
234                     float bbox_ymax = bbox_cy + bbox_h * 0.5f;
235 
236                     // box score
237                     float box_score = sigmoid(box_score_ptr[0]);
238 
239                     // find class index with max class score
240                     int class_index = 0;
241                     float class_score = 0.f;
242                     for (int q = 0; q < num_class; q++)
243                     {
244                         float score = scores.channel(q).row(i)[j];
245                         if (score > class_score)
246                         {
247                             class_index = q;
248                             class_score = score;
249                         }
250                     }
251 
252                     //                 NCNN_LOGE("%d %f %f", class_index, box_score, class_score);
253 
254                     float confidence = box_score * class_score;
255                     if (confidence >= confidence_threshold)
256                     {
257                         BBoxRect c = {bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, class_index};
258                         all_box_bbox_rects[pp].push_back(c);
259                         all_box_bbox_scores[pp].push_back(confidence);
260                     }
261 
262                     xptr++;
263                     yptr++;
264                     wptr++;
265                     hptr++;
266 
267                     box_score_ptr++;
268                 }
269             }
270         }
271 
272         for (int i = 0; i < num_box; i++)
273         {
274             const std::vector<BBoxRect>& box_bbox_rects = all_box_bbox_rects[i];
275             const std::vector<float>& box_bbox_scores = all_box_bbox_scores[i];
276 
277             all_bbox_rects.insert(all_bbox_rects.end(), box_bbox_rects.begin(), box_bbox_rects.end());
278             all_bbox_scores.insert(all_bbox_scores.end(), box_bbox_scores.begin(), box_bbox_scores.end());
279         }
280     }
281 
282     // global sort inplace
283     qsort_descent_inplace(all_bbox_rects, all_bbox_scores);
284 
285     // apply nms
286     std::vector<size_t> picked;
287     nms_sorted_bboxes(all_bbox_rects, picked, nms_threshold);
288 
289     // select
290     std::vector<BBoxRect> bbox_rects;
291     std::vector<float> bbox_scores;
292 
293     for (size_t i = 0; i < picked.size(); i++)
294     {
295         size_t z = picked[i];
296         bbox_rects.push_back(all_bbox_rects[z]);
297         bbox_scores.push_back(all_bbox_scores[z]);
298     }
299 
300     // fill result
301     int num_detected = static_cast<int>(bbox_rects.size());
302     if (num_detected == 0)
303         return 0;
304 
305     Mat& top_blob = bottom_top_blobs[0];
306     top_blob.create(6, num_detected, 4u, opt.blob_allocator);
307     if (top_blob.empty())
308         return -100;
309 
310     for (int i = 0; i < num_detected; i++)
311     {
312         const BBoxRect& r = bbox_rects[i];
313         float score = bbox_scores[i];
314         float* outptr = top_blob.row(i);
315 
316         outptr[0] = static_cast<float>(r.label + 1); // +1 for prepend background class
317         outptr[1] = score;
318         outptr[2] = r.xmin;
319         outptr[3] = r.ymin;
320         outptr[4] = r.xmax;
321         outptr[5] = r.ymax;
322     }
323 
324     return 0;
325 }
326 
327 } // namespace ncnn
328