1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #include "net.h"
16
17 #include <opencv2/core/core.hpp>
18 #include <opencv2/highgui/highgui.hpp>
19 #include <opencv2/imgproc/imgproc.hpp>
20 #include <stdlib.h>
21 #include <float.h>
22 #include <stdio.h>
23 #include <vector>
24
25 struct Object
26 {
27 cv::Rect_<float> rect;
28 int label;
29 float prob;
30 };
31
intersection_area(const Object & a,const Object & b)32 static inline float intersection_area(const Object& a, const Object& b)
33 {
34 cv::Rect_<float> inter = a.rect & b.rect;
35 return inter.area();
36 }
37
qsort_descent_inplace(std::vector<Object> & faceobjects,int left,int right)38 static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
39 {
40 int i = left;
41 int j = right;
42 float p = faceobjects[(left + right) / 2].prob;
43
44 while (i <= j)
45 {
46 while (faceobjects[i].prob > p)
47 i++;
48
49 while (faceobjects[j].prob < p)
50 j--;
51
52 if (i <= j)
53 {
54 // swap
55 std::swap(faceobjects[i], faceobjects[j]);
56
57 i++;
58 j--;
59 }
60 }
61
62 #pragma omp parallel sections
63 {
64 #pragma omp section
65 {
66 if (left < j) qsort_descent_inplace(faceobjects, left, j);
67 }
68 #pragma omp section
69 {
70 if (i < right) qsort_descent_inplace(faceobjects, i, right);
71 }
72 }
73 }
74
qsort_descent_inplace(std::vector<Object> & faceobjects)75 static void qsort_descent_inplace(std::vector<Object>& faceobjects)
76 {
77 if (faceobjects.empty())
78 return;
79
80 qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
81 }
82
nms_sorted_bboxes(const std::vector<Object> & faceobjects,std::vector<int> & picked,float nms_threshold)83 static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
84 {
85 picked.clear();
86
87 const int n = faceobjects.size();
88
89 std::vector<float> areas(n);
90 for (int i = 0; i < n; i++)
91 {
92 areas[i] = faceobjects[i].rect.width * faceobjects[i].rect.height;
93 }
94
95 for (int i = 0; i < n; i++)
96 {
97 const Object& a = faceobjects[i];
98
99 int keep = 1;
100 for (int j = 0; j < (int)picked.size(); j++)
101 {
102 const Object& b = faceobjects[picked[j]];
103
104 // intersection over union
105 float inter_area = intersection_area(a, b);
106 float union_area = areas[i] + areas[picked[j]] - inter_area;
107 // float IoU = inter_area / union_area
108 if (inter_area / union_area > nms_threshold)
109 keep = 0;
110 }
111
112 if (keep)
113 picked.push_back(i);
114 }
115 }
116
generate_proposals(const ncnn::Mat & cls_pred,const ncnn::Mat & dis_pred,int stride,const ncnn::Mat & in_pad,float prob_threshold,std::vector<Object> & objects)117 static void generate_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
118 {
119 const int num_grid = cls_pred.h;
120
121 int num_grid_x;
122 int num_grid_y;
123 if (in_pad.w > in_pad.h)
124 {
125 num_grid_x = in_pad.w / stride;
126 num_grid_y = num_grid / num_grid_x;
127 }
128 else
129 {
130 num_grid_y = in_pad.h / stride;
131 num_grid_x = num_grid / num_grid_y;
132 }
133
134 const int num_class = cls_pred.w;
135 const int reg_max_1 = dis_pred.w / 4;
136
137 for (int i = 0; i < num_grid_y; i++)
138 {
139 for (int j = 0; j < num_grid_x; j++)
140 {
141 const int idx = i * num_grid_x + j;
142
143 const float* scores = cls_pred.row(idx);
144
145 // find label with max score
146 int label = -1;
147 float score = -FLT_MAX;
148 for (int k = 0; k < num_class; k++)
149 {
150 if (scores[k] > score)
151 {
152 label = k;
153 score = scores[k];
154 }
155 }
156
157 if (score >= prob_threshold)
158 {
159 ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx));
160 {
161 ncnn::Layer* softmax = ncnn::create_layer("Softmax");
162
163 ncnn::ParamDict pd;
164 pd.set(0, 1); // axis
165 pd.set(1, 1);
166 softmax->load_param(pd);
167
168 ncnn::Option opt;
169 opt.num_threads = 1;
170 opt.use_packing_layout = false;
171
172 softmax->create_pipeline(opt);
173
174 softmax->forward_inplace(bbox_pred, opt);
175
176 softmax->destroy_pipeline(opt);
177
178 delete softmax;
179 }
180
181 float dis_pred[4];
182 for (int k = 0; k < 4; k++)
183 {
184 float dis = 0.f;
185 const float* dis_after_sm = bbox_pred.row(k);
186 for (int l = 0; l < reg_max_1; l++)
187 {
188 dis += l * dis_after_sm[l];
189 }
190
191 dis_pred[k] = dis * stride;
192 }
193
194 float pb_cx = (j + 0.5f) * stride;
195 float pb_cy = (i + 0.5f) * stride;
196
197 float x0 = pb_cx - dis_pred[0];
198 float y0 = pb_cy - dis_pred[1];
199 float x1 = pb_cx + dis_pred[2];
200 float y1 = pb_cy + dis_pred[3];
201
202 Object obj;
203 obj.rect.x = x0;
204 obj.rect.y = y0;
205 obj.rect.width = x1 - x0;
206 obj.rect.height = y1 - y0;
207 obj.label = label;
208 obj.prob = score;
209
210 objects.push_back(obj);
211 }
212 }
213 }
214 }
215
detect_nanodet(const cv::Mat & bgr,std::vector<Object> & objects)216 static int detect_nanodet(const cv::Mat& bgr, std::vector<Object>& objects)
217 {
218 ncnn::Net nanodet;
219
220 nanodet.opt.use_vulkan_compute = true;
221 // nanodet.opt.use_bf16_storage = true;
222
223 // original pretrained model from https://github.com/RangiLyu/nanodet
224 // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
225 nanodet.load_param("nanodet_m.param");
226 nanodet.load_model("nanodet_m.bin");
227
228 int width = bgr.cols;
229 int height = bgr.rows;
230
231 const int target_size = 320;
232 const float prob_threshold = 0.4f;
233 const float nms_threshold = 0.5f;
234
235 // pad to multiple of 32
236 int w = width;
237 int h = height;
238 float scale = 1.f;
239 if (w > h)
240 {
241 scale = (float)target_size / w;
242 w = target_size;
243 h = h * scale;
244 }
245 else
246 {
247 scale = (float)target_size / h;
248 h = target_size;
249 w = w * scale;
250 }
251
252 ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, width, height, w, h);
253
254 // pad to target_size rectangle
255 int wpad = (w + 31) / 32 * 32 - w;
256 int hpad = (h + 31) / 32 * 32 - h;
257 ncnn::Mat in_pad;
258 ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
259
260 const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
261 const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f};
262 in_pad.substract_mean_normalize(mean_vals, norm_vals);
263
264 ncnn::Extractor ex = nanodet.create_extractor();
265
266 ex.input("input.1", in_pad);
267
268 std::vector<Object> proposals;
269
270 // stride 8
271 {
272 ncnn::Mat cls_pred;
273 ncnn::Mat dis_pred;
274 ex.extract("792", cls_pred);
275 ex.extract("795", dis_pred);
276
277 std::vector<Object> objects8;
278 generate_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8);
279
280 proposals.insert(proposals.end(), objects8.begin(), objects8.end());
281 }
282
283 // stride 16
284 {
285 ncnn::Mat cls_pred;
286 ncnn::Mat dis_pred;
287 ex.extract("814", cls_pred);
288 ex.extract("817", dis_pred);
289
290 std::vector<Object> objects16;
291 generate_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16);
292
293 proposals.insert(proposals.end(), objects16.begin(), objects16.end());
294 }
295
296 // stride 32
297 {
298 ncnn::Mat cls_pred;
299 ncnn::Mat dis_pred;
300 ex.extract("836", cls_pred);
301 ex.extract("839", dis_pred);
302
303 std::vector<Object> objects32;
304 generate_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32);
305
306 proposals.insert(proposals.end(), objects32.begin(), objects32.end());
307 }
308
309 // sort all proposals by score from highest to lowest
310 qsort_descent_inplace(proposals);
311
312 // apply nms with nms_threshold
313 std::vector<int> picked;
314 nms_sorted_bboxes(proposals, picked, nms_threshold);
315
316 int count = picked.size();
317
318 objects.resize(count);
319 for (int i = 0; i < count; i++)
320 {
321 objects[i] = proposals[picked[i]];
322
323 // adjust offset to original unpadded
324 float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
325 float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
326 float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
327 float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
328
329 // clip
330 x0 = std::max(std::min(x0, (float)(width - 1)), 0.f);
331 y0 = std::max(std::min(y0, (float)(height - 1)), 0.f);
332 x1 = std::max(std::min(x1, (float)(width - 1)), 0.f);
333 y1 = std::max(std::min(y1, (float)(height - 1)), 0.f);
334
335 objects[i].rect.x = x0;
336 objects[i].rect.y = y0;
337 objects[i].rect.width = x1 - x0;
338 objects[i].rect.height = y1 - y0;
339 }
340
341 return 0;
342 }
343
draw_objects(const cv::Mat & bgr,const std::vector<Object> & objects)344 static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
345 {
346 static const char* class_names[] = {
347 "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
348 "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
349 "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
350 "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
351 "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
352 "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
353 "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
354 "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
355 "hair drier", "toothbrush"
356 };
357
358 cv::Mat image = bgr.clone();
359
360 for (size_t i = 0; i < objects.size(); i++)
361 {
362 const Object& obj = objects[i];
363
364 fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
365 obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
366
367 cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));
368
369 char text[256];
370 sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);
371
372 int baseLine = 0;
373 cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
374
375 int x = obj.rect.x;
376 int y = obj.rect.y - label_size.height - baseLine;
377 if (y < 0)
378 y = 0;
379 if (x + label_size.width > image.cols)
380 x = image.cols - label_size.width;
381
382 cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
383 cv::Scalar(255, 255, 255), -1);
384
385 cv::putText(image, text, cv::Point(x, y + label_size.height),
386 cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
387 }
388
389 cv::imshow("image", image);
390 cv::waitKey(0);
391 }
392
main(int argc,char ** argv)393 int main(int argc, char** argv)
394 {
395 if (argc != 2)
396 {
397 fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
398 return -1;
399 }
400
401 const char* imagepath = argv[1];
402
403 cv::Mat m = cv::imread(imagepath, 1);
404 if (m.empty())
405 {
406 fprintf(stderr, "cv::imread %s failed\n", imagepath);
407 return -1;
408 }
409
410 std::vector<Object> objects;
411 detect_nanodet(m, objects);
412
413 draw_objects(m, objects);
414
415 return 0;
416 }
417