1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #include "net.h"
16 
17 #include <opencv2/core/core.hpp>
18 #include <opencv2/highgui/highgui.hpp>
19 #include <opencv2/imgproc/imgproc.hpp>
20 #include <stdlib.h>
21 #include <float.h>
22 #include <stdio.h>
23 #include <vector>
24 
25 struct Object
26 {
27     cv::Rect_<float> rect;
28     int label;
29     float prob;
30 };
31 
intersection_area(const Object & a,const Object & b)32 static inline float intersection_area(const Object& a, const Object& b)
33 {
34     cv::Rect_<float> inter = a.rect & b.rect;
35     return inter.area();
36 }
37 
qsort_descent_inplace(std::vector<Object> & faceobjects,int left,int right)38 static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
39 {
40     int i = left;
41     int j = right;
42     float p = faceobjects[(left + right) / 2].prob;
43 
44     while (i <= j)
45     {
46         while (faceobjects[i].prob > p)
47             i++;
48 
49         while (faceobjects[j].prob < p)
50             j--;
51 
52         if (i <= j)
53         {
54             // swap
55             std::swap(faceobjects[i], faceobjects[j]);
56 
57             i++;
58             j--;
59         }
60     }
61 
62     #pragma omp parallel sections
63     {
64         #pragma omp section
65         {
66             if (left < j) qsort_descent_inplace(faceobjects, left, j);
67         }
68         #pragma omp section
69         {
70             if (i < right) qsort_descent_inplace(faceobjects, i, right);
71         }
72     }
73 }
74 
qsort_descent_inplace(std::vector<Object> & faceobjects)75 static void qsort_descent_inplace(std::vector<Object>& faceobjects)
76 {
77     if (faceobjects.empty())
78         return;
79 
80     qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
81 }
82 
nms_sorted_bboxes(const std::vector<Object> & faceobjects,std::vector<int> & picked,float nms_threshold)83 static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
84 {
85     picked.clear();
86 
87     const int n = faceobjects.size();
88 
89     std::vector<float> areas(n);
90     for (int i = 0; i < n; i++)
91     {
92         areas[i] = faceobjects[i].rect.width * faceobjects[i].rect.height;
93     }
94 
95     for (int i = 0; i < n; i++)
96     {
97         const Object& a = faceobjects[i];
98 
99         int keep = 1;
100         for (int j = 0; j < (int)picked.size(); j++)
101         {
102             const Object& b = faceobjects[picked[j]];
103 
104             // intersection over union
105             float inter_area = intersection_area(a, b);
106             float union_area = areas[i] + areas[picked[j]] - inter_area;
107             // float IoU = inter_area / union_area
108             if (inter_area / union_area > nms_threshold)
109                 keep = 0;
110         }
111 
112         if (keep)
113             picked.push_back(i);
114     }
115 }
116 
generate_proposals(const ncnn::Mat & cls_pred,const ncnn::Mat & dis_pred,int stride,const ncnn::Mat & in_pad,float prob_threshold,std::vector<Object> & objects)117 static void generate_proposals(const ncnn::Mat& cls_pred, const ncnn::Mat& dis_pred, int stride, const ncnn::Mat& in_pad, float prob_threshold, std::vector<Object>& objects)
118 {
119     const int num_grid = cls_pred.h;
120 
121     int num_grid_x;
122     int num_grid_y;
123     if (in_pad.w > in_pad.h)
124     {
125         num_grid_x = in_pad.w / stride;
126         num_grid_y = num_grid / num_grid_x;
127     }
128     else
129     {
130         num_grid_y = in_pad.h / stride;
131         num_grid_x = num_grid / num_grid_y;
132     }
133 
134     const int num_class = cls_pred.w;
135     const int reg_max_1 = dis_pred.w / 4;
136 
137     for (int i = 0; i < num_grid_y; i++)
138     {
139         for (int j = 0; j < num_grid_x; j++)
140         {
141             const int idx = i * num_grid_x + j;
142 
143             const float* scores = cls_pred.row(idx);
144 
145             // find label with max score
146             int label = -1;
147             float score = -FLT_MAX;
148             for (int k = 0; k < num_class; k++)
149             {
150                 if (scores[k] > score)
151                 {
152                     label = k;
153                     score = scores[k];
154                 }
155             }
156 
157             if (score >= prob_threshold)
158             {
159                 ncnn::Mat bbox_pred(reg_max_1, 4, (void*)dis_pred.row(idx));
160                 {
161                     ncnn::Layer* softmax = ncnn::create_layer("Softmax");
162 
163                     ncnn::ParamDict pd;
164                     pd.set(0, 1); // axis
165                     pd.set(1, 1);
166                     softmax->load_param(pd);
167 
168                     ncnn::Option opt;
169                     opt.num_threads = 1;
170                     opt.use_packing_layout = false;
171 
172                     softmax->create_pipeline(opt);
173 
174                     softmax->forward_inplace(bbox_pred, opt);
175 
176                     softmax->destroy_pipeline(opt);
177 
178                     delete softmax;
179                 }
180 
181                 float dis_pred[4];
182                 for (int k = 0; k < 4; k++)
183                 {
184                     float dis = 0.f;
185                     const float* dis_after_sm = bbox_pred.row(k);
186                     for (int l = 0; l < reg_max_1; l++)
187                     {
188                         dis += l * dis_after_sm[l];
189                     }
190 
191                     dis_pred[k] = dis * stride;
192                 }
193 
194                 float pb_cx = (j + 0.5f) * stride;
195                 float pb_cy = (i + 0.5f) * stride;
196 
197                 float x0 = pb_cx - dis_pred[0];
198                 float y0 = pb_cy - dis_pred[1];
199                 float x1 = pb_cx + dis_pred[2];
200                 float y1 = pb_cy + dis_pred[3];
201 
202                 Object obj;
203                 obj.rect.x = x0;
204                 obj.rect.y = y0;
205                 obj.rect.width = x1 - x0;
206                 obj.rect.height = y1 - y0;
207                 obj.label = label;
208                 obj.prob = score;
209 
210                 objects.push_back(obj);
211             }
212         }
213     }
214 }
215 
detect_nanodet(const cv::Mat & bgr,std::vector<Object> & objects)216 static int detect_nanodet(const cv::Mat& bgr, std::vector<Object>& objects)
217 {
218     ncnn::Net nanodet;
219 
220     nanodet.opt.use_vulkan_compute = true;
221     // nanodet.opt.use_bf16_storage = true;
222 
223     // original pretrained model from https://github.com/RangiLyu/nanodet
224     // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
225     nanodet.load_param("nanodet_m.param");
226     nanodet.load_model("nanodet_m.bin");
227 
228     int width = bgr.cols;
229     int height = bgr.rows;
230 
231     const int target_size = 320;
232     const float prob_threshold = 0.4f;
233     const float nms_threshold = 0.5f;
234 
235     // pad to multiple of 32
236     int w = width;
237     int h = height;
238     float scale = 1.f;
239     if (w > h)
240     {
241         scale = (float)target_size / w;
242         w = target_size;
243         h = h * scale;
244     }
245     else
246     {
247         scale = (float)target_size / h;
248         h = target_size;
249         w = w * scale;
250     }
251 
252     ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, width, height, w, h);
253 
254     // pad to target_size rectangle
255     int wpad = (w + 31) / 32 * 32 - w;
256     int hpad = (h + 31) / 32 * 32 - h;
257     ncnn::Mat in_pad;
258     ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
259 
260     const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
261     const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f};
262     in_pad.substract_mean_normalize(mean_vals, norm_vals);
263 
264     ncnn::Extractor ex = nanodet.create_extractor();
265 
266     ex.input("input.1", in_pad);
267 
268     std::vector<Object> proposals;
269 
270     // stride 8
271     {
272         ncnn::Mat cls_pred;
273         ncnn::Mat dis_pred;
274         ex.extract("792", cls_pred);
275         ex.extract("795", dis_pred);
276 
277         std::vector<Object> objects8;
278         generate_proposals(cls_pred, dis_pred, 8, in_pad, prob_threshold, objects8);
279 
280         proposals.insert(proposals.end(), objects8.begin(), objects8.end());
281     }
282 
283     // stride 16
284     {
285         ncnn::Mat cls_pred;
286         ncnn::Mat dis_pred;
287         ex.extract("814", cls_pred);
288         ex.extract("817", dis_pred);
289 
290         std::vector<Object> objects16;
291         generate_proposals(cls_pred, dis_pred, 16, in_pad, prob_threshold, objects16);
292 
293         proposals.insert(proposals.end(), objects16.begin(), objects16.end());
294     }
295 
296     // stride 32
297     {
298         ncnn::Mat cls_pred;
299         ncnn::Mat dis_pred;
300         ex.extract("836", cls_pred);
301         ex.extract("839", dis_pred);
302 
303         std::vector<Object> objects32;
304         generate_proposals(cls_pred, dis_pred, 32, in_pad, prob_threshold, objects32);
305 
306         proposals.insert(proposals.end(), objects32.begin(), objects32.end());
307     }
308 
309     // sort all proposals by score from highest to lowest
310     qsort_descent_inplace(proposals);
311 
312     // apply nms with nms_threshold
313     std::vector<int> picked;
314     nms_sorted_bboxes(proposals, picked, nms_threshold);
315 
316     int count = picked.size();
317 
318     objects.resize(count);
319     for (int i = 0; i < count; i++)
320     {
321         objects[i] = proposals[picked[i]];
322 
323         // adjust offset to original unpadded
324         float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
325         float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
326         float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
327         float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
328 
329         // clip
330         x0 = std::max(std::min(x0, (float)(width - 1)), 0.f);
331         y0 = std::max(std::min(y0, (float)(height - 1)), 0.f);
332         x1 = std::max(std::min(x1, (float)(width - 1)), 0.f);
333         y1 = std::max(std::min(y1, (float)(height - 1)), 0.f);
334 
335         objects[i].rect.x = x0;
336         objects[i].rect.y = y0;
337         objects[i].rect.width = x1 - x0;
338         objects[i].rect.height = y1 - y0;
339     }
340 
341     return 0;
342 }
343 
draw_objects(const cv::Mat & bgr,const std::vector<Object> & objects)344 static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
345 {
346     static const char* class_names[] = {
347         "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
348         "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
349         "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
350         "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
351         "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
352         "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
353         "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
354         "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
355         "hair drier", "toothbrush"
356     };
357 
358     cv::Mat image = bgr.clone();
359 
360     for (size_t i = 0; i < objects.size(); i++)
361     {
362         const Object& obj = objects[i];
363 
364         fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
365                 obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
366 
367         cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));
368 
369         char text[256];
370         sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);
371 
372         int baseLine = 0;
373         cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
374 
375         int x = obj.rect.x;
376         int y = obj.rect.y - label_size.height - baseLine;
377         if (y < 0)
378             y = 0;
379         if (x + label_size.width > image.cols)
380             x = image.cols - label_size.width;
381 
382         cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
383                       cv::Scalar(255, 255, 255), -1);
384 
385         cv::putText(image, text, cv::Point(x, y + label_size.height),
386                     cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
387     }
388 
389     cv::imshow("image", image);
390     cv::waitKey(0);
391 }
392 
main(int argc,char ** argv)393 int main(int argc, char** argv)
394 {
395     if (argc != 2)
396     {
397         fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
398         return -1;
399     }
400 
401     const char* imagepath = argv[1];
402 
403     cv::Mat m = cv::imread(imagepath, 1);
404     if (m.empty())
405     {
406         fprintf(stderr, "cv::imread %s failed\n", imagepath);
407         return -1;
408     }
409 
410     std::vector<Object> objects;
411     detect_nanodet(m, objects);
412 
413     draw_objects(m, objects);
414 
415     return 0;
416 }
417