1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
#include "net.h"

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#if CV_MAJOR_VERSION >= 3
#include <opencv2/videoio/videoio.hpp>
#endif

#include <vector>

#include <stdio.h>
#include <string.h> // strstr() used in main(); do not rely on transitive includes
28 
29 #define NCNN_PROFILING
30 #define YOLOV4_TINY //Using yolov4_tiny, if undef, using original yolov4
31 
32 #ifdef NCNN_PROFILING
33 #include "benchmark.h"
34 #endif
35 
// A single detection produced by the network.
struct Object
{
    cv::Rect_<float> rect; // bounding box in image pixel coordinates
    int label;             // class id, used as an index into the class_names table
    float prob;            // confidence score in [0, 1]
};
42 
init_yolov4(ncnn::Net * yolov4,int * target_size)43 static int init_yolov4(ncnn::Net* yolov4, int* target_size)
44 {
45     /* --> Set the params you need for the ncnn inference <-- */
46 
47     yolov4->opt.num_threads = 4; //You need to compile with libgomp for multi thread support
48 
49     yolov4->opt.use_vulkan_compute = true; //You need to compile with libvulkan for gpu support
50 
51     yolov4->opt.use_winograd_convolution = true;
52     yolov4->opt.use_sgemm_convolution = true;
53     yolov4->opt.use_fp16_packed = true;
54     yolov4->opt.use_fp16_storage = true;
55     yolov4->opt.use_fp16_arithmetic = true;
56     yolov4->opt.use_packing_layout = true;
57     yolov4->opt.use_shader_pack8 = false;
58     yolov4->opt.use_image_storage = false;
59 
60     /* --> End of setting params <-- */
61     int ret = 0;
62 
63     // original pretrained model from https://github.com/AlexeyAB/darknet
64     // the ncnn model https://drive.google.com/drive/folders/1YzILvh0SKQPS_lrb33dmGNq7aVTKPWS0?usp=sharing
65     // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
66 #ifdef YOLOV4_TINY
67     const char* yolov4_param = "yolov4-tiny-opt.param";
68     const char* yolov4_model = "yolov4-tiny-opt.bin";
69     *target_size = 416;
70 #else
71     const char* yolov4_param = "yolov4-opt.param";
72     const char* yolov4_model = "yolov4-opt.bin";
73     *target_size = 608;
74 #endif
75 
76     ret = yolov4->load_param(yolov4_param);
77     if (ret != 0)
78     {
79         return ret;
80     }
81 
82     ret = yolov4->load_model(yolov4_model);
83     if (ret != 0)
84     {
85         return ret;
86     }
87 
88     return 0;
89 }
90 
detect_yolov4(const cv::Mat & bgr,std::vector<Object> & objects,int target_size,ncnn::Net * yolov4)91 static int detect_yolov4(const cv::Mat& bgr, std::vector<Object>& objects, int target_size, ncnn::Net* yolov4)
92 {
93     int img_w = bgr.cols;
94     int img_h = bgr.rows;
95 
96     ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_size, target_size);
97 
98     const float mean_vals[3] = {0, 0, 0};
99     const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
100     in.substract_mean_normalize(mean_vals, norm_vals);
101 
102     ncnn::Extractor ex = yolov4->create_extractor();
103 
104     ex.input("data", in);
105 
106     ncnn::Mat out;
107     ex.extract("output", out);
108 
109     objects.clear();
110     for (int i = 0; i < out.h; i++)
111     {
112         const float* values = out.row(i);
113 
114         Object object;
115         object.label = values[0];
116         object.prob = values[1];
117         object.rect.x = values[2] * img_w;
118         object.rect.y = values[3] * img_h;
119         object.rect.width = values[4] * img_w - object.rect.x;
120         object.rect.height = values[5] * img_h - object.rect.y;
121 
122         objects.push_back(object);
123     }
124 
125     return 0;
126 }
127 
draw_objects(const cv::Mat & bgr,const std::vector<Object> & objects,int is_streaming)128 static int draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, int is_streaming)
129 {
130     static const char* class_names[] = {"background", "person", "bicycle",
131                                         "car", "motorbike", "aeroplane", "bus", "train", "truck",
132                                         "boat", "traffic light", "fire hydrant", "stop sign",
133                                         "parking meter", "bench", "bird", "cat", "dog", "horse",
134                                         "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
135                                         "backpack", "umbrella", "handbag", "tie", "suitcase",
136                                         "frisbee", "skis", "snowboard", "sports ball", "kite",
137                                         "baseball bat", "baseball glove", "skateboard", "surfboard",
138                                         "tennis racket", "bottle", "wine glass", "cup", "fork",
139                                         "knife", "spoon", "bowl", "banana", "apple", "sandwich",
140                                         "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
141                                         "cake", "chair", "sofa", "pottedplant", "bed", "diningtable",
142                                         "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard",
143                                         "cell phone", "microwave", "oven", "toaster", "sink",
144                                         "refrigerator", "book", "clock", "vase", "scissors",
145                                         "teddy bear", "hair drier", "toothbrush"
146                                        };
147 
148     cv::Mat image = bgr.clone();
149 
150     for (size_t i = 0; i < objects.size(); i++)
151     {
152         const Object& obj = objects[i];
153 
154         fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
155                 obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
156 
157         cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));
158 
159         char text[256];
160         sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);
161 
162         int baseLine = 0;
163         cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
164 
165         int x = obj.rect.x;
166         int y = obj.rect.y - label_size.height - baseLine;
167         if (y < 0)
168             y = 0;
169         if (x + label_size.width > image.cols)
170             x = image.cols - label_size.width;
171 
172         cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
173                       cv::Scalar(255, 255, 255), -1);
174 
175         cv::putText(image, text, cv::Point(x, y + label_size.height),
176                     cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
177     }
178 
179     cv::imshow("image", image);
180 
181     if (is_streaming)
182     {
183         cv::waitKey(1);
184     }
185     else
186     {
187         cv::waitKey(0);
188     }
189 
190     return 0;
191 }
192 
// Entry point: loads the model, then either runs detection once on an image
// file or loops over frames from a V4L capture device ("/dev/video*").
// Usage: <binary> [v4l input device or image]
int main(int argc, char** argv)
{
    cv::Mat frame;
    std::vector<Object> objects;

    cv::VideoCapture cap;

    ncnn::Net yolov4;

    const char* devicepath;

    int target_size = 0;
    int is_streaming = 0; // nonzero once a capture device is opened successfully

    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s [v4l input device or image]\n", argv[0]);
        return -1;
    }

    devicepath = argv[1];

#ifdef NCNN_PROFILING
    double t_load_start = ncnn::get_current_time();
#endif

    int ret = init_yolov4(&yolov4, &target_size); //We load model and param first!
    if (ret != 0)
    {
        fprintf(stderr, "Failed to load model or param, error %d", ret);
        return -1;
    }

#ifdef NCNN_PROFILING
    double t_load_end = ncnn::get_current_time();
    fprintf(stdout, "NCNN Init time %.02lfms\n", t_load_end - t_load_start);
#endif

    // Treat the argument as an image path unless it contains "/dev/video",
    // in which case open it as a capture device and stream frames.
    if (strstr(devicepath, "/dev/video") == NULL)
    {
        frame = cv::imread(argv[1], 1); // 1 = force 3-channel BGR
        if (frame.empty())
        {
            fprintf(stderr, "Failed to read image %s.\n", argv[1]);
            return -1;
        }
    }
    else
    {
        cap.open(devicepath);

        if (!cap.isOpened())
        {
            fprintf(stderr, "Failed to open %s", devicepath);
            return -1;
        }

        // Grab one frame up front to verify the device actually delivers data.
        cap >> frame;

        if (frame.empty())
        {
            fprintf(stderr, "Failed to read from device %s.\n", devicepath);
            return -1;
        }

        is_streaming = 1;
    }

    // Capture (when streaming) -> detect -> draw; runs once for a still image.
    while (1)
    {
        if (is_streaming)
        {
#ifdef NCNN_PROFILING
            double t_capture_start = ncnn::get_current_time();
#endif

            cap >> frame;

#ifdef NCNN_PROFILING
            double t_capture_end = ncnn::get_current_time();
            fprintf(stdout, "NCNN OpenCV capture time %.02lfms\n", t_capture_end - t_capture_start);
#endif
            if (frame.empty())
            {
                fprintf(stderr, "OpenCV Failed to Capture from device %s\n", devicepath);
                return -1;
            }
        }

#ifdef NCNN_PROFILING
        double t_detect_start = ncnn::get_current_time();
#endif

        detect_yolov4(frame, objects, target_size, &yolov4); //Create an extractor and run detection

#ifdef NCNN_PROFILING
        double t_detect_end = ncnn::get_current_time();
        fprintf(stdout, "NCNN detection time %.02lfms\n", t_detect_end - t_detect_start);
#endif

#ifdef NCNN_PROFILING
        double t_draw_start = ncnn::get_current_time();
#endif

        draw_objects(frame, objects, is_streaming); //Draw detection results on opencv image

#ifdef NCNN_PROFILING
        double t_draw_end = ncnn::get_current_time();
        fprintf(stdout, "NCNN OpenCV draw result time %.02lfms\n", t_draw_end - t_draw_start);
#endif

        if (!is_streaming)
        {   //If it is a still image, exit!
            return 0;
        }
    }

    return 0;
}
312