1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #include "net.h"
16
17 #include <opencv2/core/core.hpp>
18 #include <opencv2/highgui/highgui.hpp>
19 #include <opencv2/imgproc/imgproc.hpp>
20
21 #if CV_MAJOR_VERSION >= 3
22 #include <opencv2/videoio/videoio.hpp>
23 #endif
24
#include <vector>

#include <stdio.h>
#include <string.h>
28
29 #define NCNN_PROFILING
30 #define YOLOV4_TINY //Using yolov4_tiny, if undef, using original yolov4
31
32 #ifdef NCNN_PROFILING
33 #include "benchmark.h"
34 #endif
35
// One detection result produced by the yolov4 "output" blob,
// converted to image-space pixel coordinates.
struct Object
{
    cv::Rect_<float> rect; // bounding box in pixels of the input image
    int label;             // class index into the COCO class_names table
    float prob;            // confidence score in [0, 1]
};
42
init_yolov4(ncnn::Net * yolov4,int * target_size)43 static int init_yolov4(ncnn::Net* yolov4, int* target_size)
44 {
45 /* --> Set the params you need for the ncnn inference <-- */
46
47 yolov4->opt.num_threads = 4; //You need to compile with libgomp for multi thread support
48
49 yolov4->opt.use_vulkan_compute = true; //You need to compile with libvulkan for gpu support
50
51 yolov4->opt.use_winograd_convolution = true;
52 yolov4->opt.use_sgemm_convolution = true;
53 yolov4->opt.use_fp16_packed = true;
54 yolov4->opt.use_fp16_storage = true;
55 yolov4->opt.use_fp16_arithmetic = true;
56 yolov4->opt.use_packing_layout = true;
57 yolov4->opt.use_shader_pack8 = false;
58 yolov4->opt.use_image_storage = false;
59
60 /* --> End of setting params <-- */
61 int ret = 0;
62
63 // original pretrained model from https://github.com/AlexeyAB/darknet
64 // the ncnn model https://drive.google.com/drive/folders/1YzILvh0SKQPS_lrb33dmGNq7aVTKPWS0?usp=sharing
65 // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
66 #ifdef YOLOV4_TINY
67 const char* yolov4_param = "yolov4-tiny-opt.param";
68 const char* yolov4_model = "yolov4-tiny-opt.bin";
69 *target_size = 416;
70 #else
71 const char* yolov4_param = "yolov4-opt.param";
72 const char* yolov4_model = "yolov4-opt.bin";
73 *target_size = 608;
74 #endif
75
76 ret = yolov4->load_param(yolov4_param);
77 if (ret != 0)
78 {
79 return ret;
80 }
81
82 ret = yolov4->load_model(yolov4_model);
83 if (ret != 0)
84 {
85 return ret;
86 }
87
88 return 0;
89 }
90
detect_yolov4(const cv::Mat & bgr,std::vector<Object> & objects,int target_size,ncnn::Net * yolov4)91 static int detect_yolov4(const cv::Mat& bgr, std::vector<Object>& objects, int target_size, ncnn::Net* yolov4)
92 {
93 int img_w = bgr.cols;
94 int img_h = bgr.rows;
95
96 ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, bgr.cols, bgr.rows, target_size, target_size);
97
98 const float mean_vals[3] = {0, 0, 0};
99 const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
100 in.substract_mean_normalize(mean_vals, norm_vals);
101
102 ncnn::Extractor ex = yolov4->create_extractor();
103
104 ex.input("data", in);
105
106 ncnn::Mat out;
107 ex.extract("output", out);
108
109 objects.clear();
110 for (int i = 0; i < out.h; i++)
111 {
112 const float* values = out.row(i);
113
114 Object object;
115 object.label = values[0];
116 object.prob = values[1];
117 object.rect.x = values[2] * img_w;
118 object.rect.y = values[3] * img_h;
119 object.rect.width = values[4] * img_w - object.rect.x;
120 object.rect.height = values[5] * img_h - object.rect.y;
121
122 objects.push_back(object);
123 }
124
125 return 0;
126 }
127
draw_objects(const cv::Mat & bgr,const std::vector<Object> & objects,int is_streaming)128 static int draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, int is_streaming)
129 {
130 static const char* class_names[] = {"background", "person", "bicycle",
131 "car", "motorbike", "aeroplane", "bus", "train", "truck",
132 "boat", "traffic light", "fire hydrant", "stop sign",
133 "parking meter", "bench", "bird", "cat", "dog", "horse",
134 "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
135 "backpack", "umbrella", "handbag", "tie", "suitcase",
136 "frisbee", "skis", "snowboard", "sports ball", "kite",
137 "baseball bat", "baseball glove", "skateboard", "surfboard",
138 "tennis racket", "bottle", "wine glass", "cup", "fork",
139 "knife", "spoon", "bowl", "banana", "apple", "sandwich",
140 "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
141 "cake", "chair", "sofa", "pottedplant", "bed", "diningtable",
142 "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard",
143 "cell phone", "microwave", "oven", "toaster", "sink",
144 "refrigerator", "book", "clock", "vase", "scissors",
145 "teddy bear", "hair drier", "toothbrush"
146 };
147
148 cv::Mat image = bgr.clone();
149
150 for (size_t i = 0; i < objects.size(); i++)
151 {
152 const Object& obj = objects[i];
153
154 fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
155 obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
156
157 cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));
158
159 char text[256];
160 sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);
161
162 int baseLine = 0;
163 cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
164
165 int x = obj.rect.x;
166 int y = obj.rect.y - label_size.height - baseLine;
167 if (y < 0)
168 y = 0;
169 if (x + label_size.width > image.cols)
170 x = image.cols - label_size.width;
171
172 cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
173 cv::Scalar(255, 255, 255), -1);
174
175 cv::putText(image, text, cv::Point(x, y + label_size.height),
176 cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
177 }
178
179 cv::imshow("image", image);
180
181 if (is_streaming)
182 {
183 cv::waitKey(1);
184 }
185 else
186 {
187 cv::waitKey(0);
188 }
189
190 return 0;
191 }
192
main(int argc,char ** argv)193 int main(int argc, char** argv)
194 {
195 cv::Mat frame;
196 std::vector<Object> objects;
197
198 cv::VideoCapture cap;
199
200 ncnn::Net yolov4;
201
202 const char* devicepath;
203
204 int target_size = 0;
205 int is_streaming = 0;
206
207 if (argc < 2)
208 {
209 fprintf(stderr, "Usage: %s [v4l input device or image]\n", argv[0]);
210 return -1;
211 }
212
213 devicepath = argv[1];
214
215 #ifdef NCNN_PROFILING
216 double t_load_start = ncnn::get_current_time();
217 #endif
218
219 int ret = init_yolov4(&yolov4, &target_size); //We load model and param first!
220 if (ret != 0)
221 {
222 fprintf(stderr, "Failed to load model or param, error %d", ret);
223 return -1;
224 }
225
226 #ifdef NCNN_PROFILING
227 double t_load_end = ncnn::get_current_time();
228 fprintf(stdout, "NCNN Init time %.02lfms\n", t_load_end - t_load_start);
229 #endif
230
231 if (strstr(devicepath, "/dev/video") == NULL)
232 {
233 frame = cv::imread(argv[1], 1);
234 if (frame.empty())
235 {
236 fprintf(stderr, "Failed to read image %s.\n", argv[1]);
237 return -1;
238 }
239 }
240 else
241 {
242 cap.open(devicepath);
243
244 if (!cap.isOpened())
245 {
246 fprintf(stderr, "Failed to open %s", devicepath);
247 return -1;
248 }
249
250 cap >> frame;
251
252 if (frame.empty())
253 {
254 fprintf(stderr, "Failed to read from device %s.\n", devicepath);
255 return -1;
256 }
257
258 is_streaming = 1;
259 }
260
261 while (1)
262 {
263 if (is_streaming)
264 {
265 #ifdef NCNN_PROFILING
266 double t_capture_start = ncnn::get_current_time();
267 #endif
268
269 cap >> frame;
270
271 #ifdef NCNN_PROFILING
272 double t_capture_end = ncnn::get_current_time();
273 fprintf(stdout, "NCNN OpenCV capture time %.02lfms\n", t_capture_end - t_capture_start);
274 #endif
275 if (frame.empty())
276 {
277 fprintf(stderr, "OpenCV Failed to Capture from device %s\n", devicepath);
278 return -1;
279 }
280 }
281
282 #ifdef NCNN_PROFILING
283 double t_detect_start = ncnn::get_current_time();
284 #endif
285
286 detect_yolov4(frame, objects, target_size, &yolov4); //Create an extractor and run detection
287
288 #ifdef NCNN_PROFILING
289 double t_detect_end = ncnn::get_current_time();
290 fprintf(stdout, "NCNN detection time %.02lfms\n", t_detect_end - t_detect_start);
291 #endif
292
293 #ifdef NCNN_PROFILING
294 double t_draw_start = ncnn::get_current_time();
295 #endif
296
297 draw_objects(frame, objects, is_streaming); //Draw detection results on opencv image
298
299 #ifdef NCNN_PROFILING
300 double t_draw_end = ncnn::get_current_time();
301 fprintf(stdout, "NCNN OpenCV draw result time %.02lfms\n", t_draw_end - t_draw_start);
302 #endif
303
304 if (!is_streaming)
305 { //If it is a still image, exit!
306 return 0;
307 }
308 }
309
310 return 0;
311 }
312