1 /**
2 * @file
3 * @brief Source file for CVObjectDetection class
4 * @author Jonathan Thomas <jonathan@openshot.org>
5 * @author Brenno Caldato <brenno.caldato@outlook.com>
6 *
7 * @ref License
8 */
9
10 /* LICENSE
11 *
12 * Copyright (c) 2008-2019 OpenShot Studios, LLC
13 * <http://www.openshotstudios.com/>. This file is part of
14 * OpenShot Library (libopenshot), an open-source project dedicated to
15 * delivering high quality video editing and animation solutions to the
16 * world. For more information visit <http://www.openshot.org/>.
17 *
18 * OpenShot Library (libopenshot) is free software: you can redistribute it
19 * and/or modify it under the terms of the GNU Lesser General Public License
20 * as published by the Free Software Foundation, either version 3 of the
21 * License, or (at your option) any later version.
22 *
23 * OpenShot Library (libopenshot) is distributed in the hope that it will be
24 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU Lesser General Public License for more details.
27 *
28 * You should have received a copy of the GNU Lesser General Public License
29 * along with OpenShot Library. If not, see <http://www.gnu.org/licenses/>.
30 */
31
32 #include <fstream>
33 #include <iomanip>
34 #include <iostream>
35
36 #include "CVObjectDetection.h"
37 #include <google/protobuf/util/time_util.h>
38
39 using namespace std;
40 using namespace openshot;
41 using google::protobuf::util::TimeUtil;
42
CVObjectDetection(std::string processInfoJson,ProcessingController & processingController)43 CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingController &processingController)
44 : processingController(&processingController), processingDevice("CPU"){
45 SetJson(processInfoJson);
46 confThreshold = 0.5;
47 nmsThreshold = 0.1;
48 }
49
setProcessingDevice()50 void CVObjectDetection::setProcessingDevice(){
51 if(processingDevice == "GPU"){
52 net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
53 net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
54 }
55 else if(processingDevice == "CPU"){
56 net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
57 net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
58 }
59 }
60
detectObjectsClip(openshot::Clip & video,size_t _start,size_t _end,bool process_interval)61 void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, size_t _end, bool process_interval)
62 {
63
64 start = _start; end = _end;
65
66 video.Open();
67
68 if(error){
69 return;
70 }
71
72 processingController->SetError(false, "");
73
74 // Load names of classes
75 std::ifstream ifs(classesFile.c_str());
76 std::string line;
77 while (std::getline(ifs, line)) classNames.push_back(line);
78
79 // Load the network
80 if(classesFile == "" || modelConfiguration == "" || modelWeights == "")
81 return;
82 net = cv::dnn::readNetFromDarknet(modelConfiguration, modelWeights);
83 setProcessingDevice();
84
85 size_t frame_number;
86 if(!process_interval || end <= 1 || end-start == 0){
87 // Get total number of frames in video
88 start = (int)(video.Start() * video.Reader()->info.fps.ToFloat());
89 end = (int)(video.End() * video.Reader()->info.fps.ToFloat());
90 }
91
92 for (frame_number = start; frame_number <= end; frame_number++)
93 {
94 // Stop the feature tracker process
95 if(processingController->ShouldStop()){
96 return;
97 }
98
99 std::shared_ptr<openshot::Frame> f = video.GetFrame(frame_number);
100
101 // Grab OpenCV Mat image
102 cv::Mat cvimage = f->GetImageCV();
103
104 DetectObjects(cvimage, frame_number);
105
106 // Update progress
107 processingController->SetProgress(uint(100*(frame_number-start)/(end-start)));
108
109 }
110 }
111
DetectObjects(const cv::Mat & frame,size_t frameId)112 void CVObjectDetection::DetectObjects(const cv::Mat &frame, size_t frameId){
113 // Get frame as OpenCV Mat
114 cv::Mat blob;
115
116 // Create a 4D blob from the frame.
117 int inpWidth, inpHeight;
118 inpWidth = inpHeight = 416;
119
120 cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0), true, false);
121
122 //Sets the input to the network
123 net.setInput(blob);
124
125 // Runs the forward pass to get output of the output layers
126 std::vector<cv::Mat> outs;
127 net.forward(outs, getOutputsNames(net));
128
129 // Remove the bounding boxes with low confidence
130 postprocess(frame.size(), outs, frameId);
131
132 }
133
134
135 // Remove the bounding boxes with low confidence using non-maxima suppression
postprocess(const cv::Size & frameDims,const std::vector<cv::Mat> & outs,size_t frameId)136 void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector<cv::Mat>& outs, size_t frameId)
137 {
138 std::vector<int> classIds;
139 std::vector<float> confidences;
140 std::vector<cv::Rect> boxes;
141 std::vector<int> objectIds;
142
143 for (size_t i = 0; i < outs.size(); ++i)
144 {
145 // Scan through all the bounding boxes output from the network and keep only the
146 // ones with high confidence scores. Assign the box's class label as the class
147 // with the highest score for the box.
148 float* data = (float*)outs[i].data;
149 for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
150 {
151 cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
152 cv::Point classIdPoint;
153 double confidence;
154 // Get the value and location of the maximum score
155 cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
156 if (confidence > confThreshold)
157 {
158 int centerX = (int)(data[0] * frameDims.width);
159 int centerY = (int)(data[1] * frameDims.height);
160 int width = (int)(data[2] * frameDims.width);
161 int height = (int)(data[3] * frameDims.height);
162 int left = centerX - width / 2;
163 int top = centerY - height / 2;
164
165 classIds.push_back(classIdPoint.x);
166 confidences.push_back((float)confidence);
167 boxes.push_back(cv::Rect(left, top, width, height));
168 }
169 }
170 }
171
172 // Perform non maximum suppression to eliminate redundant overlapping boxes with
173 // lower confidences
174 std::vector<int> indices;
175 cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
176
177 // Pass boxes to SORT algorithm
178 std::vector<cv::Rect> sortBoxes;
179 for(auto box : boxes)
180 sortBoxes.push_back(box);
181 sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds);
182
183 // Clear data vectors
184 boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
185 // Get SORT predicted boxes
186 for(auto TBox : sort.frameTrackingResult){
187 if(TBox.frame == frameId){
188 boxes.push_back(TBox.box);
189 confidences.push_back(TBox.confidence);
190 classIds.push_back(TBox.classId);
191 objectIds.push_back(TBox.id);
192 }
193 }
194
195 // Remove boxes based on controids distance
196 for(uint i = 0; i<boxes.size(); i++){
197 for(uint j = i+1; j<boxes.size(); j++){
198 int xc_1 = boxes[i].x + (int)(boxes[i].width/2), yc_1 = boxes[i].y + (int)(boxes[i].width/2);
199 int xc_2 = boxes[j].x + (int)(boxes[j].width/2), yc_2 = boxes[j].y + (int)(boxes[j].width/2);
200
201 if(fabs(xc_1 - xc_2) < 10 && fabs(yc_1 - yc_2) < 10){
202 if(classIds[i] == classIds[j]){
203 if(confidences[i] >= confidences[j]){
204 boxes.erase(boxes.begin() + j);
205 classIds.erase(classIds.begin() + j);
206 confidences.erase(confidences.begin() + j);
207 objectIds.erase(objectIds.begin() + j);
208 break;
209 }
210 else{
211 boxes.erase(boxes.begin() + i);
212 classIds.erase(classIds.begin() + i);
213 confidences.erase(confidences.begin() + i);
214 objectIds.erase(objectIds.begin() + i);
215 i = 0;
216 break;
217 }
218 }
219 }
220 }
221 }
222
223 // Remove boxes based in IOU score
224 for(uint i = 0; i<boxes.size(); i++){
225 for(uint j = i+1; j<boxes.size(); j++){
226
227 if( iou(boxes[i], boxes[j])){
228 if(classIds[i] == classIds[j]){
229 if(confidences[i] >= confidences[j]){
230 boxes.erase(boxes.begin() + j);
231 classIds.erase(classIds.begin() + j);
232 confidences.erase(confidences.begin() + j);
233 objectIds.erase(objectIds.begin() + j);
234 break;
235 }
236 else{
237 boxes.erase(boxes.begin() + i);
238 classIds.erase(classIds.begin() + i);
239 confidences.erase(confidences.begin() + i);
240 objectIds.erase(objectIds.begin() + i);
241 i = 0;
242 break;
243 }
244 }
245 }
246 }
247 }
248
249 // Normalize boxes coordinates
250 std::vector<cv::Rect_<float>> normalized_boxes;
251 for(auto box : boxes){
252 cv::Rect_<float> normalized_box;
253 normalized_box.x = (box.x)/(float)frameDims.width;
254 normalized_box.y = (box.y)/(float)frameDims.height;
255 normalized_box.width = (box.width)/(float)frameDims.width;
256 normalized_box.height = (box.height)/(float)frameDims.height;
257 normalized_boxes.push_back(normalized_box);
258 }
259
260 detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds);
261 }
262
263 // Compute IOU between 2 boxes
iou(cv::Rect pred_box,cv::Rect sort_box)264 bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){
265 // Determine the (x, y)-coordinates of the intersection rectangle
266 int xA = std::max(pred_box.x, sort_box.x);
267 int yA = std::max(pred_box.y, sort_box.y);
268 int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width);
269 int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height);
270
271 // Compute the area of intersection rectangle
272 int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);
273
274 // Compute the area of both the prediction and ground-truth rectangles
275 int boxAArea = (pred_box.width + 1) * (pred_box.height + 1);
276 int boxBArea = (sort_box.width + 1) * (sort_box.height + 1);
277
278 // Compute the intersection over union by taking the intersection
279 float iou = interArea / (float)(boxAArea + boxBArea - interArea);
280
281 // If IOU is above this value the boxes are very close (probably a variation of the same bounding box)
282 if(iou > 0.5)
283 return true;
284 return false;
285 }
286
287 // Get the names of the output layers
getOutputsNames(const cv::dnn::Net & net)288 std::vector<cv::String> CVObjectDetection::getOutputsNames(const cv::dnn::Net& net)
289 {
290 static std::vector<cv::String> names;
291
292 //Get the indices of the output layers, i.e. the layers with unconnected outputs
293 std::vector<int> outLayers = net.getUnconnectedOutLayers();
294
295 //get the names of all the layers in the network
296 std::vector<cv::String> layersNames = net.getLayerNames();
297
298 // Get the names of the output layers in names
299 names.resize(outLayers.size());
300 for (size_t i = 0; i < outLayers.size(); ++i)
301 names[i] = layersNames[outLayers[i] - 1];
302 return names;
303 }
304
GetDetectionData(size_t frameId)305 CVDetectionData CVObjectDetection::GetDetectionData(size_t frameId){
306 // Check if the stabilizer info for the requested frame exists
307 if ( detectionsData.find(frameId) == detectionsData.end() ) {
308
309 return CVDetectionData();
310 } else {
311
312 return detectionsData[frameId];
313 }
314 }
315
SaveObjDetectedData()316 bool CVObjectDetection::SaveObjDetectedData(){
317 // Create tracker message
318 pb_objdetect::ObjDetect objMessage;
319
320 //Save class names in protobuf message
321 for(int i = 0; i<classNames.size(); i++){
322 std::string* className = objMessage.add_classnames();
323 className->assign(classNames.at(i));
324 }
325
326 // Iterate over all frames data and save in protobuf message
327 for(std::map<size_t,CVDetectionData>::iterator it=detectionsData.begin(); it!=detectionsData.end(); ++it){
328 CVDetectionData dData = it->second;
329 pb_objdetect::Frame* pbFrameData;
330 AddFrameDataToProto(objMessage.add_frame(), dData);
331 }
332
333 // Add timestamp
334 *objMessage.mutable_last_updated() = TimeUtil::SecondsToTimestamp(time(NULL));
335
336 {
337 // Write the new message to disk.
338 std::fstream output(protobuf_data_path, ios::out | ios::trunc | ios::binary);
339 if (!objMessage.SerializeToOstream(&output)) {
340 cerr << "Failed to write protobuf message." << endl;
341 return false;
342 }
343 }
344
345 // Delete all global objects allocated by libprotobuf.
346 google::protobuf::ShutdownProtobufLibrary();
347
348 return true;
349
350 }
351
352 // Add frame object detection into protobuf message.
AddFrameDataToProto(pb_objdetect::Frame * pbFrameData,CVDetectionData & dData)353 void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CVDetectionData& dData) {
354
355 // Save frame number and rotation
356 pbFrameData->set_id(dData.frameId);
357
358 for(size_t i = 0; i < dData.boxes.size(); i++){
359 pb_objdetect::Frame_Box* box = pbFrameData->add_bounding_box();
360
361 // Save bounding box data
362 box->set_x(dData.boxes.at(i).x);
363 box->set_y(dData.boxes.at(i).y);
364 box->set_w(dData.boxes.at(i).width);
365 box->set_h(dData.boxes.at(i).height);
366 box->set_classid(dData.classIds.at(i));
367 box->set_confidence(dData.confidences.at(i));
368 box->set_objectid(dData.objectIds.at(i));
369
370 }
371 }
372
373 // Load JSON string into this object
SetJson(const std::string value)374 void CVObjectDetection::SetJson(const std::string value) {
375 // Parse JSON string into JSON objects
376 try
377 {
378 const Json::Value root = openshot::stringToJson(value);
379 // Set all values that match
380
381 SetJsonValue(root);
382 }
383 catch (const std::exception& e)
384 {
385 // Error parsing JSON (or missing keys)
386 // throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
387 std::cout<<"JSON is invalid (missing keys or invalid data types)"<<std::endl;
388 }
389 }
390
391 // Load Json::Value into this object
SetJsonValue(const Json::Value root)392 void CVObjectDetection::SetJsonValue(const Json::Value root) {
393
394 // Set data from Json (if key is found)
395 if (!root["protobuf_data_path"].isNull()){
396 protobuf_data_path = (root["protobuf_data_path"].asString());
397 }
398 if (!root["processing-device"].isNull()){
399 processingDevice = (root["processing-device"].asString());
400 }
401 if (!root["model-config"].isNull()){
402 modelConfiguration = (root["model-config"].asString());
403 std::ifstream infile(modelConfiguration);
404 if(!infile.good()){
405 processingController->SetError(true, "Incorrect path to model config file");
406 error = true;
407 }
408
409 }
410 if (!root["model-weights"].isNull()){
411 modelWeights= (root["model-weights"].asString());
412 std::ifstream infile(modelWeights);
413 if(!infile.good()){
414 processingController->SetError(true, "Incorrect path to model weight file");
415 error = true;
416 }
417
418 }
419 if (!root["class-names"].isNull()){
420 classesFile = (root["class-names"].asString());
421
422 std::ifstream infile(classesFile);
423 if(!infile.good()){
424 processingController->SetError(true, "Incorrect path to class name file");
425 error = true;
426 }
427
428 }
429 }
430
/*
||||||||||||||||||||||||||||||||||||||||||||||||||
        USED ONLY BY THE UNIT TESTS (make test)
||||||||||||||||||||||||||||||||||||||||||||||||||
*/
436
437 // Load protobuf data file
_LoadObjDetectdData()438 bool CVObjectDetection::_LoadObjDetectdData(){
439 // Create tracker message
440 pb_objdetect::ObjDetect objMessage;
441
442 {
443 // Read the existing tracker message.
444 fstream input(protobuf_data_path, ios::in | ios::binary);
445 if (!objMessage.ParseFromIstream(&input)) {
446 cerr << "Failed to parse protobuf message." << endl;
447 return false;
448 }
449 }
450
451 // Make sure classNames and detectionsData are empty
452 classNames.clear(); detectionsData.clear();
453
454 // Get all classes names and assign a color to them
455 for(int i = 0; i < objMessage.classnames_size(); i++){
456 classNames.push_back(objMessage.classnames(i));
457 }
458
459 // Iterate over all frames of the saved message
460 for (size_t i = 0; i < objMessage.frame_size(); i++) {
461 // Create protobuf message reader
462 const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
463
464 // Get frame Id
465 size_t id = pbFrameData.id();
466
467 // Load bounding box data
468 const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();
469
470 // Construct data vectors related to detections in the current frame
471 std::vector<int> classIds;
472 std::vector<float> confidences;
473 std::vector<cv::Rect_<float>> boxes;
474 std::vector<int> objectIds;
475
476 for(int i = 0; i < pbFrameData.bounding_box_size(); i++){
477 // Get bounding box coordinates
478 float x = pBox.Get(i).x(); float y = pBox.Get(i).y();
479 float w = pBox.Get(i).w(); float h = pBox.Get(i).h();
480 // Create OpenCV rectangle with the bouding box info
481 cv::Rect_<float> box(x, y, w, h);
482
483 // Get class Id (which will be assign to a class name) and prediction confidence
484 int classId = pBox.Get(i).classid(); float confidence = pBox.Get(i).confidence();
485 // Get object Id
486 int objectId = pBox.Get(i).objectid();
487
488 // Push back data into vectors
489 boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence);
490 }
491
492 // Assign data to object detector map
493 detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds);
494 }
495
496 // Delete all global objects allocated by libprotobuf.
497 google::protobuf::ShutdownProtobufLibrary();
498
499 return true;
500 }
501