1 /* ============================================================
2  *
3  * This file is a part of digiKam
4  *
5  * Date        : 2019-08-08
6  * Description : Derived class to perform SSD neural network inference
7  *               for face detection
8  *
9  * Copyright (C) 2019 by Thanh Trung Dinh <dinhthanhtrung1996 at gmail dot com>
10  * Copyright (C) 2020 by Gilles Caulier <caulier dot gilles at gmail dot com>
11  *
12  * This program is free software; you can redistribute it
13  * and/or modify it under the terms of the GNU General
14  * Public License as published by the Free Software Foundation;
15  * either version 2, or (at your option)
16  * any later version.
17  *
18  * This program is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * ============================================================ */
24 
25 #include "dnnfacedetectorssd.h"
26 
// C++ includes

#include <mutex>

// Qt includes

#include <QUrl>
#include <QFile>
#include <QList>
#include <QRect>
#include <QString>
#include <QStandardPaths>
34 
35 // Local includes
36 
37 #include "digikam_debug.h"
38 #include "digikam_config.h"
39 
40 namespace Digikam
41 {
42 
DNNFaceDetectorSSD()43 DNNFaceDetectorSSD::DNNFaceDetectorSSD()
44     : DNNFaceDetectorBase(1.0, cv::Scalar(104.0, 177.0, 123.0), cv::Size(300, 300))
45 {
46     loadModels();
47 }
48 
~DNNFaceDetectorSSD()49 DNNFaceDetectorSSD::~DNNFaceDetectorSSD()
50 {
51 }
52 
loadModels()53 bool DNNFaceDetectorSSD::loadModels()
54 {
55     QString appPath = QStandardPaths::writableLocation(QStandardPaths::AppDataLocation);
56     QUrl    appUrl  = QUrl::fromLocalFile(appPath).adjusted(QUrl::RemoveFilename);
57     appUrl.setPath(appUrl.path() + QLatin1String("digikam/facesengine/"));
58 
59     QString model   = QLatin1String("deploy.prototxt");
60     QString data    = QLatin1String("res10_300x300_ssd_iter_140000_fp16.caffemodel");
61 
62     QString nnmodel = appUrl.toLocalFile() + model;
63     QString nndata  = appUrl.toLocalFile() + data;
64 
65     if (!nnmodel.isEmpty() && !nndata.isEmpty())
66     {
67         try
68         {
69             qCDebug(DIGIKAM_FACEDB_LOG) << "SSD model:" << nnmodel << ", SSD data:" << nndata;
70 
71 #ifdef Q_OS_WIN
72 
73             net = cv::dnn::readNetFromCaffe(nnmodel.toLocal8Bit().constData(),
74                                             nndata.toLocal8Bit().constData());
75 
76 #else
77 
78             net = cv::dnn::readNetFromCaffe(nnmodel.toStdString(),
79                                             nndata.toStdString());
80 
81 #endif
82 
83         }
84         catch (cv::Exception& e)
85         {
86             qCWarning(DIGIKAM_FACEDB_LOG) << "cv::Exception:" << e.what();
87 
88             return false;
89         }
90         catch (...)
91         {
92            qCWarning(DIGIKAM_FACEDB_LOG) << "Default exception from OpenCV";
93 
94            return false;
95         }
96     }
97     else
98     {
99         qCCritical(DIGIKAM_FACEDB_LOG) << "Cannot found faces engine DNN model" << model << "or" << data;
100         qCCritical(DIGIKAM_FACEDB_LOG) << "Faces detection feature cannot be used!";
101 
102         return false;
103     }
104 
105     return true;
106 }
107 
detectFaces(const cv::Mat & inputImage,const cv::Size & paddedSize,std::vector<cv::Rect> & detectedBboxes)108 void DNNFaceDetectorSSD::detectFaces(const cv::Mat& inputImage,
109                                      const cv::Size& paddedSize,
110                                      std::vector<cv::Rect>& detectedBboxes)
111 {
112     if (inputImage.empty())
113     {
114         qCDebug(DIGIKAM_FACESENGINE_LOG) << "Invalid image given, not detecting faces.";
115         return;
116     }
117 
118     cv::Mat detection;
119     cv::Mat inputBlob = cv::dnn::blobFromImage(inputImage, scaleFactor, inputImageSize, meanValToSubtract, true, false);
120 
121     mutex.lock();
122     {
123         net.setInput(inputBlob);
124         detection = net.forward();
125     }
126     mutex.unlock();
127 
128     postprocess(detection, paddedSize, detectedBboxes);
129 }
130 
postprocess(cv::Mat detection,const cv::Size & paddedSize,std::vector<cv::Rect> & detectedBboxes) const131 void DNNFaceDetectorSSD::postprocess(cv::Mat detection,
132                                      const cv::Size& paddedSize,
133                                      std::vector<cv::Rect>& detectedBboxes) const
134 {
135     std::vector<float> goodConfidences, doubtConfidences, confidences;
136     std::vector<cv::Rect> goodBoxes, doubtBoxes, boxes;
137 
138     cv::Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
139 
140     // TODO: model problem, confidence of ssd output too low ===> false detection
141 
142     for (int i = 0 ; i < detectionMat.rows ; ++i)
143     {
144         float confidence = detectionMat.at<float>(i, 2);
145 
146         if (confidence > confidenceThreshold)
147         {
148             float leftRatio   = detectionMat.at<float>(i, 3);
149             float topRatio    = detectionMat.at<float>(i, 4);
150             float rightRatio  = detectionMat.at<float>(i, 5);
151             float bottomRatio = detectionMat.at<float>(i, 6);
152 
153             int left          = (int)(leftRatio   * inputImageSize.width);
154             int right         = (int)(rightRatio  * inputImageSize.width);
155             int top           = (int)(topRatio    * inputImageSize.height);
156             int bottom        = (int)(bottomRatio * inputImageSize.height);
157 
158             selectBbox(paddedSize,
159                        confidence,
160                        left,
161                        right,
162                        top,
163                        bottom,
164                        goodConfidences,
165                        goodBoxes,
166                        doubtConfidences,
167                        doubtBoxes);
168         }
169     }
170 /*
171     qCDebug(DIGIKAM_FACESENGINE_LOG) << "nb of doubtbox = " << doubtBoxes.size();
172     qCDebug(DIGIKAM_FACESENGINE_LOG) << "nb of goodbox = " << goodBoxes.size();
173 */
174     if (goodBoxes.empty())
175     {
176         boxes       = doubtBoxes;
177         confidences = doubtConfidences;
178     }
179     else
180     {
181         boxes       = goodBoxes;
182         confidences = goodConfidences;
183     }
184 
185     // Perform non maximum suppression to eliminate redundant overlapping boxes with lower confidences
186 
187     std::vector<int> indices;
188     cv::dnn::NMSBoxes(boxes, confidences, confidenceThreshold, nmsThreshold, indices);
189 
190     // Get detected bounding boxes
191 
192     for (size_t i = 0 ; i < indices.size() ; ++i)
193     {
194         cv::Rect bbox = boxes[indices[i]];
195         correctBbox(bbox, paddedSize);
196         detectedBboxes.push_back(cv::Rect(bbox.x, bbox.y, bbox.width, bbox.height));
197     }
198 }
199 
200 } // namespace Digikam
201