1 #include "precomp.hpp"
2 using namespace caffe;
3 
4 namespace cv
5 {
6 namespace cnn_3dobj
7 {
descriptorExtractor(const String & device_type,int device_id)8     descriptorExtractor::descriptorExtractor(const String& device_type, int device_id)
9     {
10         net_ready = 0;
11         if (strcmp(device_type.c_str(), "CPU") == 0 || strcmp(device_type.c_str(), "GPU") == 0)
12         {
13             if (strcmp(device_type.c_str(), "CPU") == 0)
14             {
15                 caffe::Caffe::set_mode(caffe::Caffe::CPU);
16                 deviceType = "CPU";
17                 std::cout << "Using CPU" << std::endl;
18             }
19             else
20             {
21                 caffe::Caffe::set_mode(caffe::Caffe::GPU);
22                 caffe::Caffe::SetDevice(device_id);
23                 deviceType = "GPU";
24                 std::cout << "Using GPU" << std::endl;
25                 std::cout << "Using Device_id=" << device_id << std::endl;
26             }
27             net_set = true;
28         }
29         else
30         {
31             std::cout << "Error: Device name must be 'GPU' together with an device number or 'CPU'." << std::endl;
32             net_set = false;
33         }
34     };
35 
getDeviceType()36     String descriptorExtractor::getDeviceType()
37     {
38         String device_info_out;
39         device_info_out = deviceType;
40         return device_info_out;
41     };
42 
getDeviceId()43     int descriptorExtractor::getDeviceId()
44     {
45         int device_info_out;
46         device_info_out = deviceId;
47         return device_info_out;
48     };
49 
setDeviceType(const String & device_type)50     void descriptorExtractor::setDeviceType(const String& device_type)
51     {
52         if (strcmp(device_type.c_str(), "CPU") == 0 || strcmp(device_type.c_str(), "GPU") == 0)
53         {
54             if (strcmp(device_type.c_str(), "CPU") == 0)
55             {
56                 caffe::Caffe::set_mode(caffe::Caffe::CPU);
57                 deviceType = "CPU";
58                 std::cout << "Using CPU" << std::endl;
59             }
60             else
61             {
62                 caffe::Caffe::set_mode(caffe::Caffe::GPU);
63                 deviceType = "GPU";
64                 std::cout << "Using GPU" << std::endl;
65             }
66         }
67         else
68         {
69             std::cout << "Error: Device name must be 'GPU' or 'CPU'." << std::endl;
70         }
71     };
72 
setDeviceId(const int & device_id)73     void descriptorExtractor::setDeviceId(const int& device_id)
74     {
75         if (strcmp(deviceType.c_str(), "GPU") == 0)
76         {
77             caffe::Caffe::SetDevice(device_id);
78             deviceId = device_id;
79             std::cout << "Using GPU with Device ID = " << device_id << std::endl;
80         }
81         else
82         {
83             std::cout << "Error: Device ID only need to be set when GPU is used." << std::endl;
84         }
85     };
86 
loadNet(const String & model_file,const String & trained_file,const String & mean_file)87     void descriptorExtractor::loadNet(const String& model_file, const String& trained_file, const String& mean_file)
88     {
89         if (net_set)
90         {
91             /* Load the network. */
92             convnet = new Net<float>(model_file, TEST);
93             convnet->CopyTrainedLayersFrom(trained_file);
94             if (convnet->num_inputs() != 1)
95                 std::cout << "Network should have exactly one input." << std::endl;
96             if (convnet->num_outputs() != 1)
97                 std::cout << "Network should have exactly one output." << std::endl;
98             Blob<float>* input_layer = convnet->input_blobs()[0];
99             num_channels = input_layer->channels();
100             if (num_channels != 3 && num_channels != 1)
101                 std::cout << "Input layer should have 1 or 3 channels." << std::endl;
102             input_geometry = cv::Size(input_layer->width(), input_layer->height());
103             /* Load the binaryproto mean file. */
104             if (!mean_file.empty())
105             {
106                 setMean(mean_file);
107                 net_ready = 2;
108             }
109             else
110             {
111                 net_ready = 1;
112             }
113         }
114         else
115         {
116             std::cout << "Error: Net is not set properly in advance using construtor." << std::endl;
117         }
118     };
119 
120     /* Load the mean file in binaryproto format. */
setMean(const String & mean_file)121     void descriptorExtractor::setMean(const String& mean_file)
122     {
123         BlobProto blob_proto;
124         ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
125         /* Convert from BlobProto to Blob<float> */
126         Blob<float> mean_blob;
127         mean_blob.FromProto(blob_proto);
128         if (mean_blob.channels() != num_channels)
129             std::cout << "Number of channels of mean file doesn't match input layer." << std::endl;
130         /* The format of the mean file is planar 32-bit float BGR or grayscale. */
131         std::vector<cv::Mat> channels;
132         float* data = mean_blob.mutable_cpu_data();
133         for (int i = 0; i < num_channels; ++i)
134         {
135             /* Extract an individual channel. */
136             cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
137             channels.push_back(channel);
138             data += mean_blob.height() * mean_blob.width();
139         }
140         /* Merge the separate channels into a single image. */
141         cv::Mat mean;
142         cv::merge(channels, mean);
143         /* Compute the global mean pixel value and create a mean image
144          * filled with this value. */
145         cv::Scalar channel_mean = cv::mean(mean);
146         mean_ = cv::Mat(input_geometry, mean.type(), channel_mean);
147     };
148 
extract(InputArrayOfArrays inputimg,OutputArray feature,String feature_blob)149     void descriptorExtractor::extract(InputArrayOfArrays inputimg, OutputArray feature, String feature_blob)
150     {
151         if (net_ready)
152         {
153             Blob<float>* input_layer = convnet->input_blobs()[0];
154             input_layer->Reshape(1, num_channels,
155             input_geometry.height, input_geometry.width);
156             /* Forward dimension change to all layers. */
157             convnet->Reshape();
158             std::vector<cv::Mat> input_channels;
159             wrapInput(&input_channels);
160             if (inputimg.kind() == 65536)
161             {/* this is a Mat */
162                 Mat img = inputimg.getMat();
163                 preprocess(img, &input_channels);
164                 convnet->ForwardPrefilled();
165                 /* Copy the output layer to a std::vector */
166                 Blob<float>* output_layer = convnet->blob_by_name(feature_blob).get();
167                 const float* begin = output_layer->cpu_data();
168                 const float* end = begin + output_layer->channels();
169                 std::vector<float> featureVec = std::vector<float>(begin, end);
170                 cv::Mat feature_mat = cv::Mat(featureVec, true).t();
171                 feature_mat.copyTo(feature);
172             }
173             else
174             {/* This is a vector<Mat> */
175                 vector<Mat> img;
176                 inputimg.getMatVector(img);
177                 Mat feature_vector;
178                 for (unsigned int i = 0; i < img.size(); ++i)
179                 {
180                     preprocess(img[i], &input_channels);
181                     convnet->ForwardPrefilled();
182                     /* Copy the output layer to a std::vector */
183                     Blob<float>* output_layer = convnet->blob_by_name(feature_blob).get();
184                     const float* begin = output_layer->cpu_data();
185                     const float* end = begin + output_layer->channels();
186                     std::vector<float> featureVec = std::vector<float>(begin, end);
187                     if (i == 0)
188                     {
189                         feature_vector = cv::Mat(featureVec, true).t();
190                         int dim_feature = feature_vector.cols;
191                         feature_vector.resize(img.size(), dim_feature);
192                     }
193                     feature_vector.row(i) = cv::Mat(featureVec, true).t();
194                 }
195                 feature_vector.copyTo(feature);
196             }
197         }
198         else
199           std::cout << "Device must be set properly using constructor and the net must be set in advance using loadNet.";
200     };
201 
202     /* Wrap the input layer of the network in separate cv::Mat objects
203      * (one per channel). This way we save one memcpy operation and we
204      * don't need to rely on cudaMemcpy2D. The last preprocessing
205      * operation will write the separate channels directly to the input
206      * layer. */
wrapInput(std::vector<cv::Mat> * input_channels)207     void descriptorExtractor::wrapInput(std::vector<cv::Mat>* input_channels)
208     {
209         Blob<float>* input_layer = convnet->input_blobs()[0];
210         int width = input_layer->width();
211         int height = input_layer->height();
212         float* input_data = input_layer->mutable_cpu_data();
213         for (int i = 0; i < input_layer->channels(); ++i)
214         {
215             cv::Mat channel(height, width, CV_32FC1, input_data);
216             input_channels->push_back(channel);
217             input_data += width * height;
218         }
219     };
220 
preprocess(const cv::Mat & img,std::vector<cv::Mat> * input_channels)221     void descriptorExtractor::preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels)
222     {
223         /* Convert the input image to the input image format of the network. */
224         cv::Mat sample;
225         if (num_channels == 1)
226             cv::cvtColor(img, sample, COLOR_BGR2GRAY);
227         else if (img.channels() == 4 && num_channels == 3)
228             cv::cvtColor(img, sample, COLOR_BGRA2BGR);
229         else if (img.channels() == 1 && num_channels == 3)
230             cv::cvtColor(img, sample, COLOR_GRAY2BGR);
231         else
232             sample = img;
233 
234         cv::Mat sample_resized;
235         if (sample.size() != input_geometry)
236             cv::resize(sample, sample_resized, input_geometry);
237         else
238         sample_resized = sample;
239 
240         cv::Mat sample_float;
241         sample_resized.convertTo(sample_float, CV_32F);
242 
243         cv::Mat sample_normalized;
244         if (net_ready == 2)
245             cv::subtract(sample_float, mean_, sample_normalized);
246         else
247             sample_normalized = sample_float;
248         /* This operation will write the separate BGR planes directly to the
249          * input layer of the network because it is wrapped by the cv::Mat
250          * objects in input_channels. */
251         cv::split(sample_normalized, *input_channels);
252         if (reinterpret_cast<float*>(input_channels->at(0).data)
253       != convnet->input_blobs()[0]->cpu_data())
254             std::cout << "Input channels are not wrapping the input layer of the network." << std::endl;
255     };
256 } /* namespace cnn_3dobj */
257 } /* namespace cv */
258