1 #include "precomp.hpp" 2 using namespace caffe; 3 4 namespace cv 5 { 6 namespace cnn_3dobj 7 { descriptorExtractor(const String & device_type,int device_id)8 descriptorExtractor::descriptorExtractor(const String& device_type, int device_id) 9 { 10 net_ready = 0; 11 if (strcmp(device_type.c_str(), "CPU") == 0 || strcmp(device_type.c_str(), "GPU") == 0) 12 { 13 if (strcmp(device_type.c_str(), "CPU") == 0) 14 { 15 caffe::Caffe::set_mode(caffe::Caffe::CPU); 16 deviceType = "CPU"; 17 std::cout << "Using CPU" << std::endl; 18 } 19 else 20 { 21 caffe::Caffe::set_mode(caffe::Caffe::GPU); 22 caffe::Caffe::SetDevice(device_id); 23 deviceType = "GPU"; 24 std::cout << "Using GPU" << std::endl; 25 std::cout << "Using Device_id=" << device_id << std::endl; 26 } 27 net_set = true; 28 } 29 else 30 { 31 std::cout << "Error: Device name must be 'GPU' together with an device number or 'CPU'." << std::endl; 32 net_set = false; 33 } 34 }; 35 getDeviceType()36 String descriptorExtractor::getDeviceType() 37 { 38 String device_info_out; 39 device_info_out = deviceType; 40 return device_info_out; 41 }; 42 getDeviceId()43 int descriptorExtractor::getDeviceId() 44 { 45 int device_info_out; 46 device_info_out = deviceId; 47 return device_info_out; 48 }; 49 setDeviceType(const String & device_type)50 void descriptorExtractor::setDeviceType(const String& device_type) 51 { 52 if (strcmp(device_type.c_str(), "CPU") == 0 || strcmp(device_type.c_str(), "GPU") == 0) 53 { 54 if (strcmp(device_type.c_str(), "CPU") == 0) 55 { 56 caffe::Caffe::set_mode(caffe::Caffe::CPU); 57 deviceType = "CPU"; 58 std::cout << "Using CPU" << std::endl; 59 } 60 else 61 { 62 caffe::Caffe::set_mode(caffe::Caffe::GPU); 63 deviceType = "GPU"; 64 std::cout << "Using GPU" << std::endl; 65 } 66 } 67 else 68 { 69 std::cout << "Error: Device name must be 'GPU' or 'CPU'." << std::endl; 70 } 71 }; 72 setDeviceId(const int & device_id)73 void descriptorExtractor::setDeviceId(const int& device_id) 74 { 75 if (strcmp(deviceType.c_str(), "GPU") == 0) 76 { 77 caffe::Caffe::SetDevice(device_id); 78 deviceId = device_id; 79 std::cout << "Using GPU with Device ID = " << device_id << std::endl; 80 } 81 else 82 { 83 std::cout << "Error: Device ID only need to be set when GPU is used." << std::endl; 84 } 85 }; 86 loadNet(const String & model_file,const String & trained_file,const String & mean_file)87 void descriptorExtractor::loadNet(const String& model_file, const String& trained_file, const String& mean_file) 88 { 89 if (net_set) 90 { 91 /* Load the network. */ 92 convnet = new Net<float>(model_file, TEST); 93 convnet->CopyTrainedLayersFrom(trained_file); 94 if (convnet->num_inputs() != 1) 95 std::cout << "Network should have exactly one input." << std::endl; 96 if (convnet->num_outputs() != 1) 97 std::cout << "Network should have exactly one output." << std::endl; 98 Blob<float>* input_layer = convnet->input_blobs()[0]; 99 num_channels = input_layer->channels(); 100 if (num_channels != 3 && num_channels != 1) 101 std::cout << "Input layer should have 1 or 3 channels." << std::endl; 102 input_geometry = cv::Size(input_layer->width(), input_layer->height()); 103 /* Load the binaryproto mean file. */ 104 if (!mean_file.empty()) 105 { 106 setMean(mean_file); 107 net_ready = 2; 108 } 109 else 110 { 111 net_ready = 1; 112 } 113 } 114 else 115 { 116 std::cout << "Error: Net is not set properly in advance using construtor." << std::endl; 117 } 118 }; 119 120 /* Load the mean file in binaryproto format. */ setMean(const String & mean_file)121 void descriptorExtractor::setMean(const String& mean_file) 122 { 123 BlobProto blob_proto; 124 ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); 125 /* Convert from BlobProto to Blob<float> */ 126 Blob<float> mean_blob; 127 mean_blob.FromProto(blob_proto); 128 if (mean_blob.channels() != num_channels) 129 std::cout << "Number of channels of mean file doesn't match input layer." << std::endl; 130 /* The format of the mean file is planar 32-bit float BGR or grayscale. */ 131 std::vector<cv::Mat> channels; 132 float* data = mean_blob.mutable_cpu_data(); 133 for (int i = 0; i < num_channels; ++i) 134 { 135 /* Extract an individual channel. */ 136 cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data); 137 channels.push_back(channel); 138 data += mean_blob.height() * mean_blob.width(); 139 } 140 /* Merge the separate channels into a single image. */ 141 cv::Mat mean; 142 cv::merge(channels, mean); 143 /* Compute the global mean pixel value and create a mean image 144 * filled with this value. */ 145 cv::Scalar channel_mean = cv::mean(mean); 146 mean_ = cv::Mat(input_geometry, mean.type(), channel_mean); 147 }; 148 extract(InputArrayOfArrays inputimg,OutputArray feature,String feature_blob)149 void descriptorExtractor::extract(InputArrayOfArrays inputimg, OutputArray feature, String feature_blob) 150 { 151 if (net_ready) 152 { 153 Blob<float>* input_layer = convnet->input_blobs()[0]; 154 input_layer->Reshape(1, num_channels, 155 input_geometry.height, input_geometry.width); 156 /* Forward dimension change to all layers. */ 157 convnet->Reshape(); 158 std::vector<cv::Mat> input_channels; 159 wrapInput(&input_channels); 160 if (inputimg.kind() == 65536) 161 {/* this is a Mat */ 162 Mat img = inputimg.getMat(); 163 preprocess(img, &input_channels); 164 convnet->ForwardPrefilled(); 165 /* Copy the output layer to a std::vector */ 166 Blob<float>* output_layer = convnet->blob_by_name(feature_blob).get(); 167 const float* begin = output_layer->cpu_data(); 168 const float* end = begin + output_layer->channels(); 169 std::vector<float> featureVec = std::vector<float>(begin, end); 170 cv::Mat feature_mat = cv::Mat(featureVec, true).t(); 171 feature_mat.copyTo(feature); 172 } 173 else 174 {/* This is a vector<Mat> */ 175 vector<Mat> img; 176 inputimg.getMatVector(img); 177 Mat feature_vector; 178 for (unsigned int i = 0; i < img.size(); ++i) 179 { 180 preprocess(img[i], &input_channels); 181 convnet->ForwardPrefilled(); 182 /* Copy the output layer to a std::vector */ 183 Blob<float>* output_layer = convnet->blob_by_name(feature_blob).get(); 184 const float* begin = output_layer->cpu_data(); 185 const float* end = begin + output_layer->channels(); 186 std::vector<float> featureVec = std::vector<float>(begin, end); 187 if (i == 0) 188 { 189 feature_vector = cv::Mat(featureVec, true).t(); 190 int dim_feature = feature_vector.cols; 191 feature_vector.resize(img.size(), dim_feature); 192 } 193 feature_vector.row(i) = cv::Mat(featureVec, true).t(); 194 } 195 feature_vector.copyTo(feature); 196 } 197 } 198 else 199 std::cout << "Device must be set properly using constructor and the net must be set in advance using loadNet."; 200 }; 201 202 /* Wrap the input layer of the network in separate cv::Mat objects 203 * (one per channel). This way we save one memcpy operation and we 204 * don't need to rely on cudaMemcpy2D. The last preprocessing 205 * operation will write the separate channels directly to the input 206 * layer. */ wrapInput(std::vector<cv::Mat> * input_channels)207 void descriptorExtractor::wrapInput(std::vector<cv::Mat>* input_channels) 208 { 209 Blob<float>* input_layer = convnet->input_blobs()[0]; 210 int width = input_layer->width(); 211 int height = input_layer->height(); 212 float* input_data = input_layer->mutable_cpu_data(); 213 for (int i = 0; i < input_layer->channels(); ++i) 214 { 215 cv::Mat channel(height, width, CV_32FC1, input_data); 216 input_channels->push_back(channel); 217 input_data += width * height; 218 } 219 }; 220 preprocess(const cv::Mat & img,std::vector<cv::Mat> * input_channels)221 void descriptorExtractor::preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels) 222 { 223 /* Convert the input image to the input image format of the network. */ 224 cv::Mat sample; 225 if (num_channels == 1) 226 cv::cvtColor(img, sample, COLOR_BGR2GRAY); 227 else if (img.channels() == 4 && num_channels == 3) 228 cv::cvtColor(img, sample, COLOR_BGRA2BGR); 229 else if (img.channels() == 1 && num_channels == 3) 230 cv::cvtColor(img, sample, COLOR_GRAY2BGR); 231 else 232 sample = img; 233 234 cv::Mat sample_resized; 235 if (sample.size() != input_geometry) 236 cv::resize(sample, sample_resized, input_geometry); 237 else 238 sample_resized = sample; 239 240 cv::Mat sample_float; 241 sample_resized.convertTo(sample_float, CV_32F); 242 243 cv::Mat sample_normalized; 244 if (net_ready == 2) 245 cv::subtract(sample_float, mean_, sample_normalized); 246 else 247 sample_normalized = sample_float; 248 /* This operation will write the separate BGR planes directly to the 249 * input layer of the network because it is wrapped by the cv::Mat 250 * objects in input_channels. */ 251 cv::split(sample_normalized, *input_channels); 252 if (reinterpret_cast<float*>(input_channels->at(0).data) 253 != convnet->input_blobs()[0]->cpu_data()) 254 std::cout << "Input channels are not wrapping the input layer of the network." << std::endl; 255 }; 256 } /* namespace cnn_3dobj */ 257 } /* namespace cv */ 258