1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20 /*!
21 * Copyright (c) 2018 by Contributors
22 * \file common.hpp
23 * \brief Common functions for GluonCV cpp inference demo
24 * \author Joshua Zhang
25 */
#include "mxnet-cpp/MxNetCpp.h"
#include <opencv2/opencv.hpp>

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <random>
#include <sstream>
#include <string>
#include <vector>

using namespace mxnet::cpp;
39
40 // resize short within
ResizeShortWithin(cv::Mat src,int short_size,int max_size,int mult_base)41 inline cv::Mat ResizeShortWithin(cv::Mat src, int short_size, int max_size, int mult_base) {
42 double h = src.rows;
43 double w = src.cols;
44 double im_size_min = h;
45 double im_size_max = w;
46 if (w < h) {
47 im_size_min = w;
48 im_size_max = h;
49 }
50 double mb = mult_base; // this is the factor of the output shapes
51 double scale = static_cast<double>(short_size) / static_cast<double>(im_size_min);
52 if ((std::round(scale * im_size_max / mb) * mb) > max_size) {
53 // fit in max_size
54 scale = std::floor(static_cast<double>(max_size) / mb) * mb / im_size_max;
55 }
56 int new_w = static_cast<int>(std::round(w * scale / mb) * mb);
57 int new_h = static_cast<int>(std::round(h * scale / mb) * mb);
58 cv::Mat dst;
59 cv::resize(src, dst, cv::Size(new_w, new_h));
60 return dst;
61 }
62
63 // Load data from CV BGR image
AsData(cv::Mat bgr_image,Context ctx=Context::cpu ())64 inline NDArray AsData(cv::Mat bgr_image, Context ctx = Context::cpu()) {
65 // convert BGR image from OpenCV to RGB in MXNet.
66 cv::Mat rgb_image;
67 cv::cvtColor(bgr_image, rgb_image, cv::COLOR_BGR2RGB);
68 // convert to float32 from uint8
69 rgb_image.convertTo(rgb_image, CV_32FC3);
70 // flatten to single channel, and single row.
71 cv::Mat flat_image = rgb_image.reshape(1, 1);
72 // a vector of raw pixel values, no copy
73 std::vector<float> data_buffer;
74 data_buffer.insert(
75 data_buffer.end(),
76 flat_image.ptr<float>(0),
77 flat_image.ptr<float>(0) + flat_image.cols);
78 // construct NDArray from data buffer
79 return NDArray(data_buffer, Shape(1, rgb_image.rows, rgb_image.cols, 3), ctx);
80 }
81
82 // Load data from filename
AsData(std::string filename,Context ctx=Context::cpu ())83 inline NDArray AsData(std::string filename, Context ctx = Context::cpu()) {
84 cv::Mat bgr_image = cv::imread(filename, 1);
85 return AsData(bgr_image, ctx);
86 }
87
LoadCheckpoint(const std::string prefix,const unsigned int epoch,Symbol * symbol,std::map<std::string,NDArray> * arg_params,std::map<std::string,NDArray> * aux_params,Context ctx=Context::cpu ())88 inline void LoadCheckpoint(const std::string prefix, const unsigned int epoch,
89 Symbol* symbol, std::map<std::string, NDArray>* arg_params,
90 std::map<std::string, NDArray>* aux_params,
91 Context ctx = Context::cpu()) {
92 // load symbol from JSON
93 Symbol new_symbol = Symbol::Load(prefix + "-symbol.json");
94 // load parameters
95 std::stringstream ss;
96 ss << std::setw(4) << std::setfill('0') << epoch;
97 std::string filepath = prefix + "-" + ss.str() + ".params";
98 std::map<std::string, NDArray> params = NDArray::LoadToMap(filepath);
99 std::map<std::string, NDArray> args;
100 std::map<std::string, NDArray> auxs;
101 for (auto iter : params) {
102 std::string type = iter.first.substr(0, 4);
103 std::string name = iter.first.substr(4);
104 if (type == "arg:")
105 args[name] = iter.second.Copy(ctx);
106 else if (type == "aux:")
107 auxs[name] = iter.second.Copy(ctx);
108 else
109 continue;
110 }
111 NDArray::WaitAll();
112
113 *symbol = new_symbol;
114 *arg_params = args;
115 *aux_params = auxs;
116 }
117
/*!
 * \brief Check whether a string ends with the given suffix.
 *
 * \param value string to inspect
 * \param ending suffix to look for
 * \return true if the last `ending.size()` characters of `value` equal
 *         `ending` (an empty `ending` always matches), false otherwise
 */
inline bool EndsWith(std::string const & value, std::string const & ending)
{
  const std::string::size_type suffix_len = ending.size();
  const std::string::size_type total_len = value.size();
  // A suffix longer than the string itself can never match.
  if (total_len < suffix_len) {
    return false;
  }
  return value.compare(total_len - suffix_len, suffix_len, ending) == 0;
}
123
/*!
 * \brief Load class names from a text file, one name per line.
 *
 * Leading and trailing whitespace (including a trailing '\r' from CRLF line
 * endings) is removed from every line and blank lines are skipped. Spaces
 * inside a name are kept, so multi-word names such as "traffic light" stay a
 * single entry.
 *
 * \param filename path of the class-name file
 * \return the class names in file order; empty if the file cannot be opened
 */
inline std::vector<std::string> LoadClassNames(std::string filename) {
  const char* const whitespace = " \t\r\n\v\f";
  std::vector<std::string> classes;
  std::ifstream infile(filename);
  std::string line;
  while (std::getline(infile, line)) {
    const std::string::size_type first = line.find_first_not_of(whitespace);
    if (first == std::string::npos) {
      continue;  // blank or whitespace-only line
    }
    const std::string::size_type last = line.find_last_not_of(whitespace);
    classes.emplace_back(line, first, last - first + 1);
  }
  return classes;
}
133
134 namespace viz {
135 // convert color from hsv to bgr for plotting
HSV2BGR(cv::Scalar hsv)136 inline cv::Scalar HSV2BGR(cv::Scalar hsv) {
137 cv::Mat from(1, 1, CV_32FC3, hsv);
138 cv::Mat to;
139 cv::cvtColor(from, to, cv::COLOR_HSV2BGR);
140 auto pixel = to.at<cv::Vec3f>(0, 0);
141 unsigned char b = static_cast<unsigned char>(pixel[0] * 255);
142 unsigned char g = static_cast<unsigned char>(pixel[1] * 255);
143 unsigned char r = static_cast<unsigned char>(pixel[2] * 255);
144 return cv::Scalar(b, g, r);
145 }
146
PutLabel(cv::Mat & im,const std::string label,const cv::Point & orig,cv::Scalar color)147 inline void PutLabel(cv::Mat &im, const std::string label, const cv::Point & orig, cv::Scalar color) {
148 int fontface = cv::FONT_HERSHEY_DUPLEX;
149 double scale = 0.5;
150 int thickness = 1;
151 int baseline = 0;
152 double alpha = 0.6;
153
154 cv::Size text = cv::getTextSize(label, fontface, scale, thickness, &baseline);
155 // make sure roi inside image region
156 cv::Rect blend_rect = cv::Rect(orig + cv::Point(0, baseline),
157 orig + cv::Point(text.width, -text.height)) & cv::Rect(0, 0, im.cols, im.rows);
158 cv::Mat roi = im(blend_rect);
159 cv::Mat blend(roi.size(), CV_8UC3, color);
160 // cv::rectangle(im, orig + cv::Point(0, baseline), orig + cv::Point(text.width, -text.height), CV_RGB(0, 0, 0), CV_FILLED);
161 cv::addWeighted(blend, alpha, roi, 1.0 - alpha, 0.0, roi);
162 cv::putText(im, label, orig, fontface, scale, cv::Scalar(255, 255, 255), thickness, 8);
163 }
164
165 // plot bounding boxes on raw image
PlotBbox(cv::Mat img,NDArray bboxes,NDArray scores,NDArray labels,float thresh,std::vector<std::string> class_names,std::map<int,cv::Scalar> colors,bool verbose)166 inline cv::Mat PlotBbox(cv::Mat img, NDArray bboxes, NDArray scores, NDArray labels,
167 float thresh, std::vector<std::string> class_names,
168 std::map<int, cv::Scalar> colors, bool verbose) {
169 int num = bboxes.GetShape()[1];
170 std::mt19937 eng;
171 std::uniform_real_distribution<float> rng(0, 1);
172 float hue = rng(eng);
173 bboxes.WaitToRead();
174 scores.WaitToRead();
175 labels.WaitToRead();
176 if (verbose) {
177 LOG(INFO) << "Start Ploting with visualize score threshold: " << thresh;
178 }
179 for (int i = 0; i < num; ++i) {
180 float score = scores.At(0, 0, i);
181 float label = labels.At(0, 0, i);
182 if (score < thresh) continue;
183 if (label < 0) continue;
184
185 int cls_id = static_cast<int>(label);
186 if (colors.find(cls_id) == colors.end()) {
187 // create a new color
188 int csize = static_cast<int>(class_names.size());
189 if (class_names.size() > 0) {
190 float hue = label / csize;
191 colors[cls_id] = HSV2BGR(cv::Scalar(hue * 255, 0.75, 0.95));
192 } else {
193 // generate color for this id
194 hue += 0.618033988749895; // golden ratio
195 hue = fmod(hue, 1.0);
196 colors[cls_id] = HSV2BGR(cv::Scalar(hue * 255, 0.75, 0.95));
197 }
198 }
199
200 // draw bounding box
201 auto color = colors[cls_id];
202 cv::Point pt1(bboxes.At(0, i, 0), bboxes.At(0, i, 1));
203 cv::Point pt2(bboxes.At(0, i, 2), bboxes.At(0, i, 3));
204 cv::rectangle(img, pt1, pt2, color, 2);
205
206 if (verbose) {
207 if (cls_id >= class_names.size()) {
208 LOG(INFO) << "id: " << cls_id << ", scores: " << score;
209 } else {
210 LOG(INFO) << "id: " << class_names[cls_id] << ", scores: " << score;
211 }
212
213 }
214
215 // put text
216 std::string txt;
217 if (class_names.size() > cls_id) {
218 txt += class_names[cls_id];
219 }
220 std::stringstream ss;
221 ss << std::fixed << std::setprecision(3) << score;
222 txt += " " + ss.str();
223 // cv::putText(img, txt, cv::Point(pt1.x, pt1.y - 5), , 0.6, color, 1);
224 PutLabel(img, txt, pt1, color);
225 }
226 return img;
227 }
228 } // namespace viz
229