1 #pragma once
2
3 #include <iostream>
4 #include <experimental/filesystem>
5 #include <fstream>
6 #include <random>
7
8 #define EIGEN_DONT_PARALLELIZE 1
9
10 #include <Eigen/Dense>
11
12 // Function: read_mnist_label
read_mnist_label(const std::experimental::filesystem::path & path)13 inline auto read_mnist_label(const std::experimental::filesystem::path& path) {
14
15 // Helper lambda.
16 auto reverse_int = [](int i) {
17 unsigned char c1, c2, c3, c4;
18 c1 = i & 255;
19 c2 = (i >> 8) & 255;
20 c3 = (i >> 16) & 255;
21 c4 = (i >> 24) & 255;
22 return ((int)c1 << 24) + ((int)c2 << 16) + ((int)c3 << 8) + c4;
23 };
24
25 // Read the image.
26 std::ifstream ifs(path, std::ios::binary);
27
28 if(!ifs) {
29 assert(false);
30 }
31
32 int magic_number = 0;
33 int num_imgs = 0;
34
35 ifs.read((char*)&magic_number, sizeof(magic_number));
36 magic_number = reverse_int(magic_number);
37
38 ifs.read((char*)&num_imgs, sizeof(num_imgs));
39 num_imgs = reverse_int(num_imgs);
40
41 Eigen::VectorXi labels(num_imgs);
42 for (int i = 0; i<num_imgs; ++i) {
43 unsigned char temp = 0; // must use unsigned
44 ifs.read((char*)&temp, sizeof(temp));
45 labels[i] = static_cast<int>(temp);
46 }
47 return labels;
48 }
49
50
read_mnist_image(const std::experimental::filesystem::path & path)51 inline auto read_mnist_image(const std::experimental::filesystem::path& path) {
52
53 // Helper lambda.
54 auto reverse_int = [] (int i) {
55 unsigned char c1, c2, c3, c4;
56 c1 = i & 255;
57 c2 = (i >> 8) & 255;
58 c3 = (i >> 16) & 255;
59 c4 = (i >> 24) & 255;
60 return ((int)c1 << 24) + ((int)c2 << 16) + ((int)c3 << 8) + c4;
61 };
62
63 // Read the image.
64 std::ifstream ifs(path, std::ios::binary);
65
66 if(!ifs) {
67 assert(false);
68 }
69
70 int magic_number = 0;
71 int num_imgs = 0;
72 int num_rows = 0;
73 int num_cols = 0;
74
75 ifs.read((char*)&magic_number, sizeof(magic_number));
76 magic_number = reverse_int(magic_number);
77
78 ifs.read((char*)&num_imgs, sizeof(num_imgs));
79 num_imgs = reverse_int(num_imgs);
80
81 ifs.read((char*)&num_rows, sizeof(num_rows));
82 num_rows = reverse_int(num_rows);
83
84 ifs.read((char*)&num_cols, sizeof(num_cols));
85 num_cols = reverse_int(num_cols);
86
87 Eigen::MatrixXf images(num_imgs, num_rows*num_cols);
88
89 for(int i = 0; i < num_imgs; ++i) {
90 for(int r = 0; r < num_rows; ++r) {
91 for(int c = 0; c < num_cols; ++c) {
92 unsigned char p = 0; // must use unsigned
93 ifs.read((char*)&p, sizeof(p));
94 images(i, r*num_cols + c) = static_cast<float>(p);
95 }
96 }
97 }
98
99 for(int i=0; i<images.rows(); i++) {
100 for(int j=0; j<images.cols(); j++) {
101 images(i, j) /= 255.0;
102 }
103 }
104 return images;
105 }
106
time_diff(std::chrono::time_point<std::chrono::high_resolution_clock> & t1,std::chrono::time_point<std::chrono::high_resolution_clock> & t2)107 inline auto time_diff(
108 std::chrono::time_point<std::chrono::high_resolution_clock> &t1,
109 std::chrono::time_point<std::chrono::high_resolution_clock> &t2
110 ) {
111 return std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
112 }
113
114
115
116 // ------------------------------------------------------------------------------------------------
117
118
119 enum class Activation {
120 NONE,
121 RELU,
122 SIGMOID
123 };
124
125 // Procedure: sigmoid
sigmoid(Eigen::MatrixXf & x)126 inline void sigmoid(Eigen::MatrixXf& x) {
127 x = ((1.0f + (-x).array().exp()).inverse()).matrix();
128 }
129
130 // Procedure: relu
relu(Eigen::MatrixXf & x)131 inline void relu(Eigen::MatrixXf& x) {
132 for(int j=0; j<x.cols(); ++j) {
133 for(int i=0; i<x.rows(); ++i) {
134 if(x(i, j) <= 0.0f) {
135 x(i, j) = 0.0f;
136 }
137 }
138 }
139 }
140
activate(Eigen::MatrixXf & mat,Activation act)141 inline void activate(Eigen::MatrixXf& mat, Activation act) {
142 switch(act) {
143 case Activation::NONE: return;
144 case Activation::SIGMOID: sigmoid(mat); return;
145 case Activation::RELU: relu(mat); return;
146 };
147 }
148
149
150 // Function: drelu
drelu(Eigen::MatrixXf & x)151 inline void drelu(Eigen::MatrixXf& x) {
152 for(int j=0; j<x.cols(); ++j) {
153 for(int i=0; i<x.rows(); ++i) {
154 x(i, j) = x(i, j) > 0.0f ? 1.0f : 0.0f;
155 }
156 }
157 }
158
159 // Function: dsigmoid
dsigmoid(Eigen::MatrixXf & x)160 inline void dsigmoid(Eigen::MatrixXf& x) {
161 x = x.array() * (1 - x.array());
162 }
163
deactivate(Eigen::MatrixXf & mat,Activation act)164 inline void deactivate(Eigen::MatrixXf& mat, Activation act) {
165 switch(act) {
166 case Activation::NONE: mat = Eigen::MatrixXf::Ones(mat.rows(), mat.cols()); return;
167 case Activation::SIGMOID: dsigmoid(mat); return ;
168 case Activation::RELU: drelu(mat); return;
169 };
170 }
171
172 struct MNIST {
173
174 // Ctor
MNISTMNIST175 MNIST() {
176 std::string path = std::experimental::filesystem::current_path();
177 path = path.substr(0, path.rfind("taskflow") + 8);
178 path += "/benchmarks/mnist/";
179
180 images = read_mnist_image(path + "./train-images.data");
181 labels = read_mnist_label(path + "./train-labels.data");
182
183 test_images = read_mnist_image(path + "./t10k-images-idx3-ubyte");
184 test_labels = read_mnist_label(path + "./t10k-labels-idx1-ubyte");
185 }
186
add_layerMNIST187 void add_layer(size_t in_degree, size_t out_degree, Activation act) {
188 acts.emplace_back(act);
189 Ys.emplace_back();
190 Ys.back().resize(batch_size, out_degree);
191 Ws.push_back(Eigen::MatrixXf::Random(in_degree, out_degree));
192 Bs.push_back(Eigen::MatrixXf::Random(1, out_degree));
193
194 dW.emplace_back();
195 dW.back().resize(in_degree, out_degree);
196 dB.emplace_back();
197 dB.back().resize(1, out_degree);
198 }
199
forwardMNIST200 void forward(size_t layer, const Eigen::MatrixXf& mat) {
201 Ys[layer] = mat * Ws[layer] + Bs[layer].replicate(mat.rows(), 1);
202 activate(Ys[layer], acts[layer]);
203 }
204
lossMNIST205 void loss(const Eigen::VectorXi& labels) {
206 delta = Ys.back();
207 delta = (delta - delta.rowwise().maxCoeff().replicate(1, delta.cols())).array().exp().matrix();
208 delta = delta.cwiseQuotient(delta.rowwise().sum().replicate(1, delta.cols()));
209 for(size_t i=beg_row, j=0; j<batch_size; i++, j++) {
210 delta(j, labels[i]) -= 1.0;
211 }
212 }
213
backwardMNIST214 void backward(size_t layer, const Eigen::MatrixXf& Xin) {
215 deactivate(Ys[layer], acts[layer]);
216 delta = delta.cwiseProduct(Ys[layer]);
217 dB[layer] = delta.colwise().sum();
218 dW[layer] = Xin * delta;
219
220 if(layer > 0) {
221 delta = delta * Ws[layer].transpose();
222 }
223 }
224
updateMNIST225 void update(size_t layer) {
226 Ws[layer] -= lrate*(dW[layer] + decay*Ws[layer]);
227 Bs[layer] -= lrate*(dB[layer] + decay*Bs[layer]);
228 }
229
shuffleMNIST230 void shuffle(Eigen::MatrixXf& mat, Eigen::VectorXi& vec, const size_t row_num) {
231
232 static thread_local std::mt19937 gen(0);
233
234 Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> p(row_num);
235 p.setIdentity();
236 std::shuffle(p.indices().data(), p.indices().data() + p.indices().size(), gen);
237
238 mat = p * mat;
239 vec = p * vec;
240 }
241
validateMNIST242 void validate() {
243 Eigen::MatrixXf res = test_images;
244 auto t1 = std::chrono::high_resolution_clock::now();
245 for(size_t i=0; i<acts.size(); i++) {
246 res = res * Ws[i] + Bs[i].replicate(res.rows(), 1);
247 if(acts[i] == Activation::RELU) {
248 relu(res);
249 }
250 else if(acts[i] == Activation::SIGMOID) {
251 sigmoid(res);
252 }
253 }
254 auto t2 = std::chrono::high_resolution_clock::now();
255 std::cout << "Infer runtime: " << time_diff(t1, t2) << " ms\n";
256
257 size_t correct_num {0};
258 for(int k=0; k<res.rows(); k++) {
259 int pred ;
260 res.row(k).maxCoeff(&pred);
261 if(pred == test_labels[k]) {
262 correct_num ++;
263 }
264 }
265 std::cout << "Accuracy: " << correct_num << '/' << res.rows() << '\n';
266 }
267
268
269 // Parameter functions ------------------------------------------------------
epoch_numMNIST270 auto& epoch_num(unsigned e) {
271 epoch = e;
272 return *this;
273 }
batchMNIST274 auto& batch(size_t b) {
275 batch_size = b;
276 assert(images.rows()%batch_size == 0);
277 return *this;
278 }
learning_rateMNIST279 auto& learning_rate(float l) {
280 lrate = l;
281 return *this;
282 }
283
284 std::vector<Eigen::MatrixXf> Ys;
285 std::vector<Eigen::MatrixXf> Ws;
286 std::vector<Eigen::MatrixXf> Bs;
287 std::vector<Eigen::MatrixXf> dW;
288 std::vector<Eigen::MatrixXf> dB;
289
290 std::vector<Activation> acts;
291
292 // Training images # = 60000 x 784 (28 x 28)
293 Eigen::MatrixXf images;
294 Eigen::VectorXi labels;
295 Eigen::MatrixXf delta;
296
297 // Testing images # = 10000 x 784 (28 x 28)
298 Eigen::MatrixXf test_images;
299 Eigen::VectorXi test_labels;
300
301 int beg_row {0};
302
303 float lrate {0.01f};
304 float decay {0.01f};
305
306 unsigned epoch {0};
307 size_t batch_size {1};
308 };
309
310
forward_task(MNIST & D,size_t iter,size_t e,std::vector<Eigen::MatrixXf> & mats,std::vector<Eigen::VectorXi> & vecs)311 inline void forward_task(MNIST& D, size_t iter, size_t e,
312 std::vector<Eigen::MatrixXf>& mats,
313 std::vector<Eigen::VectorXi>& vecs) {
314 if(iter != 0) {
315 D.beg_row += D.batch_size;
316 if(D.beg_row >= D.images.rows()) {
317 D.beg_row = 0;
318 }
319 }
320 for(size_t i=0; i<D.acts.size(); i++) {
321 if(i == 0){
322 D.forward(i, mats[e].middleRows(D.beg_row, D.batch_size));
323 }
324 else {
325 D.forward(i, D.Ys[i-1]);
326 }
327 }
328
329 D.loss(vecs[e]);
330 }
331
backward_task(MNIST & D,size_t i,size_t e,std::vector<Eigen::MatrixXf> & mats)332 inline void backward_task(MNIST& D, size_t i, size_t e, std::vector<Eigen::MatrixXf>& mats) {
333 if(i > 0) {
334 D.backward(i, D.Ys[i-1].transpose());
335 }
336 else {
337 D.backward(i, mats[e].middleRows(D.beg_row, D.batch_size).transpose());
338 }
339 }
340
341
build_dnn(unsigned epoch)342 inline auto build_dnn(unsigned epoch) {
343 MNIST dnn;
344 dnn.epoch_num(epoch).batch(100).learning_rate(0.001);
345 dnn.add_layer(784, 64, Activation::RELU);
346 dnn.add_layer(64, 32, Activation::RELU);
347 dnn.add_layer(32, 16, Activation::RELU);
348 dnn.add_layer(16, 8, Activation::RELU);
349 dnn.add_layer(8, 10, Activation::NONE);
350 return dnn;
351 }
352
353 void run_tbb(MNIST&, unsigned);
354 void run_taskflow(MNIST&, unsigned);
355 void run_omp(MNIST&, unsigned);
356 void run_sequential(MNIST&, unsigned);
357
358
359
360