//
// CDDL HEADER START
//
// The contents of this file are subject to the terms of the Common Development
// and Distribution License Version 1.0 (the "License").
//
// You can obtain a copy of the license at
// http://www.opensource.org/licenses/CDDL-1.0. See the License for the
// specific language governing permissions and limitations under the License.
//
// When distributing Covered Code, include this CDDL HEADER in each file and
// include the License file in a prominent location with the name LICENSE.CDDL.
// If applicable, add the following below this CDDL HEADER, with the fields
// enclosed by brackets "[]" replaced with your own identifying information:
//
// Portions Copyright (c) [yyyy] [name of copyright owner]. All rights reserved.
//
// CDDL HEADER END
//

//
// Copyright (c) 2019, Regents of the University of Minnesota.
// All rights reserved.
//
// Contributors:
//    Mingjian Wen
//

#include "network.h"

#define LOG_ERROR(msg) \
  (std::cerr << "ERROR (NeuralNetwork): " << (msg) << std::endl)

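// Typical driving sequence (a sketch, using only the member functions defined
// in this file; the full public interface is declared in network.h):
//
//   NeuralNetwork nn;
//   nn.read_parameter_file(fp, desc_size);  // structure, activation, weights
//   nn.read_dropout_file(fp_dropout);       // per-ensemble dropout masks
//   nn.forward(zeta, rows, desc_size, ensemble_index);
//   nn.backward();  // gradient of the summed output w.r.t. zeta
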
// Nothing to do at this moment
NeuralNetwork::NeuralNetwork() :
    inputSize_(0),
    Nlayers_(0),
    fully_connected_(false),
    ensemble_size_(0)
{
  return;
}

NeuralNetwork::~NeuralNetwork() {}

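// Read the network from a parameter file. The expected layout (one data line
// per item, with comments and blank lines skipped by getNextDataLine) is:
//
//   number of layers
//   number of nodes in each layer
//   activation function name (sigmoid, tanh, relu, or elu)
//   keep probability of each layer
//   then, for each layer: the weight matrix (one row per line) followed by
//   the bias vector (one line)
//
// Returns false on success and true on error, the convention used throughout
// this file.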
int NeuralNetwork::read_parameter_file(FILE * const filePointer, int desc_size)
{
  int ier;
  int endOfFileFlag = 0;
  char nextLine[MAXLINE];
  char errorMsg[1024];
  char name[128];
  int numLayers;
  int * numNodes;

  // number of layers
  getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
  ier = sscanf(nextLine, "%d", &numLayers);
  if (ier != 1)
  {
    sprintf(errorMsg, "unable to read number of layers from line:\n");
    strcat(errorMsg, nextLine);
    LOG_ERROR(errorMsg);
    return true;
  }

  // number of nodes in each layer
  numNodes = new int[numLayers];
  getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
  ier = getXint(nextLine, numLayers, numNodes);
  if (ier)
  {
    sprintf(errorMsg, "unable to read number of nodes from line:\n");
    strcat(errorMsg, nextLine);
    LOG_ERROR(errorMsg);
    delete[] numNodes;
    return true;
  }
  set_nn_structure(desc_size, numLayers, numNodes);

  // activation function
  getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
  ier = sscanf(nextLine, "%s", name);
  if (ier != 1)
  {
    sprintf(errorMsg, "unable to read `activation function` from line:\n");
    strcat(errorMsg, nextLine);
    LOG_ERROR(errorMsg);
    delete[] numNodes;
    return true;
  }

  // register activation function
  lowerCase(name);
  if (strcmp(name, "sigmoid") != 0 && strcmp(name, "tanh") != 0
      && strcmp(name, "relu") != 0 && strcmp(name, "elu") != 0)
  {
    sprintf(errorMsg,
            "unsupported activation function. Expecting `sigmoid`, `tanh`, "
            "`relu` or `elu`, given %s.\n",
            name);
    LOG_ERROR(errorMsg);
    delete[] numNodes;
    return true;
  }
  set_activation(name);

  // keep probability
  double * keep_prob;
  AllocateAndInitialize1DArray<double>(keep_prob, numLayers);

  getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
  ier = getXdouble(nextLine, numLayers, keep_prob);
  if (ier)
  {
    sprintf(errorMsg, "unable to read `keep probability` from line:\n");
    strcat(errorMsg, nextLine);
    LOG_ERROR(errorMsg);
    Deallocate1DArray(keep_prob);
    delete[] numNodes;
    return true;
  }
  set_keep_prob(keep_prob);
  Deallocate1DArray(keep_prob);

  // weights and biases
  for (int i = 0; i < numLayers; i++)
  {
    double ** weight;
    double * bias;
    int row;
    int col;

    if (i == 0)
    {
      row = desc_size;
      col = numNodes[i];
    }
    else
    {
      row = numNodes[i - 1];
      col = numNodes[i];
    }

    AllocateAndInitialize2DArray<double>(weight, row, col);
    for (int j = 0; j < row; j++)
    {
      getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
      ier = getXdouble(nextLine, col, weight[j]);
      if (ier)
      {
        sprintf(errorMsg, "unable to read `weight` from line:\n");
        strcat(errorMsg, nextLine);
        LOG_ERROR(errorMsg);
        Deallocate2DArray(weight);
        delete[] numNodes;
        return true;
      }
    }

    // bias
    AllocateAndInitialize1DArray<double>(bias, col);
    getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
    ier = getXdouble(nextLine, col, bias);
    if (ier)
    {
      sprintf(errorMsg, "unable to read `bias` from line:\n");
      strcat(errorMsg, nextLine);
      LOG_ERROR(errorMsg);
      Deallocate2DArray(weight);
      Deallocate1DArray(bias);
      delete[] numNodes;
      return true;
    }

    // copy to network class
    add_weight_bias(weight, bias, i);

    Deallocate2DArray(weight);
    Deallocate1DArray(bias);
  }

  delete[] numNodes;

  // everything is OK
  return false;
}

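// Read the dropout masks from a file. The first data line gives the ensemble
// size; it is followed, for each ensemble member and each layer, by one line
// of 0/1 integers (one entry per input node of that layer) marking which
// connections are kept. Returns false on success and true on error.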
int NeuralNetwork::read_dropout_file(FILE * const filePointer)
{
  int ier;
  int endOfFileFlag = 0;
  char nextLine[MAXLINE];
  char errorMsg[1024];

  getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
  int ensemble_size;
  ier = sscanf(nextLine, "%d", &ensemble_size);
  if (ier != 1)
  {
    sprintf(errorMsg, "unable to read ensemble_size from line:\n");
    strcat(errorMsg, nextLine);
    LOG_ERROR(errorMsg);
    return true;
  }
  set_ensemble_size(ensemble_size);

  for (int i = 0; i < ensemble_size; i++)
  {
    for (int j = 0; j < Nlayers_; j++)
    {
      // the mask size is the number of inputs to layer j
      int size;
      if (j == 0) { size = inputSize_; }
      else
      {
        size = layerSizes_[j - 1];
      }

      int * row_binary = new int[size];
      getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
      ier = getXint(nextLine, size, row_binary);
      if (ier)
      {
        sprintf(errorMsg, "unable to read dropout binary from line:\n");
        strcat(errorMsg, nextLine);
        LOG_ERROR(errorMsg);
        delete[] row_binary;
        return true;
      }
      add_dropout_binary(i, j, size, row_binary);
      delete[] row_binary;
    }
  }

  // everything is OK
  return false;
}

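// Record the network structure and size the per-layer containers. layer_sizes
// holds the number of nodes in each of the num_layers layers (the output
// layer included); size_input is the descriptor size.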
void NeuralNetwork::set_nn_structure(int size_input,
                                     int num_layers,
                                     int * layer_sizes)
{
  inputSize_ = size_input;
  Nlayers_ = num_layers;
  for (int i = 0; i < Nlayers_; i++) { layerSizes_.push_back(layer_sizes[i]); }

  weights_.resize(Nlayers_);
  biases_.resize(Nlayers_);
  preactiv_.resize(Nlayers_);
  keep_prob_.resize(Nlayers_);
  keep_prob_binary_.resize(Nlayers_);
}

void NeuralNetwork::set_activation(char * name)
{
  if (strcmp(name, "sigmoid") == 0)
  {
    activFunc_ = &sigmoid;
    activFuncDeriv_ = &sigmoid_derivative;
  }
  else if (strcmp(name, "tanh") == 0)
  {
    activFunc_ = &tanh;
    activFuncDeriv_ = &tanh_derivative;
  }
  else if (strcmp(name, "relu") == 0)
  {
    activFunc_ = &relu;
    activFuncDeriv_ = &relu_derivative;
  }
  else if (strcmp(name, "elu") == 0)
  {
    activFunc_ = &elu;
    activFuncDeriv_ = &elu_derivative;
  }
}

void NeuralNetwork::set_keep_prob(double * keep_prob)
{
  for (int i = 0; i < Nlayers_; i++) { keep_prob_[i] = keep_prob[i]; }
}

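// Copy one layer's weights and bias from raw C arrays into Eigen storage.
// The weight matrix has one row per input to the layer (the descriptor size
// for layer 0, otherwise the previous layer's width) and one column per node.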
void NeuralNetwork::add_weight_bias(double ** weight, double * bias, int layer)
{
  int rows;
  int cols;

  if (layer == 0)
  {
    rows = inputSize_;
    cols = layerSizes_[layer];
  }
  else
  {
    rows = layerSizes_[layer - 1];
    cols = layerSizes_[layer];
  }

  // copy data
  RowMatrixXd w(rows, cols);
  RowVectorXd b(cols);
  for (int i = 0; i < rows; i++)
  {
    for (int j = 0; j < cols; j++) { w(i, j) = weight[i][j]; }
  }
  for (int j = 0; j < cols; j++) { b(j) = bias[j]; }

  // store in vector
  weights_[layer] = w;
  biases_[layer] = b;
}

void NeuralNetwork::set_ensemble_size(int repeat)
{
  ensemble_size_ = repeat;
  row_binary_.resize(repeat);
  for (size_t i = 0; i < row_binary_.size(); i++)
  { row_binary_[i].resize(Nlayers_); }
}

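// Store one 0/1 dropout mask as a 1 x size row matrix; it is later replicated
// across rows in dropout_() so that every atom sees the same mask.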
void NeuralNetwork::add_dropout_binary(int ensemble_index,
                                       int layer_index,
                                       int size,
                                       int * binary)
{
  RowMatrixXd data(1, size);
  for (int i = 0; i < size; i++) { data(0, i) = binary[i]; }
  row_binary_[ensemble_index][layer_index] = data;
}

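// Forward pass. Starting from a^(0) = zeta (the descriptor matrix), each
// layer i computes
//
//   z^(i) = a^(i) * W^(i) + b^(i),    a^(i+1) = f(z^(i)),
//
// where f is the registered activation function, applied everywhere except
// the output layer, which stays linear. When dropout is active
// (fully_connected_ is false and keep_prob < 1), the activations entering a
// layer are masked and rescaled first. The preactivations z^(i) are cached in
// preactiv_ for the backward pass.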
void NeuralNetwork::forward(double * zeta,
                            const int rows,
                            const int cols,
                            int const ensemble_index)
{
  RowMatrixXd act;

  // map raw C++ data into Matrix data
  // see: https://eigen.tuxfamily.org/dox/group__TutorialMapClass.html
  Map<RowMatrixXd> activation(zeta, rows, cols);
  act = activation;

  for (int i = 0; i < Nlayers_; i++)
  {
    // apply dropout
    if (fully_connected_ == false && keep_prob_[i] < 1 - 1e-10)
    {
      act = dropout_(act, i, ensemble_index);  // no aliasing will occur for act
    }

    preactiv_[i] = (act * weights_[i]).rowwise() + biases_[i];

    if (i == Nlayers_ - 1)
    {  // output layer (no activation function applied)
      activOutputLayer_ = preactiv_[i];
    }
    else
    {
      act = activFunc_(preactiv_[i]);
    }
  }
}

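// Backward pass. The cost is E = sum of the (linear) output-layer
// activations, so the error at the output layer is simply
//
//   delta^(L-1) = dE/dz^(L-1) = 1.
//
// It is then propagated backwards through
//
//   delta^(i) = (delta^(i+1) * W^(i+1)^T) .* f'(z^(i)),
//
// with the inverted-dropout masks reapplied wherever they were used in the
// forward pass, and finally
//
//   gradInput_ = dE/dzeta = delta^(0) * W^(0)^T.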
void NeuralNetwork::backward()
{
  // our cost (energy E) is the sum of activations at output layer, and no
  // activation function is employed in the output layer
  int rows = preactiv_[Nlayers_ - 1].rows();
  int cols = preactiv_[Nlayers_ - 1].cols();

  // error at output layer
  RowMatrixXd delta = RowMatrixXd::Constant(rows, cols, 1.0);

  for (int i = Nlayers_ - 2; i >= 0; i--)
  {
    // eval() is used to prevent aliasing since delta is both lvalue and rvalue.
    delta = (delta * weights_[i + 1].transpose())
                .eval()
                .cwiseProduct(activFuncDeriv_(preactiv_[i]));

    // apply dropout
    if (fully_connected_ == false && keep_prob_[i + 1] < 1 - 1e-10)
    {
      delta = delta.cwiseProduct(keep_prob_binary_[i + 1]) / keep_prob_[i + 1];
    }
  }

  gradInput_ = delta * weights_[0].transpose();
  // apply dropout
  if (fully_connected_ == false && keep_prob_[0] < 1 - 1e-10)
  {
    gradInput_ = gradInput_.cwiseProduct(keep_prob_binary_[0]) / keep_prob_[0];
  }
}

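// Inverted dropout: the kept entries are rescaled on the spot,
//
//   y = (x .* mask) / keep_prob,
//
// so that E[y] = x and no extra scaling is needed at prediction time. The
// mask is read from file (one row per layer and ensemble member) and
// replicated across the rows of x.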
RowMatrixXd NeuralNetwork::dropout_(RowMatrixXd const & x,
                                    int layer,
                                    int const ensemble_index)
{
  RowMatrixXd y;
  double keep_prob = keep_prob_[layer];

  if (fully_connected_ == false && keep_prob < 1 - 1e-10)
  {
    //// alternatively, generate the mask within the model:
    //// uniform [-1, 1]
    // RowMatrixXd random = RowMatrixXd::Random(1, x.cols());
    //// shift to uniform [keep_prob, 1 + keep_prob]; .floor() then gives 0/1
    // random = ((random / 2.).array() + 0.5 + keep_prob).floor();

    // read in from file
    RowMatrixXd random = row_binary_[ensemble_index][layer];

    // each row is the same (each atom is treated the same)
    keep_prob_binary_[layer] = random.replicate(x.rows(), 1);

    y = (x / keep_prob).cwiseProduct(keep_prob_binary_[layer]);
  }
  else
  {
    y = x;
  }

  return y;
}

//*****************************************************************************
// activation functions and derivatives
//*****************************************************************************

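// All derivatives below take the preactivation z (not the activation value)
// as input, matching the cached preactiv_ used in backward(); e.g.
// tanh_derivative recomputes tanh(z) rather than reusing the forward result,
// trading a little extra work for simpler bookkeeping.
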
RowMatrixXd relu(RowMatrixXd const & x) { return x.cwiseMax(0.0); }

RowMatrixXd relu_derivative(RowMatrixXd const & x)
{
  RowMatrixXd deriv(x.rows(), x.cols());

  for (int i = 0; i < x.rows(); i++)
  {
    for (int j = 0; j < x.cols(); j++)
    {
      if (x(i, j) < 0.) { deriv(i, j) = 0.; }
      else
      {
        deriv(i, j) = 1.;
      }
    }
  }
  return deriv;
}

RowMatrixXd elu(RowMatrixXd const & x)
{
  double alpha = 1.0;
  RowMatrixXd e(x.rows(), x.cols());

  for (int i = 0; i < x.rows(); i++)
  {
    for (int j = 0; j < x.cols(); j++)
    {
      if (x(i, j) < 0.) { e(i, j) = alpha * (exp(x(i, j)) - 1); }
      else
      {
        e(i, j) = x(i, j);
      }
    }
  }
  return e;
}

RowMatrixXd elu_derivative(RowMatrixXd const & x)
{
  double alpha = 1.0;
  RowMatrixXd deriv(x.rows(), x.cols());

  for (int i = 0; i < x.rows(); i++)
  {
    for (int j = 0; j < x.cols(); j++)
    {
      if (x(i, j) < 0.) { deriv(i, j) = alpha * exp(x(i, j)); }
      else
      {
        deriv(i, j) = 1.;
      }
    }
  }
  return deriv;
}

RowMatrixXd tanh(RowMatrixXd const & x) { return (x.array().tanh()).matrix(); }

RowMatrixXd tanh_derivative(RowMatrixXd const & x)
{
  return (1.0 - x.array().tanh().square()).matrix();
}

RowMatrixXd sigmoid(RowMatrixXd const & x)
{
  return (1.0 / (1.0 + (-x).array().exp())).matrix();
}

RowMatrixXd sigmoid_derivative(RowMatrixXd const & x)
{
  RowMatrixXd s = sigmoid(x);

  return (s.array() * (1.0 - s.array())).matrix();
}