//
// CDDL HEADER START
//
// The contents of this file are subject to the terms of the Common Development
// and Distribution License Version 1.0 (the "License").
//
// You can obtain a copy of the license at
// http://www.opensource.org/licenses/CDDL-1.0.  See the License for the
// specific language governing permissions and limitations under the License.
//
// When distributing Covered Code, include this CDDL HEADER in each file and
// include the License file in a prominent location with the name LICENSE.CDDL.
// If applicable, add the following below this CDDL HEADER, with the fields
// enclosed by brackets "[]" replaced with your own identifying information:
//
// Portions Copyright (c) [yyyy] [name of copyright owner]. All rights reserved.
//
// CDDL HEADER END
//

//
// Copyright (c) 2019, Regents of the University of Minnesota.
// All rights reserved.
//
// Contributors:
//    Mingjian Wen
//

#include "network.h"

#define LOG_ERROR(msg) \
  (std::cerr << "ERROR (NeuralNetwork): " << (msg) << std::endl)

// Nothing to do at this moment
NeuralNetwork::NeuralNetwork() :
    inputSize_(0),
    Nlayers_(0),
    fully_connected_(false),
    ensemble_size_(0)
{
  return;
}

NeuralNetwork::~NeuralNetwork() {}

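// Reads the network definition from an already-opened parameter file.  The
// layout implied by the reads below is, roughly:
//
//   <number of layers>
//   <nodes in layer 0> ... <nodes in layer N-1>
//   <activation function name: sigmoid | tanh | relu | elu>
//   <keep probability for layer 0> ... <keep probability for layer N-1>
//   then, for each layer: one line of weights per input node of the layer
//   (each line holding one value per node of the layer), followed by a
//   single line of biases for the layer.
//
// Returns false on success and true on error (errors are also logged).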
int NeuralNetwork::read_parameter_file(FILE * const filePointer, int desc_size)
{
  int ier;
  int endOfFileFlag = 0;
  char nextLine[MAXLINE];
  char errorMsg[1024];
  char name[128];
  int numLayers;
  int * numNodes;

  // number of layers
  getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
  ier = sscanf(nextLine, "%d", &numLayers);
  if (ier != 1)
  {
    sprintf(errorMsg, "unable to read number of layers from line:\n");
    strcat(errorMsg, nextLine);
    LOG_ERROR(errorMsg);
    return true;
  }

  // number of nodes in each layer
  numNodes = new int[numLayers];
  getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
  ier = getXint(nextLine, numLayers, numNodes);
  if (ier)
  {
    sprintf(errorMsg, "unable to read number of nodes from line:\n");
    strcat(errorMsg, nextLine);
    LOG_ERROR(errorMsg);
    return true;
  }
  set_nn_structure(desc_size, numLayers, numNodes);

  // activation function
  getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
  ier = sscanf(nextLine, "%s", name);
  if (ier != 1)
  {
    sprintf(errorMsg, "unable to read `activation function` from line:\n");
    strcat(errorMsg, nextLine);
    LOG_ERROR(errorMsg);
    return true;
  }

  // register activation function
  lowerCase(name);
  if (strcmp(name, "sigmoid") != 0 && strcmp(name, "tanh") != 0
      && strcmp(name, "relu") != 0 && strcmp(name, "elu") != 0)
  {
    sprintf(errorMsg,
            "unsupported activation function. Expecting `sigmoid`, `tanh`, "
            "`relu`, or `elu`, given %s.\n",
            name);
    LOG_ERROR(errorMsg);
    return true;
  }
  set_activation(name);

  // keep probability
  double * keep_prob;
  AllocateAndInitialize1DArray<double>(keep_prob, numLayers);

  getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
  ier = getXdouble(nextLine, numLayers, keep_prob);
  if (ier)
  {
    sprintf(errorMsg, "unable to read `keep probability` from line:\n");
    strcat(errorMsg, nextLine);
    LOG_ERROR(errorMsg);
    return true;
  }
  set_keep_prob(keep_prob);
  Deallocate1DArray(keep_prob);

  // weights and biases
  for (int i = 0; i < numLayers; i++)
  {
    double ** weight;
    double * bias;
    int row;
    int col;

    if (i == 0)
    {
      row = desc_size;
      col = numNodes[i];
    }
    else
    {
      row = numNodes[i - 1];
      col = numNodes[i];
    }

    AllocateAndInitialize2DArray<double>(weight, row, col);
    for (int j = 0; j < row; j++)
    {
      getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
      ier = getXdouble(nextLine, col, weight[j]);
      if (ier)
      {
        sprintf(errorMsg, "unable to read `weight` from line:\n");
        strcat(errorMsg, nextLine);
        LOG_ERROR(errorMsg);
        return true;
      }
    }

    // bias
    AllocateAndInitialize1DArray<double>(bias, col);
    getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
    ier = getXdouble(nextLine, col, bias);
    if (ier)
    {
      sprintf(errorMsg, "unable to read `bias` from line:\n");
      strcat(errorMsg, nextLine);
      LOG_ERROR(errorMsg);
      return true;
    }

    // copy to network class
    add_weight_bias(weight, bias, i);

    Deallocate2DArray(weight);
    Deallocate1DArray(bias);
  }

  delete[] numNodes;

  // everything is OK
  return false;
}

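// Reads the dropout masks for an ensemble of networks from an already-opened
// file.  The layout implied by the reads below is: one line with the ensemble
// size, then for each ensemble member and each layer one line of 0/1 integers
// (one entry per input node of that layer) marking which nodes are kept.
// Returns false on success and true on error.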
int NeuralNetwork::read_dropout_file(FILE * const filePointer)
{
  int ier;
  int endOfFileFlag = 0;
  char nextLine[MAXLINE];
  char errorMsg[1024];

  getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
  int ensemble_size;
  ier = sscanf(nextLine, "%d", &ensemble_size);
  if (ier != 1)
  {
    sprintf(errorMsg, "unable to read ensemble_size from line:\n");
    strcat(errorMsg, nextLine);
    LOG_ERROR(errorMsg);
    return true;
  }
  set_ensemble_size(ensemble_size);

  for (int i = 0; i < ensemble_size; i++)
  {
    for (int j = 0; j < Nlayers_; j++)
    {
      int size;
      if (j == 0) { size = inputSize_; }
      else
      {
        size = layerSizes_[j - 1];
      }

      int * row_binary = new int[size];
      getNextDataLine(filePointer, nextLine, MAXLINE, &endOfFileFlag);
      ier = getXint(nextLine, size, row_binary);
      if (ier)
      {
        sprintf(errorMsg, "unable to read dropout binary from line:\n");
        strcat(errorMsg, nextLine);
        LOG_ERROR(errorMsg);
        return true;
      }
      add_dropout_binary(i, j, size, row_binary);
      delete[] row_binary;
    }
  }

  // everything is OK
  return false;
}

void NeuralNetwork::set_nn_structure(int size_input,
                                     int num_layers,
                                     int * layer_sizes)
{
  inputSize_ = size_input;
  Nlayers_ = num_layers;
  for (int i = 0; i < Nlayers_; i++) { layerSizes_.push_back(layer_sizes[i]); }

  weights_.resize(Nlayers_);
  biases_.resize(Nlayers_);
  preactiv_.resize(Nlayers_);
  keep_prob_.resize(Nlayers_);
  keep_prob_binary_.resize(Nlayers_);
}

void NeuralNetwork::set_activation(char * name)
{
  if (strcmp(name, "sigmoid") == 0)
  {
    activFunc_ = &sigmoid;
    activFuncDeriv_ = &sigmoid_derivative;
  }
  else if (strcmp(name, "tanh") == 0)
  {
    activFunc_ = &tanh;
    activFuncDeriv_ = &tanh_derivative;
  }
  else if (strcmp(name, "relu") == 0)
  {
    activFunc_ = &relu;
    activFuncDeriv_ = &relu_derivative;
  }
  else if (strcmp(name, "elu") == 0)
  {
    activFunc_ = &elu;
    activFuncDeriv_ = &elu_derivative;
  }
}

void NeuralNetwork::set_keep_prob(double * keep_prob)
{
  for (int i = 0; i < Nlayers_; i++) { keep_prob_[i] = keep_prob[i]; }
}

void NeuralNetwork::add_weight_bias(double ** weight, double * bias, int layer)
{
  int rows;
  int cols;

  if (layer == 0)
  {
    rows = inputSize_;
    cols = layerSizes_[layer];
  }
  else
  {
    rows = layerSizes_[layer - 1];
    cols = layerSizes_[layer];
  }

  // copy data
  RowMatrixXd w(rows, cols);
  RowVectorXd b(cols);
  for (int i = 0; i < rows; i++)
  {
    for (int j = 0; j < cols; j++) { w(i, j) = weight[i][j]; }
  }
  for (int j = 0; j < cols; j++) { b(j) = bias[j]; }

  // store in vector
  weights_[layer] = w;
  biases_[layer] = b;
}

void NeuralNetwork::set_ensemble_size(int repeat)
{
  ensemble_size_ = repeat;
  row_binary_.resize(repeat);
  for (size_t i = 0; i < row_binary_.size(); i++)
  { row_binary_[i].resize(Nlayers_); }
}

void NeuralNetwork::add_dropout_binary(int ensemble_index,
                                       int layer_index,
                                       int size,
                                       int * binary)
{
  RowMatrixXd data(1, size);
  for (int i = 0; i < size; i++) { data(0, i) = binary[i]; }
  row_binary_[ensemble_index][layer_index] = data;
}

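// Forward pass.  `zeta` is a row-major (rows x cols) array of input
// descriptors, one row per atom.  For each layer the incoming activation
// (the mapped input for the first layer) is optionally dropout-masked and
// then propagated as
//
//   z = act * W + b   (bias added row-wise),
//   act = f(z)        (hidden layers only),
//
// with the last layer kept linear and stored in activOutputLayer_.  The
// pre-activations z are cached in preactiv_ for use by backward().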
void NeuralNetwork::forward(double * zeta,
                            const int rows,
                            const int cols,
                            int const ensemble_index)
{
  RowMatrixXd act;

  // map raw C++ data into Matrix data
  // see: https://eigen.tuxfamily.org/dox/group__TutorialMapClass.html
  Map<RowMatrixXd> activation(zeta, rows, cols);
  act = activation;

  for (int i = 0; i < Nlayers_; i++)
  {
    // apply dropout
    if (fully_connected_ == false && keep_prob_[i] < 1 - 1e-10)
    {
      act = dropout_(act, i, ensemble_index);  // no aliasing will occur for act
    }

    preactiv_[i] = (act * weights_[i]).rowwise() + biases_[i];

    if (i == Nlayers_ - 1)
    {  // output layer (no activation function applied)
      activOutputLayer_ = preactiv_[i];
    }
    else
    {
      act = activFunc_(preactiv_[i]);
    }
  }
}

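// Backward pass: standard back-propagation through the layers defined in
// forward().  With the cost taken as the sum of the linear outputs, the error
// at the output layer is a matrix of ones, and each earlier layer receives
//
//   delta_i = (delta_{i+1} * W_{i+1}^T) .* f'(z_i),
//
// with the dropout mask (scaled by 1/keep_prob) re-applied wherever dropout
// was active in the forward pass.  The final product, gradInput_, is the
// derivative of the summed output with respect to the input descriptors zeta.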
void NeuralNetwork::backward()
{
  // our cost (energy E) is the sum of activations at output layer, and no
  // activation function is employed in the output layer
  int rows = preactiv_[Nlayers_ - 1].rows();
  int cols = preactiv_[Nlayers_ - 1].cols();

  // error at output layer
  RowMatrixXd delta = RowMatrixXd::Constant(rows, cols, 1.0);

  for (int i = Nlayers_ - 2; i >= 0; i--)
  {
    // eval() is used to prevent aliasing since delta is both lvalue and rvalue.
    delta = (delta * weights_[i + 1].transpose())
                .eval()
                .cwiseProduct(activFuncDeriv_(preactiv_[i]));

    // apply dropout
    if (fully_connected_ == false && keep_prob_[i + 1] < 1 - 1e-10)
    {
      delta = delta.cwiseProduct(keep_prob_binary_[i + 1]) / keep_prob_[i + 1];
    }
  }

  gradInput_ = delta * weights_[0].transpose();
  // apply dropout
  if (fully_connected_ == false && keep_prob_[0] < 1 - 1e-10)
  {
    gradInput_ = gradInput_.cwiseProduct(keep_prob_binary_[0]) / keep_prob_[0];
  }
}

// dropout
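// Applies "inverted" dropout to the activations x of the given layer: the
// surviving entries are scaled by 1/keep_prob so the expected value of each
// activation is unchanged.  The 0/1 mask row for this layer and ensemble
// member comes from the dropout file (see add_dropout_binary) and is
// replicated across rows, so every atom is masked identically; the replicated
// mask is cached in keep_prob_binary_ for reuse in backward().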
RowMatrixXd NeuralNetwork::dropout_(RowMatrixXd const & x,
                                    int layer,
                                    int const ensemble_index)
{
  RowMatrixXd y;
  double keep_prob = keep_prob_[layer];

  if (fully_connected_ == false && keep_prob < 1 - 1e-10)
  {
    //// do it within model
    //// uniform [-1, 1]
    // RowMatrixXd random = RowMatrixXd::Random(1, x.cols());
    //// uniform [keep_prob, 1+keep_prob] .floor()
    // random =( (random/2.).array() + 0.5 + keep_prob ).floor();

    // read in from file
    RowMatrixXd random = row_binary_[ensemble_index][layer];

    // each row is the same (each atom is treated the same)
    keep_prob_binary_[layer] = random.replicate(x.rows(), 1);

    y = (x / keep_prob).cwiseProduct(keep_prob_binary_[layer]);
  }
  else
  {
    y = x;
  }

  return y;
}

//*****************************************************************************
// activation functions and derivatives
//*****************************************************************************

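// Note: each derivative below is evaluated at the pre-activation z (the same
// argument passed to the activation itself), which matches how backward()
// calls activFuncDeriv_(preactiv_[i]).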
RowMatrixXd relu(RowMatrixXd const & x) { return x.cwiseMax(0.0); }

RowMatrixXd relu_derivative(RowMatrixXd const & x)
{
  RowMatrixXd deriv(x.rows(), x.cols());

  for (int i = 0; i < x.rows(); i++)
  {
    for (int j = 0; j < x.cols(); j++)
    {
      if (x(i, j) < 0.) { deriv(i, j) = 0.; }
      else
      {
        deriv(i, j) = 1.;
      }
    }
  }
  return deriv;
}

RowMatrixXd elu(RowMatrixXd const & x)
{
  double alpha = 1.0;
  RowMatrixXd e(x.rows(), x.cols());

  for (int i = 0; i < x.rows(); i++)
  {
    for (int j = 0; j < x.cols(); j++)
    {
      if (x(i, j) < 0.) { e(i, j) = alpha * (exp(x(i, j)) - 1); }
      else
      {
        e(i, j) = x(i, j);
      }
    }
  }
  return e;
}

RowMatrixXd elu_derivative(RowMatrixXd const & x)
{
  double alpha = 1.0;
  RowMatrixXd deriv(x.rows(), x.cols());

  for (int i = 0; i < x.rows(); i++)
  {
    for (int j = 0; j < x.cols(); j++)
    {
      if (x(i, j) < 0.) { deriv(i, j) = alpha * exp(x(i, j)); }
      else
      {
        deriv(i, j) = 1.;
      }
    }
  }
  return deriv;
}

RowMatrixXd tanh(RowMatrixXd const & x) { return (x.array().tanh()).matrix(); }

RowMatrixXd tanh_derivative(RowMatrixXd const & x)
{
  return (1.0 - x.array().tanh().square()).matrix();
}

RowMatrixXd sigmoid(RowMatrixXd const & x)
{
  return (1.0 / (1.0 + (-x).array().exp())).matrix();
}

RowMatrixXd sigmoid_derivative(RowMatrixXd const & x)
{
  RowMatrixXd s = sigmoid(x);

  return (s.array() * (1.0 - s.array())).matrix();
}
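
// A minimal usage sketch (illustration only; the calling driver that owns the
// FILE pointers, descriptor buffer, and result accessors lives elsewhere, and
// the names paramFp, dropoutFp, zeta, numAtoms, descriptorSize are
// placeholders):
//
//   NeuralNetwork nn;
//   nn.read_parameter_file(paramFp, descriptorSize);  // weights, biases, ...
//   nn.read_dropout_file(dropoutFp);  // only needed when dropout is active
//   nn.forward(zeta, numAtoms, descriptorSize, 0);  // zeta: row-major descriptors
//   nn.backward();                                  // gradients w.r.t. zeta
//
// The summed-output energies and input gradients are then read from the
// members filled above (activOutputLayer_, gradInput_), presumably exposed
// through accessors declared in network.h.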