#ifndef NETWORK_H_
#define NETWORK_H_

#include <Eigen/Core>
#include <vector>
#include <map>
#include <algorithm>
#include <iostream>
#include <stdexcept>
#include "Config.h"
#include "RNG.h"
#include "Layer.h"
#include "Output.h"
#include "Callback.h"
#include "Utils/Random.h"
#include "Utils/IO.h"
#include "Utils/Factory.h"

namespace MiniDNN
{


///
/// \defgroup Network Neural Network Model
///

///
/// \ingroup Network
///
/// This class represents a neural network model that typically consists of a
/// number of hidden layers and an output layer. It provides functions for
/// building the network, fitting the model, and making predictions.
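///
/// A minimal sketch of the typical workflow (illustrative only; it assumes
/// the FullyConnected layer, ReLU and Identity activations, RegressionMSE
/// output, and RMSProp optimizer shipped with this library, plus random
/// data for brevity):
///
/// \code
/// typedef Eigen::MatrixXd Matrix;
///
/// Network net;
/// net.add_layer(new FullyConnected<ReLU>(10, 32));     // 10 inputs -> 32 units
/// net.add_layer(new FullyConnected<Identity>(32, 1));  // 32 units -> 1 output
/// net.set_output(new RegressionMSE());
/// net.init(0, 0.01, 123);              // N(0, 0.01^2) weights, RNG seed 123
///
/// RMSProp opt;
/// Matrix x = Matrix::Random(10, 100);  // each column is an observation
/// Matrix y = Matrix::Random(1, 100);   // each column is a response
/// net.fit(opt, x, y, 20, 50);          // mini-batch size 20, 50 epochs
/// Matrix pred = net.predict(x);
/// \endcode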
///
class Network
{
    private:
        typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
        typedef Eigen::RowVectorXi IntegerVector;
        typedef std::map<std::string, int> MetaInfo;

        RNG                 m_default_rng;      // Built-in RNG
        RNG&                m_rng;              // Reference to the RNG provided by the user,
                                                // otherwise reference to m_default_rng
        std::vector<Layer*> m_layers;           // Pointers to hidden layers
        Output*             m_output;           // The output layer
        Callback            m_default_callback; // Default callback function
        Callback*           m_callback;         // Points to user-provided callback function,
                                                // otherwise points to m_default_callback

        // Check dimensions of layers
        void check_unit_sizes() const
        {
            const int nlayer = num_layers();

            if (nlayer <= 1)
            {
                return;
            }

            for (int i = 1; i < nlayer; i++)
            {
                if (m_layers[i]->in_size() != m_layers[i - 1]->out_size())
                {
                    throw std::invalid_argument("[class Network]: Unit sizes do not match");
                }
            }
        }

        // Let each layer compute its output
        void forward(const Matrix& input)
        {
            const int nlayer = num_layers();

            if (nlayer <= 0)
            {
                return;
            }

            // First layer
            if (input.rows() != m_layers[0]->in_size())
            {
                throw std::invalid_argument("[class Network]: Input data have incorrect dimension");
            }

            m_layers[0]->forward(input);

            // The following layers
            for (int i = 1; i < nlayer; i++)
            {
                m_layers[i]->forward(m_layers[i - 1]->output());
            }
        }

        // Let each layer compute its gradients of the parameters
        // target has two versions: Matrix and RowVectorXi
        // The RowVectorXi version is used in classification problems where each
        // element is a class label
        template <typename TargetType>
        void backprop(const Matrix& input, const TargetType& target)
        {
            const int nlayer = num_layers();

            if (nlayer <= 0)
            {
                return;
            }

            Layer* first_layer = m_layers[0];
            Layer* last_layer = m_layers[nlayer - 1];
            // Let output layer compute back-propagation data
            m_output->check_target_data(target);
            m_output->evaluate(last_layer->output(), target);

            // If there is only one hidden layer, "prev_layer_data" will be the input data
            if (nlayer == 1)
            {
                first_layer->backprop(input, m_output->backprop_data());
                return;
            }

            // Compute gradients for the last hidden layer
            last_layer->backprop(m_layers[nlayer - 2]->output(), m_output->backprop_data());

            // Compute gradients for all the hidden layers except for the first one and the last one
            for (int i = nlayer - 2; i > 0; i--)
            {
                m_layers[i]->backprop(m_layers[i - 1]->output(),
                                      m_layers[i + 1]->backprop_data());
            }

            // Compute gradients for the first layer
            first_layer->backprop(input, m_layers[1]->backprop_data());
        }

        // Update parameters
        void update(Optimizer& opt)
        {
            const int nlayer = num_layers();

            if (nlayer <= 0)
            {
                return;
            }

            for (int i = 0; i < nlayer; i++)
            {
                m_layers[i]->update(opt);
            }
        }

        // Get the meta information of the network, used to export the NN model
        MetaInfo get_meta_info() const
        {
            const int nlayer = num_layers();
            MetaInfo map;
            map.insert(std::make_pair("Nlayers", nlayer));

            for (int i = 0; i < nlayer; i++)
            {
                m_layers[i]->fill_meta_info(map, i);
            }

            map.insert(std::make_pair("OutputLayer", internal::output_id(m_output->output_type())));
            return map;
        }

    public:
        ///
        /// Default constructor that creates an empty neural network
        ///
        Network() :
            m_default_rng(1),
            m_rng(m_default_rng),
            m_output(NULL),
            m_default_callback(),
            m_callback(&m_default_callback)
        {}

        ///
        /// Constructor with a user-provided random number generator
        ///
        /// \param rng A user-provided random number generator object that inherits
        ///            from the default RNG class.
        ///
        Network(RNG& rng) :
            m_default_rng(1),
            m_rng(rng),
            m_output(NULL),
            m_default_callback(),
            m_callback(&m_default_callback)
        {}

        ///
        /// Destructor that frees the added hidden layers and output layer
        ///
        ~Network()
        {
            const int nlayer = num_layers();

            for (int i = 0; i < nlayer; i++)
            {
                delete m_layers[i];
            }

            if (m_output)
            {
                delete m_output;
            }
        }

        ///
        /// Add a hidden layer to the neural network
        ///
        /// \param layer A pointer to a Layer object, typically constructed from
        ///              layer classes such as FullyConnected and Convolutional.
        ///              **NOTE**: the pointer will be handled and freed by the
        ///              network object, so do not delete it manually.
        ///
        void add_layer(Layer* layer)
        {
            m_layers.push_back(layer);
        }

        ///
        /// Set the output layer of the neural network
        ///
        /// \param output A pointer to an Output object, typically constructed from
        ///               output layer classes such as RegressionMSE and MultiClassEntropy.
        ///               **NOTE**: the pointer will be handled and freed by the
        ///               network object, so do not delete it manually.
        ///
        void set_output(Output* output)
        {
            if (m_output)
            {
                delete m_output;
            }

            m_output = output;
        }

        ///
        /// Number of hidden layers in the network
        ///
        int num_layers() const
        {
            return static_cast<int>(m_layers.size());
        }

        ///
        /// Get the list of hidden layers of the network
        ///
        std::vector<const Layer*> get_layers() const
        {
            const int nlayer = num_layers();
            std::vector<const Layer*> layers(nlayer);
            std::copy(m_layers.begin(), m_layers.end(), layers.begin());
            return layers;
        }

        ///
        /// Get the output layer
        ///
        const Output* get_output() const
        {
            return m_output;
        }

        ///
        /// Set the callback function that can be called during model fitting
        ///
        /// \param callback A user-provided callback function object that inherits
        ///                 from the default Callback class.
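        ///
        /// A minimal sketch (illustrative; it assumes the VerboseCallback
        /// class provided by this library, which prints the loss after each
        /// mini-batch, and the `net`, `opt`, `x`, `y` objects from the
        /// class-level example). The callback object is stored by pointer,
        /// so it must outlive the call to fit():
        ///
        /// \code
        /// VerboseCallback callback;
        /// net.set_callback(callback);
        /// net.fit(opt, x, y, 20, 50);
        /// \endcode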
        ///
        void set_callback(Callback& callback)
        {
            m_callback = &callback;
        }
        ///
        /// Set the default silent callback function
        ///
        void set_default_callback()
        {
            m_callback = &m_default_callback;
        }

        ///
        /// Initialize layer parameters in the network using normal distribution
        ///
        /// \param mu    Mean of the normal distribution.
        /// \param sigma Standard deviation of the normal distribution.
        /// \param seed  Set the random seed of the %RNG if `seed > 0`, otherwise
        ///              use the current random state.
        ///
        void init(const Scalar& mu = Scalar(0), const Scalar& sigma = Scalar(0.01),
                  int seed = -1)
        {
            check_unit_sizes();

            if (seed > 0)
            {
                m_rng.seed(seed);
            }

            const int nlayer = num_layers();

            for (int i = 0; i < nlayer; i++)
            {
                m_layers[i]->init(mu, sigma, m_rng);
            }
        }

        ///
        /// Get the serialized layer parameters
        ///
        std::vector< std::vector<Scalar> > get_parameters() const
        {
            const int nlayer = num_layers();
            std::vector< std::vector<Scalar> > res;
            res.reserve(nlayer);

            for (int i = 0; i < nlayer; i++)
            {
                res.push_back(m_layers[i]->get_parameters());
            }

            return res;
        }

        ///
        /// Set the layer parameters
        ///
        /// \param param Serialized layer parameters, as returned by get_parameters().
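        ///
        /// A small sketch (illustrative): parameters can be copied between
        /// networks with identical architectures, e.g. to clone a model.
        /// Here `other_net` is a hypothetical second Network built with the
        /// same layers:
        ///
        /// \code
        /// std::vector< std::vector<Scalar> > p = net.get_parameters();
        /// other_net.set_parameters(p);  // layer sizes must match exactly
        /// \endcode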
        ///
        void set_parameters(const std::vector< std::vector<Scalar> >& param)
        {
            const int nlayer = num_layers();

            if (static_cast<int>(param.size()) != nlayer)
            {
                throw std::invalid_argument("[class Network]: Parameter size does not match");
            }

            for (int i = 0; i < nlayer; i++)
            {
                m_layers[i]->set_parameters(param[i]);
            }
        }

        ///
        /// Get the serialized derivatives of layer parameters
        ///
        std::vector< std::vector<Scalar> > get_derivatives() const
        {
            const int nlayer = num_layers();
            std::vector< std::vector<Scalar> > res;
            res.reserve(nlayer);

            for (int i = 0; i < nlayer; i++)
            {
                res.push_back(m_layers[i]->get_derivatives());
            }

            return res;
        }

        ///
        /// Debugging tool that checks parameter gradients: it compares the
        /// derivatives computed by back-propagation with numerical estimates
        /// from central differences, and prints the difference for a number
        /// of randomly selected parameters.
        ///
        /// \param input   The predictors. Each column is an observation.
        /// \param target  The response variable, either a Matrix or a vector
        ///                of class labels, depending on the output layer.
        /// \param npoints Number of randomly selected parameters to check.
        /// \param seed    Set the random seed of the %RNG if `seed > 0`, otherwise
        ///                use the current random state.
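        ///
        /// A small usage sketch (illustrative; assumes `net`, `x`, and `y`
        /// have been set up as in the class-level example):
        ///
        /// \code
        /// net.check_gradient(x, y, 10, 42);  // probe 10 random parameters
        /// \endcode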
        ///
        template <typename TargetType>
        void check_gradient(const Matrix& input, const TargetType& target, int npoints,
                            int seed = -1)
        {
            if (seed > 0)
            {
                m_rng.seed(seed);
            }

            this->forward(input);
            this->backprop(input, target);
            std::vector< std::vector<Scalar> > param = this->get_parameters();
            std::vector< std::vector<Scalar> > deriv = this->get_derivatives();
            const Scalar eps = 1e-5;
            const int nlayer = static_cast<int>(deriv.size());

            for (int i = 0; i < npoints; i++)
            {
                // Randomly select a layer
                const int layer_id = int(m_rng.rand() * nlayer);
                // Randomly pick a parameter, note that some layers may have no parameters
                const int nparam = static_cast<int>(deriv[layer_id].size());

                if (nparam < 1)
                {
                    continue;
                }

                const int param_id = int(m_rng.rand() * nparam);
                // Perturb the parameter a little bit
                const Scalar old = param[layer_id][param_id];
                param[layer_id][param_id] -= eps;
                this->set_parameters(param);
                this->forward(input);
                this->backprop(input, target);
                const Scalar loss_pre = m_output->loss();
                param[layer_id][param_id] += eps * 2;
                this->set_parameters(param);
                this->forward(input);
                this->backprop(input, target);
                const Scalar loss_post = m_output->loss();
                // Central difference estimate of the derivative
                const Scalar deriv_est = (loss_post - loss_pre) / eps / 2;
                std::cout << "[layer " << layer_id << ", param " << param_id <<
                          "] deriv = " << deriv[layer_id][param_id] << ", est = " << deriv_est <<
                          ", diff = " << deriv_est - deriv[layer_id][param_id] << std::endl;
                param[layer_id][param_id] = old;
            }

            // Restore original parameters
            this->set_parameters(param);
        }

        ///
        /// Fit the model based on the given data
        ///
        /// \param opt        An object that inherits from the Optimizer class, indicating the optimization algorithm to use.
        /// \param x          The predictors. Each column is an observation.
        /// \param y          The response variable. Each column is an observation.
        /// \param batch_size Mini-batch size.
        /// \param epoch      Number of epochs of training.
        /// \param seed       Set the random seed of the %RNG if `seed > 0`, otherwise
        ///                   use the current random state.
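        ///
        /// A classification sketch (illustrative; it assumes a network built
        /// with the MultiClassEntropy output and the RMSProp optimizer shipped
        /// with this library; class labels are passed as an integer row vector):
        ///
        /// \code
        /// Eigen::RowVectorXi labels(100);  // one class label per observation
        /// // ... fill labels with values in [0, nclass) ...
        /// RMSProp opt;
        /// opt.m_lrate = 0.001;             // learning rate
        /// net.fit(opt, x, labels, 20, 50, 123);
        /// \endcode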
        ///
        template <typename DerivedX, typename DerivedY>
        bool fit(Optimizer& opt, const Eigen::MatrixBase<DerivedX>& x,
                 const Eigen::MatrixBase<DerivedY>& y,
                 int batch_size, int epoch, int seed = -1)
        {
            // We do not directly use PlainObjectX since it may be row-major if x is passed as mat.transpose()
            // We want to force XType and YType to be column-major
            typedef typename Eigen::MatrixBase<DerivedX>::PlainObject PlainObjectX;
            typedef typename Eigen::MatrixBase<DerivedY>::PlainObject PlainObjectY;
            typedef Eigen::Matrix<typename PlainObjectX::Scalar, PlainObjectX::RowsAtCompileTime, PlainObjectX::ColsAtCompileTime>
            XType;
            typedef Eigen::Matrix<typename PlainObjectY::Scalar, PlainObjectY::RowsAtCompileTime, PlainObjectY::ColsAtCompileTime>
            YType;
            const int nlayer = num_layers();

            if (nlayer <= 0)
            {
                return false;
            }

            // Reset optimizer
            opt.reset();

            // Create shuffled mini-batches
            if (seed > 0)
            {
                m_rng.seed(seed);
            }

            std::vector<XType> x_batches;
            std::vector<YType> y_batches;
            const int nbatch = internal::create_shuffled_batches(x, y, batch_size, m_rng,
                               x_batches, y_batches);
            // Set up callback parameters
            m_callback->m_nbatch = nbatch;
            m_callback->m_nepoch = epoch;

            // Iterations on the whole data set
            for (int k = 0; k < epoch; k++)
            {
                m_callback->m_epoch_id = k;

                // Train on each mini-batch
                for (int i = 0; i < nbatch; i++)
                {
                    m_callback->m_batch_id = i;
                    m_callback->pre_training_batch(this, x_batches[i], y_batches[i]);
                    this->forward(x_batches[i]);
                    this->backprop(x_batches[i], y_batches[i]);
                    this->update(opt);
                    m_callback->post_training_batch(this, x_batches[i], y_batches[i]);
                }
            }

            return true;
        }

        ///
        /// Use the fitted model to make predictions
        ///
        /// \param x The predictors. Each column is an observation.
        ///
        Matrix predict(const Matrix& x)
        {
            const int nlayer = num_layers();

            if (nlayer <= 0)
            {
                return Matrix();
            }

            this->forward(x);
            return m_layers[nlayer - 1]->output();
        }

        ///
        /// Export the network to files.
        ///
        /// \param folder   The folder where the network is saved.
        /// \param filename The filename for the network.
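        ///
        /// A round-trip sketch (illustrative; the folder and file names are
        /// arbitrary):
        ///
        /// \code
        /// net.export_net("./model", "netdata");  // save the fitted model
        /// Network net2;
        /// net2.read_net("./model", "netdata");   // restore it into a new object
        /// \endcode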
        ///
        void export_net(const std::string& folder, const std::string& filename) const
        {
            bool created = internal::create_directory(folder);

            if (!created)
            {
                throw std::runtime_error("[class Network]: Folder creation failed");
            }

            MetaInfo map = this->get_meta_info();
            internal::write_map(folder + "/" + filename, map);
            std::vector< std::vector<Scalar> > params = this->get_parameters();
            internal::write_parameters(folder, filename, params);
        }

        ///
        /// Read in a network from files.
        ///
        /// \param folder   The folder where the network is saved.
        /// \param filename The filename for the network.
        ///
        void read_net(const std::string& folder, const std::string& filename)
        {
            MetaInfo map;
            internal::read_map(folder + "/" + filename, map);
            MetaInfo::const_iterator it = map.find("Nlayers");

            if (it == map.end())
            {
                throw std::runtime_error("[class Network]: Invalid network file, 'Nlayers' not found");
            }

            const int nlayer = it->second;
            std::vector< std::vector<Scalar> > params = internal::read_parameters(folder, filename, nlayer);

            // Free any existing hidden layers before rebuilding the network
            const int old_nlayer = num_layers();

            for (int i = 0; i < old_nlayer; i++)
            {
                delete m_layers[i];
            }

            m_layers.clear();

            for (int i = 0; i < nlayer; i++)
            {
                this->add_layer(internal::create_layer(map, i));
            }

            this->set_parameters(params);
            this->set_output(internal::create_output(map));
        }
};


} // namespace MiniDNN


#endif /* NETWORK_H_ */