/**
 * @file methods/ann/ffn.hpp
 * @author Marcus Edel
 * @author Shangtong Zhang
 *
 * Definition of the FFN class, which implements feed forward neural networks.
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license.  You should have received a copy of the
 * 3-clause BSD license along with mlpack.  If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#ifndef MLPACK_METHODS_ANN_FFN_HPP
#define MLPACK_METHODS_ANN_FFN_HPP

#include <mlpack/prereqs.hpp>

#include "visitor/delete_visitor.hpp"
#include "visitor/delta_visitor.hpp"
#include "visitor/output_height_visitor.hpp"
#include "visitor/output_parameter_visitor.hpp"
#include "visitor/output_width_visitor.hpp"
#include "visitor/reset_visitor.hpp"
#include "visitor/weight_size_visitor.hpp"
#include "visitor/copy_visitor.hpp"
#include "visitor/loss_visitor.hpp"

#include "init_rules/network_init.hpp"

#include <mlpack/methods/ann/layer/layer_types.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/init_rules/random_init.hpp>
#include <mlpack/methods/ann/layer/layer_traits.hpp>
#include <ensmallen.hpp>

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

/**
 * Implementation of a standard feed forward network.
 *
 * @tparam OutputLayerType The output layer type used to evaluate the network.
 * @tparam InitializationRuleType Rule used to initialize the weight matrix.
 * @tparam CustomLayers Any set of custom layers that could be a part of the
 *         feed forward network.
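 *
 * A minimal usage sketch (assuming a classification dataset in `trainData`
 * and `trainLabels`; `Linear<>`, `SigmoidLayer<>`, and `LogSoftMax<>` are
 * layers from mlpack's ann module):
 *
 * @code
 * FFN<NegativeLogLikelihood<> > model;
 * model.Add<Linear<> >(trainData.n_rows, 8); // Input size -> hidden size.
 * model.Add<SigmoidLayer<> >();              // Elementwise activation.
 * model.Add<Linear<> >(8, 3);                // Hidden size -> 3 classes.
 * model.Add<LogSoftMax<> >();
 * model.Train(trainData, trainLabels);       // Uses ens::RMSProp by default.
 * @endcode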
 */
template<
  typename OutputLayerType = NegativeLogLikelihood<>,
  typename InitializationRuleType = RandomInitialization,
  typename... CustomLayers
>
class FFN
{
 public:
  //! Convenience typedef for the internal model construction.
  using NetworkType = FFN<OutputLayerType, InitializationRuleType>;

  /**
   * Create the FFN object.
   *
   * Optionally, specify which initialization rule and performance function
   * should be used.
   *
   * If you want to pass in a parameter and discard the original parameter
   * object, be sure to use std::move to avoid an unnecessary copy.
   *
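   * A sketch of a custom construction (assuming RandomInitialization's
   * (lowerBound, upperBound) constructor):
   *
   * @code
   * FFN<NegativeLogLikelihood<> > model(NegativeLogLikelihood<>(),
   *                                     RandomInitialization(-1, 1));
   * @endcode
   *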
   * @param outputLayer Output layer used to evaluate the network.
   * @param initializeRule Optional instantiated InitializationRule object
   *        for initializing the network parameter.
   */
  FFN(OutputLayerType outputLayer = OutputLayerType(),
      InitializationRuleType initializeRule = InitializationRuleType());

  //! Copy constructor.
  FFN(const FFN&);

  //! Move constructor.
  FFN(FFN&&);

  //! Copy/move assignment operator.
  FFN& operator = (FFN);

  //! Destructor to release allocated memory.
  ~FFN();

  /**
   * Check if the optimizer has a MaxIterations() parameter; if it does,
   * then check whether its value is less than the number of datapoints
   * in the dataset.
   *
   * @tparam OptimizerType Type of optimizer to use to train the model.
   * @param optimizer Optimizer used in the training process.
   * @param samples Number of datapoints in the dataset.
   */
  template<typename OptimizerType>
  typename std::enable_if<
      HasMaxIterations<OptimizerType, size_t&(OptimizerType::*)()>
      ::value, void>::type
  WarnMessageMaxIterations(OptimizerType& optimizer, size_t samples) const;

  /**
   * Check if the optimizer has a MaxIterations() parameter; if it
   * doesn't, then simply return from the function.
   *
   * @tparam OptimizerType Type of optimizer to use to train the model.
   * @param optimizer Optimizer used in the training process.
   * @param samples Number of datapoints in the dataset.
   */
  template<typename OptimizerType>
  typename std::enable_if<
      !HasMaxIterations<OptimizerType, size_t&(OptimizerType::*)()>
      ::value, void>::type
  WarnMessageMaxIterations(OptimizerType& optimizer, size_t samples) const;

  /**
   * Train the feedforward network on the given input data using the given
   * optimizer.
   *
   * This will use the existing model parameters as a starting point for the
   * optimization. If this is not what you want, then you should access the
   * parameters vector directly with Parameters() and modify it as desired.
   *
   * If you want to pass in a parameter and discard the original parameter
   * object, be sure to use std::move to avoid an unnecessary copy.
   *
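   * A sketch of training with an explicit optimizer and a callback (assuming
   * an FFN named `model`, and ens::SGD and the ens::ProgressBar callback from
   * ensmallen):
   *
   * @code
   * ens::SGD<> optimizer(0.01, 32); // Step size 0.01, batch size 32.
   * model.Train(trainData, trainLabels, optimizer, ens::ProgressBar());
   * @endcode
   *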
   * @tparam OptimizerType Type of optimizer to use to train the model.
   * @tparam CallbackTypes Types of callback functions.
   * @param predictors Input training variables.
   * @param responses Outputs results from input training variables.
   * @param optimizer Instantiated optimizer used to train the model.
   * @param callbacks Callback functions for the ensmallen optimizer
   *      `OptimizerType`.
   *      See https://www.ensmallen.org/docs.html#callback-documentation.
   * @return The final objective of the trained model (NaN or Inf on error).
   */
  template<typename OptimizerType, typename... CallbackTypes>
  double Train(arma::mat predictors,
               arma::mat responses,
               OptimizerType& optimizer,
               CallbackTypes&&... callbacks);

  /**
   * Train the feedforward network on the given input data. By default, the
   * RMSProp optimization algorithm is used, but others can be specified
   * (such as ens::SGD).
   *
   * This will use the existing model parameters as a starting point for the
   * optimization. If this is not what you want, then you should access the
   * parameters vector directly with Parameters() and modify it as desired.
   *
   * If you want to pass in a parameter and discard the original parameter
   * object, be sure to use std::move to avoid an unnecessary copy.
   *
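   * For example, to hand the data over without copying it (a sketch; note
   * that the moved-from matrices are left empty):
   *
   * @code
   * model.Train(std::move(trainData), std::move(trainLabels));
   * @endcode
   *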
   * @tparam OptimizerType Type of optimizer to use to train the model.
   * @tparam CallbackTypes Types of callback functions.
   * @param predictors Input training variables.
   * @param responses Outputs results from input training variables.
   * @param callbacks Callback functions for the ensmallen optimizer
   *      `OptimizerType`.
   *      See https://www.ensmallen.org/docs.html#callback-documentation.
   * @return The final objective of the trained model (NaN or Inf on error).
   */
  template<typename OptimizerType = ens::RMSProp, typename... CallbackTypes>
  double Train(arma::mat predictors,
               arma::mat responses,
               CallbackTypes&&... callbacks);

  /**
   * Predict the responses to a given set of predictors. The responses will
   * reflect the output of the given output layer as returned by the
   * output layer function.
   *
   * If you want to pass in a parameter and discard the original parameter
   * object, be sure to use std::move to avoid an unnecessary copy.
   *
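   * A minimal sketch (assuming a trained `model` and a test set `testData`):
   *
   * @code
   * arma::mat predictions;
   * model.Predict(testData, predictions);
   * @endcode
   *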
   * @param predictors Input predictors.
   * @param results Matrix to put output predictions of responses into.
   */
  void Predict(arma::mat predictors, arma::mat& results);

  /**
   * Evaluate the feedforward network with the given predictors and responses.
   * This function is usually used to monitor progress while training.
   *
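   * A sketch of monitoring the objective on a held-out set (assuming
   * `validData` and `validResponses` matrices):
   *
   * @code
   * const double objective = model.Evaluate(validData, validResponses);
   * @endcode
   *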
   * @param predictors Input variables.
   * @param responses Target outputs for input variables.
   */
  template<typename PredictorsType, typename ResponsesType>
  double Evaluate(const PredictorsType& predictors,
                  const ResponsesType& responses);

  /**
   * Evaluate the feedforward network with the given parameters. This function
   * is usually called by the optimizer to train the model.
   *
   * @param parameters Matrix model parameters.
   */
  double Evaluate(const arma::mat& parameters);

  /**
   * Evaluate the feedforward network with the given parameters, but using only
   * a number of data points. This is useful for optimizers such as SGD, which
   * require a separable objective function.
   *
   * @param parameters Matrix model parameters.
   * @param begin Index of the starting point to use for objective function
   *        evaluation.
   * @param batchSize Number of points to be passed at a time to use for
   *        objective function evaluation.
   * @param deterministic Whether or not to train or test the model. Note that
   *        some layers act differently in training or testing mode.
   */
  double Evaluate(const arma::mat& parameters,
                  const size_t begin,
                  const size_t batchSize,
                  const bool deterministic);

  /**
   * Evaluate the feedforward network with the given parameters, but using only
   * a number of data points. This is useful for optimizers such as SGD, which
   * require a separable objective function. This just calls the overload of
   * Evaluate() with deterministic = true.
   *
   * @param parameters Matrix model parameters.
   * @param begin Index of the starting point to use for objective function
   *        evaluation.
   * @param batchSize Number of points to be passed at a time to use for
   *        objective function evaluation.
   */
  double Evaluate(const arma::mat& parameters,
                  const size_t begin,
                  const size_t batchSize);

  /**
   * Evaluate the feedforward network with the given parameters.
   * This function is usually called by the optimizer to train the model.
   * This just calls the overload of EvaluateWithGradient() with batchSize = 1.
   *
   * @param parameters Matrix model parameters.
   * @param gradient Matrix to output gradient into.
   */
  template<typename GradType>
  double EvaluateWithGradient(const arma::mat& parameters, GradType& gradient);

  /**
   * Evaluate the feedforward network with the given parameters, but using only
   * a number of data points. This is useful for optimizers such as SGD, which
   * require a separable objective function.
   *
   * @param parameters Matrix model parameters.
   * @param begin Index of the starting point to use for objective function
   *        evaluation.
   * @param gradient Matrix to output gradient into.
   * @param batchSize Number of points to be passed at a time to use for
   *        objective function evaluation.
   */
  template<typename GradType>
  double EvaluateWithGradient(const arma::mat& parameters,
                              const size_t begin,
                              GradType& gradient,
                              const size_t batchSize);

  /**
   * Evaluate the gradient of the feedforward network with the given
   * parameters, and with respect to only a number of points in the dataset.
   * This is useful for optimizers such as SGD, which require a separable
   * objective function.
   *
   * @param parameters Matrix of the model parameters to be optimized.
   * @param begin Index of the starting point to use for objective function
   *        gradient evaluation.
   * @param gradient Matrix to output gradient into.
   * @param batchSize Number of points to be processed as a batch for objective
   *        function gradient evaluation.
   */
  void Gradient(const arma::mat& parameters,
                const size_t begin,
                arma::mat& gradient,
                const size_t batchSize);

  /**
   * Shuffle the order of function visitation. This may be called by the
   * optimizer.
   */
  void Shuffle();

  /**
   * Add a new module to the model.
   *
   * @param args The layer parameter.
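   *
   * A sketch of both Add() overloads (assuming the Linear<> layer):
   *
   * @code
   * model.Add<Linear<> >(10, 5);     // Construct the layer in place.
   * model.Add(new Linear<>(10, 5));  // Add a pre-constructed layer.
   * @endcode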
   */
  template <class LayerType, class... Args>
  void Add(Args... args) { network.push_back(new LayerType(args...)); }

  /**
   * Add a new module to the model.
   *
   * @param layer The Layer to be added to the model.
   */
  void Add(LayerTypes<CustomLayers...> layer) { network.push_back(layer); }

  //! Get the network model.
  const std::vector<LayerTypes<CustomLayers...> >& Model() const
  {
    return network;
  }
  //! Modify the network model.  Be careful!  If you change the structure of
  //! the network or parameters for layers, its state may become invalid, so
  //! be sure to call ResetParameters() afterwards.
  std::vector<LayerTypes<CustomLayers...> >& Model() { return network; }

  //! Return the number of separable functions (the number of predictor
  //! points).
  size_t NumFunctions() const { return numFunctions; }

  //! Return the initial point for the optimization.
  const arma::mat& Parameters() const { return parameter; }
  //! Modify the initial point for the optimization.
  arma::mat& Parameters() { return parameter; }

  //! Get the matrix of responses to the input data points.
  const arma::mat& Responses() const { return responses; }
  //! Modify the matrix of responses to the input data points.
  arma::mat& Responses() { return responses; }

  //! Get the matrix of data points (predictors).
  const arma::mat& Predictors() const { return predictors; }
  //! Modify the matrix of data points (predictors).
  arma::mat& Predictors() { return predictors; }

  /**
   * Reset the module information (weights/parameters).
   */
  void ResetParameters();

  //! Serialize the model.
  template<typename Archive>
  void serialize(Archive& ar, const unsigned int /* version */);

  /**
   * Perform the forward pass of the data in real batch mode.
   *
   * Forward and Backward should be used as a pair, and they are designed
   * mainly for advanced users. Users should prefer Predict and Train unless
   * those two functions can't satisfy some special requirements.
   *
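   * A sketch of a full Forward/Backward round trip (assuming `inputs` and
   * `targets` matrices of matching dimensions):
   *
   * @code
   * arma::mat results, gradients;
   * model.Forward(inputs, results);
   * const double loss = model.Backward(inputs, targets, gradients);
   * @endcode
   *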
   * @param inputs The input data.
   * @param results The predicted results.
   */
  template<typename PredictorsType, typename ResponsesType>
  void Forward(const PredictorsType& inputs, ResponsesType& results);

  /**
   * Perform a partial forward pass of the data.
   *
   * This function is meant for the cases when users require a forward pass
   * only through certain layers and not the entire network.
   *
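   * For example, a sketch of a pass through layers 1 to 3 only (assuming the
   * network has at least four layers and `input` matches layer 1's expected
   * input):
   *
   * @code
   * arma::mat partialResults;
   * model.Forward(input, partialResults, 1, 3);
   * @endcode
   *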
   * @param inputs The input data for the specified first layer.
   * @param results The predicted results from the specified last layer.
   * @param begin The index of the first layer.
   * @param end The index of the last layer.
   */
  template<typename PredictorsType, typename ResponsesType>
  void Forward(const PredictorsType& inputs,
               ResponsesType& results,
               const size_t begin,
               const size_t end);

  /**
   * Perform the backward pass of the data in real batch mode.
   *
   * Forward and Backward should be used as a pair, and they are designed
   * mainly for advanced users. Users should prefer Predict and Train unless
   * those two functions can't satisfy some special requirements.
   *
   * @param inputs Inputs of current pass.
   * @param targets The training target.
   * @param gradients Computed gradients.
   * @return Training error of the current pass.
   */
  template<typename PredictorsType,
           typename TargetsType,
           typename GradientsType>
  double Backward(const PredictorsType& inputs,
                  const TargetsType& targets,
                  GradientsType& gradients);

 private:
  // Helper functions.
  /**
   * The Forward algorithm (part of the Forward-Backward algorithm).  Computes
   * forward probabilities for each module.
   *
   * @param input Data sequence to compute probabilities for.
   */
  template<typename InputType>
  void Forward(const InputType& input);

  /**
   * Prepare the network for the given data.
   * This function won't actually trigger the training process.
   *
   * @param predictors Input data variables.
   * @param responses Outputs results from input data variables.
   */
  void ResetData(arma::mat predictors, arma::mat responses);

  /**
   * The Backward algorithm (part of the Forward-Backward algorithm). Computes
   * the backward pass for each module.
   */
  void Backward();

  /**
   * Iterate through all layer modules and update the gradient using the
   * layer-defined optimizer.
   */
  template<typename InputType>
  void Gradient(const InputType& input);

  /**
   * Reset the module status by setting the current deterministic parameter
   * for all modules that implement the Deterministic function.
   */
  void ResetDeterministic();

  /**
   * Reset the gradient for all modules that implement the Gradient function.
   */
  void ResetGradients(arma::mat& gradient);

  /**
   * Swap the content of this network with the given network.
   *
   * @param network Desired source network.
   */
  void Swap(FFN& network);

  //! Instantiated output layer used to evaluate the network.
  OutputLayerType outputLayer;

  //! Instantiated InitializationRule object for initializing the network
  //! parameter.
  InitializationRuleType initializeRule;

  //! The input width.
  size_t width;

  //! The input height.
  size_t height;

  //! Indicator of whether we have already trained the model.
  bool reset;

  //! Locally-stored model modules.
  std::vector<LayerTypes<CustomLayers...> > network;

  //! The matrix of data points (predictors).
  arma::mat predictors;

  //! The matrix of responses to the input data points.
  arma::mat responses;

  //! Matrix of (trained) parameters.
  arma::mat parameter;

  //! The number of separable functions (the number of predictor points).
  size_t numFunctions;

  //! The current error for the backward pass.
  arma::mat error;

  //! Locally-stored delta visitor.
  DeltaVisitor deltaVisitor;

  //! Locally-stored output parameter visitor.
  OutputParameterVisitor outputParameterVisitor;

  //! Locally-stored weight size visitor.
  WeightSizeVisitor weightSizeVisitor;

  //! Locally-stored output width visitor.
  OutputWidthVisitor outputWidthVisitor;

  //! Locally-stored output height visitor.
  OutputHeightVisitor outputHeightVisitor;

  //! Locally-stored loss visitor.
  LossVisitor lossVisitor;

  //! Locally-stored reset visitor.
  ResetVisitor resetVisitor;

  //! Locally-stored delete visitor.
  DeleteVisitor deleteVisitor;

  //! The current evaluation mode (training or testing).
  bool deterministic;

  //! Locally-stored delta object.
  arma::mat delta;

  //! Locally-stored input parameter object.
  arma::mat inputParameter;

  //! Locally-stored output parameter object.
  arma::mat outputParameter;

  //! Locally-stored gradient parameter.
  arma::mat gradient;

  //! Locally-stored copy visitor.
  CopyVisitor<CustomLayers...> copyVisitor;

  // The GAN class should have access to internal members.
  template<
    typename Model,
    typename InitializerType,
    typename NoiseType,
    typename PolicyType
  >
  friend class GAN;
}; // class FFN

} // namespace ann
} // namespace mlpack

//! Set the serialization version of the FFN class.  Multiple template
//! arguments make this ugly...
namespace boost {
namespace serialization {

template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayer>
struct version<
    mlpack::ann::FFN<OutputLayerType, InitializationRuleType, CustomLayer...>>
{
  BOOST_STATIC_CONSTANT(int, value = 2);
};

} // namespace serialization
} // namespace boost

// Include implementation.
#include "ffn_impl.hpp"

#endif