/**
 * @file methods/ann/ffn.hpp
 * @author Marcus Edel
 * @author Shangtong Zhang
 *
 * Definition of the FFN class, which implements feed forward neural networks.
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license.  You should have received a copy of the
 * 3-clause BSD license along with mlpack.  If not, see
 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
 */
#ifndef MLPACK_METHODS_ANN_FFN_HPP
#define MLPACK_METHODS_ANN_FFN_HPP

#include <mlpack/prereqs.hpp>

#include "visitor/delete_visitor.hpp"
#include "visitor/delta_visitor.hpp"
#include "visitor/output_height_visitor.hpp"
#include "visitor/output_parameter_visitor.hpp"
#include "visitor/output_width_visitor.hpp"
#include "visitor/reset_visitor.hpp"
#include "visitor/weight_size_visitor.hpp"
#include "visitor/copy_visitor.hpp"
#include "visitor/loss_visitor.hpp"

#include "init_rules/network_init.hpp"

#include <mlpack/methods/ann/layer/layer_types.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/init_rules/random_init.hpp>
#include <mlpack/methods/ann/layer/layer_traits.hpp>
#include <ensmallen.hpp>

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

/**
 * Implementation of a standard feed forward network.
 *
 * The network is held as an ordered list of layers (see Add() and Model()),
 * together with a single parameter matrix (see Parameters()) and the
 * predictors/responses it was last given.
 *
 * @tparam OutputLayerType The output layer type used to evaluate the network.
 * @tparam InitializationRuleType Rule used to initialize the weight matrix.
 * @tparam CustomLayers Any set of custom layers that could be a part of the
 *     feed forward network.
 */
template<
  typename OutputLayerType = NegativeLogLikelihood<>,
  typename InitializationRuleType = RandomInitialization,
  typename... CustomLayers
>
class FFN
{
 public:
  //! Convenience typedef for the internal model construction.  Note that this
  //! alias names the FFN type without the CustomLayers pack.
  using NetworkType = FFN<OutputLayerType, InitializationRuleType>;

  /**
   * Create the FFN object.
   *
   * Optionally, specify which initialize rule and performance function should
   * be used.
   *
   * If you want to pass in a parameter and discard the original parameter
   * object, be sure to use std::move to avoid unnecessary copy.
   *
   * @param outputLayer Output layer used to evaluate the network.
   * @param initializeRule Optional instantiated InitializationRule object
   *        for initializing the network parameter.
   */
  FFN(OutputLayerType outputLayer = OutputLayerType(),
      InitializationRuleType initializeRule = InitializationRuleType());

  //! Copy constructor.
  FFN(const FFN&);

  //! Move constructor.
  FFN(FFN&&);

  //! Copy/move assignment operator.  The argument is taken by value, so this
  //! single overload serves both copy and move assignment.
  FFN& operator = (FFN);

  //! Destructor to release allocated memory.
  ~FFN();

  /**
   * Check if the optimizer has MaxIterations() parameter, if it does
   * then check if its value is less than the number of datapoints
   * in the dataset.
   *
   * This overload is selected through std::enable_if only when
   * HasMaxIterations reports that OptimizerType provides MaxIterations().
   *
   * @tparam OptimizerType Type of optimizer to use to train the model.
   * @param optimizer optimizer used in the training process.
   * @param samples Number of datapoints in the dataset.
   */
  template<typename OptimizerType>
  typename std::enable_if<
      HasMaxIterations<OptimizerType, size_t&(OptimizerType::*)()>
      ::value, void>::type
  WarnMessageMaxIterations(OptimizerType& optimizer, size_t samples) const;

  /**
   * Check if the optimizer has MaxIterations() parameter, if it
   * doesn't then simply return from the function.
   *
   * This overload is selected through std::enable_if only when
   * HasMaxIterations reports that OptimizerType does not provide
   * MaxIterations().
   *
   * @tparam OptimizerType Type of optimizer to use to train the model.
   * @param optimizer optimizer used in the training process.
   * @param samples Number of datapoints in the dataset.
   */
  template<typename OptimizerType>
  typename std::enable_if<
      !HasMaxIterations<OptimizerType, size_t&(OptimizerType::*)()>
      ::value, void>::type
  WarnMessageMaxIterations(OptimizerType& optimizer, size_t samples) const;

  /**
   * Train the feedforward network on the given input data using the given
   * optimizer.
   *
   * This will use the existing model parameters as a starting point for the
   * optimization. If this is not what you want, then you should access the
   * parameters vector directly with Parameters() and modify it as desired.
   *
   * If you want to pass in a parameter and discard the original parameter
   * object, be sure to use std::move to avoid unnecessary copy.
   *
   * @tparam OptimizerType Type of optimizer to use to train the model.
   * @tparam CallbackTypes Types of Callback Functions.
   * @param predictors Input training variables.
   * @param responses Outputs results from input training variables.
   * @param optimizer Instantiated optimizer used to train the model.
   * @param callbacks Callback function for ensmallen optimizer `OptimizerType`.
   *      See https://www.ensmallen.org/docs.html#callback-documentation.
   * @return The final objective of the trained model (NaN or Inf on error).
   */
  template<typename OptimizerType, typename... CallbackTypes>
  double Train(arma::mat predictors,
               arma::mat responses,
               OptimizerType& optimizer,
               CallbackTypes&&... callbacks);

  /**
   * Train the feedforward network on the given input data. By default, the
   * RMSProp optimization algorithm is used, but others can be specified
   * (such as ens::SGD).
   *
   * This will use the existing model parameters as a starting point for the
   * optimization. If this is not what you want, then you should access the
   * parameters vector directly with Parameters() and modify it as desired.
   *
   * If you want to pass in a parameter and discard the original parameter
   * object, be sure to use std::move to avoid unnecessary copy.
   *
   * @tparam OptimizerType Type of optimizer to use to train the model.
   * @tparam CallbackTypes Types of Callback Functions.
   * @param predictors Input training variables.
   * @param responses Outputs results from input training variables.
   * @param callbacks Callback function for ensmallen optimizer `OptimizerType`.
   *      See https://www.ensmallen.org/docs.html#callback-documentation.
   * @return The final objective of the trained model (NaN or Inf on error).
   */
  template<typename OptimizerType = ens::RMSProp, typename... CallbackTypes>
  double Train(arma::mat predictors,
               arma::mat responses,
               CallbackTypes&&... callbacks);

  /**
   * Predict the responses to a given set of predictors. The responses will
   * reflect the output of the given output layer as returned by the
   * output layer function.
   *
   * If you want to pass in a parameter and discard the original parameter
   * object, be sure to use std::move to avoid unnecessary copy.
   *
   * @param predictors Input predictors.
   * @param results Matrix to put output predictions of responses into.
   */
  void Predict(arma::mat predictors, arma::mat& results);

  /**
   * Evaluate the feedforward network with the given predictors and responses.
   * This function is usually used to monitor progress while training.
   *
   * @param predictors Input variables.
   * @param responses Target outputs for input variables.
   * @return Result of the evaluation (presumably the objective/loss value;
   *     confirm against ffn_impl.hpp).
   */
  template<typename PredictorsType, typename ResponsesType>
  double Evaluate(const PredictorsType& predictors,
                  const ResponsesType& responses);

  /**
   * Evaluate the feedforward network with the given parameters. This function
   * is usually called by the optimizer to train the model.
   *
   * @param parameters Matrix model parameters.
   */
  double Evaluate(const arma::mat& parameters);

  /**
   * Evaluate the feedforward network with the given parameters, but using only
   * a number of data points. This is useful for optimizers such as SGD, which
   * require a separable objective function.
   *
   * @param parameters Matrix model parameters.
   * @param begin Index of the starting point to use for objective function
   *        evaluation.
   * @param batchSize Number of points to be passed at a time to use for
   *        objective function evaluation.
   * @param deterministic Whether or not to train or test the model. Note some
   *        layers act differently in training or testing mode.
   */
  double Evaluate(const arma::mat& parameters,
                  const size_t begin,
                  const size_t batchSize,
                  const bool deterministic);

  /**
   * Evaluate the feedforward network with the given parameters, but using only
   * a number of data points. This is useful for optimizers such as SGD, which
   * require a separable objective function.  This just calls the overload of
   * Evaluate() with deterministic = true.
   *
   * @param parameters Matrix model parameters.
   * @param begin Index of the starting point to use for objective function
   *        evaluation.
   * @param batchSize Number of points to be passed at a time to use for
   *        objective function evaluation.
   */
  double Evaluate(const arma::mat& parameters,
                  const size_t begin,
                  const size_t batchSize);

  /**
   * Evaluate the feedforward network with the given parameters.
   * This function is usually called by the optimizer to train the model.
   * This just calls the overload of EvaluateWithGradient() with batchSize = 1.
   *
   * @tparam GradType Type of the matrix the gradient is written into.
   * @param parameters Matrix model parameters.
   * @param gradient Matrix to output gradient into.
   */
  template<typename GradType>
  double EvaluateWithGradient(const arma::mat& parameters, GradType& gradient);

  /**
   * Evaluate the feedforward network with the given parameters, but using only
   * a number of data points. This is useful for optimizers such as SGD, which
   * require a separable objective function.
   *
   * @tparam GradType Type of the matrix the gradient is written into.
   * @param parameters Matrix model parameters.
   * @param begin Index of the starting point to use for objective function
   *        evaluation.
   * @param gradient Matrix to output gradient into.
   * @param batchSize Number of points to be passed at a time to use for
   *        objective function evaluation.
   */
  template<typename GradType>
  double EvaluateWithGradient(const arma::mat& parameters,
                              const size_t begin,
                              GradType& gradient,
                              const size_t batchSize);

  /**
   * Evaluate the gradient of the feedforward network with the given parameters,
   * and with respect to only a number of points in the dataset. This is useful
   * for optimizers such as SGD, which require a separable objective function.
   *
   * @param parameters Matrix of the model parameters to be optimized.
   * @param begin Index of the starting point to use for objective function
   *        gradient evaluation.
   * @param gradient Matrix to output gradient into.
   * @param batchSize Number of points to be processed as a batch for objective
   *        function gradient evaluation.
   */
  void Gradient(const arma::mat& parameters,
                const size_t begin,
                arma::mat& gradient,
                const size_t batchSize);

  /**
   * Shuffle the order of function visitation. This may be called by the
   * optimizer.
   */
  void Shuffle();

  /**
   * Add a new module to the model.
   *
   * A LayerType object is constructed with `new` from the given arguments and
   * the resulting pointer is appended to the list of layers.  The arguments
   * are taken by value.
   *
   * @tparam LayerType Type of the layer to construct.
   * @tparam Args Types of the layer constructor arguments.
   * @param args The layer parameter.
   */
  template <class LayerType, class... Args>
  void Add(Args... args) { network.push_back(new LayerType(args...)); }

  /**
   * Add a new module to the model.
   *
   * The given layer object is appended to the list of layers as-is.
   *
   * @param layer The Layer to be added to the model.
   */
  void Add(LayerTypes<CustomLayers...> layer) { network.push_back(layer); }

  //! Get the network model.
  const std::vector<LayerTypes<CustomLayers...> >& Model() const
  {
    return network;
  }
  //! Modify the network model.  Be careful!  If you change the structure of the
  //! network or parameters for layers, its state may become invalid, so be sure
  //! to call ResetParameters() afterwards.
  std::vector<LayerTypes<CustomLayers...> >& Model() { return network; }

  //! Return the number of separable functions (the number of predictor points).
  size_t NumFunctions() const { return numFunctions; }

  //! Return the initial point for the optimization.
  const arma::mat& Parameters() const { return parameter; }
  //! Modify the initial point for the optimization.
  arma::mat& Parameters() { return parameter; }

  //! Get the matrix of responses to the input data points.
  const arma::mat& Responses() const { return responses; }
  //! Modify the matrix of responses to the input data points.
  arma::mat& Responses() { return responses; }

  //! Get the matrix of data points (predictors).
  const arma::mat& Predictors() const { return predictors; }
  //! Modify the matrix of data points (predictors).
  arma::mat& Predictors() { return predictors; }

  /**
   * Reset the module information (weights/parameters).
   */
  void ResetParameters();

  //! Serialize the model.  The archive version argument is unnamed in this
  //! declaration.
  template<typename Archive>
  void serialize(Archive& ar, const unsigned int /* version */);

  /**
   * Perform the forward pass of the data in real batch mode.
   *
   * Forward and Backward should be used as a pair, and they are designed mainly
   * for advanced users. User should try to use Predict and Train unless those
   * two functions can't satisfy some special requirements.
   *
   * @param inputs The input data.
   * @param results The predicted results.
   */
  template<typename PredictorsType, typename ResponsesType>
  void Forward(const PredictorsType& inputs, ResponsesType& results);

  /**
   * Perform a partial forward pass of the data.
   *
   * This function is meant for the cases when users require a forward pass only
   * through certain layers and not the entire network.
   *
   * @param inputs The input data for the specified first layer.
   * @param results The predicted results from the specified last layer.
   * @param begin The index of the first layer.
   * @param end The index of the last layer.
   */
  template<typename PredictorsType, typename ResponsesType>
  void Forward(const PredictorsType& inputs ,
               ResponsesType& results,
               const size_t begin,
               const size_t end);

  /**
   * Perform the backward pass of the data in real batch mode.
   *
   * Forward and Backward should be used as a pair, and they are designed mainly
   * for advanced users. User should try to use Predict and Train unless those
   * two functions can't satisfy some special requirements.
   *
   * @param inputs Inputs of current pass.
   * @param targets The training target.
   * @param gradients Computed gradients.
   * @return Training error of the current pass.
   */
  template<typename PredictorsType,
           typename TargetsType,
           typename GradientsType>
  double Backward(const PredictorsType& inputs,
                  const TargetsType& targets,
                  GradientsType& gradients);

 private:
  // Helper functions.
  /**
   * The Forward algorithm (part of the Forward-Backward algorithm).  Computes
   * forward probabilities for each module.
   *
   * @param input Data sequence to compute probabilities for.
   */
  template<typename InputType>
  void Forward(const InputType& input);

  /**
   * Prepare the network for the given data.
   * This function won't actually trigger training process.
   *
   * @param predictors Input data variables.
   * @param responses Outputs results from input data variables.
   */
  void ResetData(arma::mat predictors, arma::mat responses);

  /**
   * The Backward algorithm (part of the Forward-Backward algorithm). Computes
   * backward pass for module.
   */
  void Backward();

  /**
   * Iterate through all layer modules and update the gradient using the
   * layer defined optimizer.
   *
   * @param input Input data of the current pass.
   */
  template<typename InputType>
  void Gradient(const InputType& input);

  /**
   * Reset the module status by setting the current deterministic parameter
   * for all modules that implement the Deterministic function.
   */
  void ResetDeterministic();

  /**
   * Reset the gradient for all modules that implement the Gradient function.
   *
   * @param gradient Gradient matrix used for the reset.
   */
  void ResetGradients(arma::mat& gradient);

  /**
   * Swap the content of this network with given network.
   *
   * @param network Desired source network.
   */
  void Swap(FFN& network);

  //! Instantiated outputlayer used to evaluate the network.
  OutputLayerType outputLayer;

  //! Instantiated InitializationRule object for initializing the network
  //! parameter.
  InitializationRuleType initializeRule;

  //! The input width.
  size_t width;

  //! The input height.
  size_t height;

  //! Indicator if we already trained the model.
  bool reset;

  //! Locally-stored model modules.
  std::vector<LayerTypes<CustomLayers...> > network;

  //! The matrix of data points (predictors).
  arma::mat predictors;

  //! The matrix of responses to the input data points.
  arma::mat responses;

  //! Matrix of (trained) parameters.
  arma::mat parameter;

  //! The number of separable functions (the number of predictor points).
  size_t numFunctions;

  //! The current error for the backward pass.
  arma::mat error;

  //! Locally-stored delta visitor.
  DeltaVisitor deltaVisitor;

  //! Locally-stored output parameter visitor.
  OutputParameterVisitor outputParameterVisitor;

  //! Locally-stored weight size visitor.
  WeightSizeVisitor weightSizeVisitor;

  //! Locally-stored output width visitor.
  OutputWidthVisitor outputWidthVisitor;

  //! Locally-stored output height visitor.
  OutputHeightVisitor outputHeightVisitor;

  //! Locally-stored loss visitor.
  LossVisitor lossVisitor;

  //! Locally-stored reset visitor.
  ResetVisitor resetVisitor;

  //! Locally-stored delete visitor.
  DeleteVisitor deleteVisitor;

  //! The current evaluation mode (training or testing).
  bool deterministic;

  //! Locally-stored delta object.
  arma::mat delta;

  //! Locally-stored input parameter object.
  arma::mat inputParameter;

  //! Locally-stored output parameter object.
  arma::mat outputParameter;

  //! Locally-stored gradient parameter.
  arma::mat gradient;

  //! Locally-stored copy visitor.
  CopyVisitor<CustomLayers...> copyVisitor;

  // The GAN class should have access to internal members.
  template<
    typename Model,
    typename InitializerType,
    typename NoiseType,
    typename PolicyType
  >
  friend class GAN;
}; // class FFN

} // namespace ann
} // namespace mlpack

//! Set the serialization version of the FFN class.  Multiple template arguments
//! makes this ugly...
namespace boost {
namespace serialization {

// Partial specialization covering every FFN instantiation; it fixes the
// archive class version for FFN at 2.
template<typename OutputLayerType,
         typename InitializationRuleType,
         typename... CustomLayer>
struct version<
    mlpack::ann::FFN<OutputLayerType, InitializationRuleType, CustomLayer...>>
{
  BOOST_STATIC_CONSTANT(int, value = 2);
};

} // namespace serialization
} // namespace boost

// Include implementation.
#include "ffn_impl.hpp"

#endif