1 #ifndef NETWORK_H_ 2 #define NETWORK_H_ 3 4 #include <Eigen/Core> 5 #include <vector> 6 #include <map> 7 #include <stdexcept> 8 #include "Config.h" 9 #include "RNG.h" 10 #include "Layer.h" 11 #include "Output.h" 12 #include "Callback.h" 13 #include "Utils/Random.h" 14 #include "Utils/IO.h" 15 #include "Utils/Factory.h" 16 17 namespace MiniDNN 18 { 19 20 21 /// 22 /// \defgroup Network Neural Network Model 23 /// 24 25 /// 26 /// \ingroup Network 27 /// 28 /// This class represents a neural network model that typically consists of a 29 /// number of hidden layers and an output layer. It provides functions for 30 /// network building, model fitting, and prediction, etc. 31 /// 32 class Network 33 { 34 private: 35 typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix; 36 typedef Eigen::RowVectorXi IntegerVector; 37 typedef std::map<std::string, int> MetaInfo; 38 39 RNG m_default_rng; // Built-in RNG 40 RNG& m_rng; // Reference to the RNG provided by the user, 41 // otherwise reference to m_default_rng 42 std::vector<Layer*> m_layers; // Pointers to hidden layers 43 Output* m_output; // The output layer 44 Callback m_default_callback; // Default callback function 45 Callback* m_callback; // Points to user-provided callback function, 46 // otherwise points to m_default_callback 47 48 // Check dimensions of layers check_unit_sizes()49 void check_unit_sizes() const 50 { 51 const int nlayer = num_layers(); 52 53 if (nlayer <= 1) 54 { 55 return; 56 } 57 58 for (int i = 1; i < nlayer; i++) 59 { 60 if (m_layers[i]->in_size() != m_layers[i - 1]->out_size()) 61 { 62 throw std::invalid_argument("[class Network]: Unit sizes do not match"); 63 } 64 } 65 } 66 67 // Let each layer compute its output forward(const Matrix & input)68 void forward(const Matrix& input) 69 { 70 const int nlayer = num_layers(); 71 72 if (nlayer <= 0) 73 { 74 return; 75 } 76 77 // First layer 78 if (input.rows() != m_layers[0]->in_size()) 79 { 80 throw std::invalid_argument("[class Network]: Input data have incorrect dimension"); 81 } 82 83 m_layers[0]->forward(input); 84 85 // The following layers 86 for (int i = 1; i < nlayer; i++) 87 { 88 m_layers[i]->forward(m_layers[i - 1]->output()); 89 } 90 } 91 92 // Let each layer compute its gradients of the parameters 93 // target has two versions: Matrix and RowVectorXi 94 // The RowVectorXi version is used in classification problems where each 95 // element is a class label 96 template <typename TargetType> backprop(const Matrix & input,const TargetType & target)97 void backprop(const Matrix& input, const TargetType& target) 98 { 99 const int nlayer = num_layers(); 100 101 if (nlayer <= 0) 102 { 103 return; 104 } 105 106 Layer* first_layer = m_layers[0]; 107 Layer* last_layer = m_layers[nlayer - 1]; 108 // Let output layer compute back-propagation data 109 m_output->check_target_data(target); 110 m_output->evaluate(last_layer->output(), target); 111 112 // If there is only one hidden layer, "prev_layer_data" will be the input data 113 if (nlayer == 1) 114 { 115 first_layer->backprop(input, m_output->backprop_data()); 116 return; 117 } 118 119 // Compute gradients for the last hidden layer 120 last_layer->backprop(m_layers[nlayer - 2]->output(), m_output->backprop_data()); 121 122 // Compute gradients for all the hidden layers except for the first one and the last one 123 for (int i = nlayer - 2; i > 0; i--) 124 { 125 m_layers[i]->backprop(m_layers[i - 1]->output(), 126 m_layers[i + 1]->backprop_data()); 127 } 128 129 // Compute gradients for the first layer 130 first_layer->backprop(input, m_layers[1]->backprop_data()); 131 } 132 133 // Update parameters update(Optimizer & opt)134 void update(Optimizer& opt) 135 { 136 const int nlayer = num_layers(); 137 138 if (nlayer <= 0) 139 { 140 return; 141 } 142 143 for (int i = 0; i < nlayer; i++) 144 { 145 m_layers[i]->update(opt); 146 } 147 } 148 149 // Get the meta information of the network, used to export the NN model get_meta_info()150 MetaInfo get_meta_info() const 151 { 152 const int nlayer = num_layers(); 153 MetaInfo map; 154 map.insert(std::make_pair("Nlayers", nlayer)); 155 156 for (int i = 0; i < nlayer; i++) 157 { 158 m_layers[i]->fill_meta_info(map, i); 159 } 160 161 map.insert(std::make_pair("OutputLayer", internal::output_id(m_output->output_type()))); 162 return map; 163 } 164 165 public: 166 /// 167 /// Default constructor that creates an empty neural network 168 /// Network()169 Network() : 170 m_default_rng(1), 171 m_rng(m_default_rng), 172 m_output(NULL), 173 m_default_callback(), 174 m_callback(&m_default_callback) 175 {} 176 177 /// 178 /// Constructor with a user-provided random number generator 179 /// 180 /// \param rng A user-provided random number generator object that inherits 181 /// from the default RNG class. 182 /// Network(RNG & rng)183 Network(RNG& rng) : 184 m_default_rng(1), 185 m_rng(rng), 186 m_output(NULL), 187 m_default_callback(), 188 m_callback(&m_default_callback) 189 {} 190 191 /// 192 /// Destructor that frees the added hidden layers and output layer 193 /// ~Network()194 ~Network() 195 { 196 const int nlayer = num_layers(); 197 198 for (int i = 0; i < nlayer; i++) 199 { 200 delete m_layers[i]; 201 } 202 203 if (m_output) 204 { 205 delete m_output; 206 } 207 } 208 209 /// 210 /// Add a hidden layer to the neural network 211 /// 212 /// \param layer A pointer to a Layer object, typically constructed from 213 /// layer classes such as FullyConnected and Convolutional. 214 /// **NOTE**: the pointer will be handled and freed by the 215 /// network object, so do not delete it manually. 216 /// add_layer(Layer * layer)217 void add_layer(Layer* layer) 218 { 219 m_layers.push_back(layer); 220 } 221 222 /// 223 /// Set the output layer of the neural network 224 /// 225 /// \param output A pointer to an Output object, typically constructed from 226 /// output layer classes such as RegressionMSE and MultiClassEntropy. 227 /// **NOTE**: the pointer will be handled and freed by the 228 /// network object, so do not delete it manually. 229 /// set_output(Output * output)230 void set_output(Output* output) 231 { 232 if (m_output) 233 { 234 delete m_output; 235 } 236 237 m_output = output; 238 } 239 240 /// 241 /// Number of hidden layers in the network 242 /// num_layers()243 int num_layers() const 244 { 245 return m_layers.size(); 246 } 247 248 /// 249 /// Get the list of hidden layers of the network 250 /// get_layers()251 std::vector<const Layer*> get_layers() const 252 { 253 const int nlayer = num_layers(); 254 std::vector<const Layer*> layers(nlayer); 255 std::copy(m_layers.begin(), m_layers.end(), layers.begin()); 256 return layers; 257 } 258 259 /// 260 /// Get the output layer 261 /// get_output()262 const Output* get_output() const 263 { 264 return m_output; 265 } 266 267 /// 268 /// Set the callback function that can be called during model fitting 269 /// 270 /// \param callback A user-provided callback function object that inherits 271 /// from the default Callback class. 272 /// set_callback(Callback & callback)273 void set_callback(Callback& callback) 274 { 275 m_callback = &callback; 276 } 277 /// 278 /// Set the default silent callback function 279 /// set_default_callback()280 void set_default_callback() 281 { 282 m_callback = &m_default_callback; 283 } 284 285 /// 286 /// Initialize layer parameters in the network using normal distribution 287 /// 288 /// \param mu Mean of the normal distribution. 289 /// \param sigma Standard deviation of the normal distribution. 290 /// \param seed Set the random seed of the %RNG if `seed > 0`, otherwise 291 /// use the current random state. 292 /// 293 void init(const Scalar& mu = Scalar(0), const Scalar& sigma = Scalar(0.01), 294 int seed = -1) 295 { 296 check_unit_sizes(); 297 298 if (seed > 0) 299 { 300 m_rng.seed(seed); 301 } 302 303 const int nlayer = num_layers(); 304 305 for (int i = 0; i < nlayer; i++) 306 { 307 m_layers[i]->init(mu, sigma, m_rng); 308 } 309 } 310 311 /// 312 /// Get the serialized layer parameters 313 /// get_parameters()314 std::vector< std::vector<Scalar> > get_parameters() const 315 { 316 const int nlayer = num_layers(); 317 std::vector< std::vector<Scalar> > res; 318 res.reserve(nlayer); 319 320 for (int i = 0; i < nlayer; i++) 321 { 322 res.push_back(m_layers[i]->get_parameters()); 323 } 324 325 return res; 326 } 327 328 /// 329 /// Set the layer parameters 330 /// 331 /// \param param Serialized layer parameters 332 /// set_parameters(const std::vector<std::vector<Scalar>> & param)333 void set_parameters(const std::vector< std::vector<Scalar> >& param) 334 { 335 const int nlayer = num_layers(); 336 337 if (static_cast<int>(param.size()) != nlayer) 338 { 339 throw std::invalid_argument("[class Network]: Parameter size does not match"); 340 } 341 342 for (int i = 0; i < nlayer; i++) 343 { 344 m_layers[i]->set_parameters(param[i]); 345 } 346 } 347 348 /// 349 /// Get the serialized derivatives of layer parameters 350 /// get_derivatives()351 std::vector< std::vector<Scalar> > get_derivatives() const 352 { 353 const int nlayer = num_layers(); 354 std::vector< std::vector<Scalar> > res; 355 res.reserve(nlayer); 356 357 for (int i = 0; i < nlayer; i++) 358 { 359 res.push_back(m_layers[i]->get_derivatives()); 360 } 361 362 return res; 363 } 364 365 /// 366 /// Debugging tool to check parameter gradients 367 /// 368 template <typename TargetType> 369 void check_gradient(const Matrix& input, const TargetType& target, int npoints, 370 int seed = -1) 371 { 372 if (seed > 0) 373 { 374 m_rng.seed(seed); 375 } 376 377 this->forward(input); 378 this->backprop(input, target); 379 std::vector< std::vector<Scalar> > param = this->get_parameters(); 380 std::vector< std::vector<Scalar> > deriv = this->get_derivatives(); 381 const Scalar eps = 1e-5; 382 const int nlayer = deriv.size(); 383 384 for (int i = 0; i < npoints; i++) 385 { 386 // Randomly select a layer 387 const int layer_id = int(m_rng.rand() * nlayer); 388 // Randomly pick a parameter, note that some layers may have no parameters 389 const int nparam = deriv[layer_id].size(); 390 391 if (nparam < 1) 392 { 393 continue; 394 } 395 396 const int param_id = int(m_rng.rand() * nparam); 397 // Turbulate the parameter a little bit 398 const Scalar old = param[layer_id][param_id]; 399 param[layer_id][param_id] -= eps; 400 this->set_parameters(param); 401 this->forward(input); 402 this->backprop(input, target); 403 const Scalar loss_pre = m_output->loss(); 404 param[layer_id][param_id] += eps * 2; 405 this->set_parameters(param); 406 this->forward(input); 407 this->backprop(input, target); 408 const Scalar loss_post = m_output->loss(); 409 const Scalar deriv_est = (loss_post - loss_pre) / eps / 2; 410 std::cout << "[layer " << layer_id << ", param " << param_id << 411 "] deriv = " << deriv[layer_id][param_id] << ", est = " << deriv_est << 412 ", diff = " << deriv_est - deriv[layer_id][param_id] << std::endl; 413 param[layer_id][param_id] = old; 414 } 415 416 // Restore original parameters 417 this->set_parameters(param); 418 } 419 420 /// 421 /// Fit the model based on the given data 422 /// 423 /// \param opt An object that inherits from the Optimizer class, indicating the optimization algorithm to use. 424 /// \param x The predictors. Each column is an observation. 425 /// \param y The response variable. Each column is an observation. 426 /// \param batch_size Mini-batch size. 427 /// \param epoch Number of epochs of training. 428 /// \param seed Set the random seed of the %RNG if `seed > 0`, otherwise 429 /// use the current random state. 430 /// 431 template <typename DerivedX, typename DerivedY> 432 bool fit(Optimizer& opt, const Eigen::MatrixBase<DerivedX>& x, 433 const Eigen::MatrixBase<DerivedY>& y, 434 int batch_size, int epoch, int seed = -1) 435 { 436 // We do not directly use PlainObjectX since it may be row-majored if x is passed as mat.transpose() 437 // We want to force XType and YType to be column-majored 438 typedef typename Eigen::MatrixBase<DerivedX>::PlainObject PlainObjectX; 439 typedef typename Eigen::MatrixBase<DerivedY>::PlainObject PlainObjectY; 440 typedef Eigen::Matrix<typename PlainObjectX::Scalar, PlainObjectX::RowsAtCompileTime, PlainObjectX::ColsAtCompileTime> 441 XType; 442 typedef Eigen::Matrix<typename PlainObjectY::Scalar, PlainObjectY::RowsAtCompileTime, PlainObjectY::ColsAtCompileTime> 443 YType; 444 const int nlayer = num_layers(); 445 446 if (nlayer <= 0) 447 { 448 return false; 449 } 450 451 // Reset optimizer 452 opt.reset(); 453 454 // Create shuffled mini-batches 455 if (seed > 0) 456 { 457 m_rng.seed(seed); 458 } 459 460 std::vector<XType> x_batches; 461 std::vector<YType> y_batches; 462 const int nbatch = internal::create_shuffled_batches(x, y, batch_size, m_rng, 463 x_batches, y_batches); 464 // Set up callback parameters 465 m_callback->m_nbatch = nbatch; 466 m_callback->m_nepoch = epoch; 467 468 // Iterations on the whole data set 469 for (int k = 0; k < epoch; k++) 470 { 471 m_callback->m_epoch_id = k; 472 473 // Train on each mini-batch 474 for (int i = 0; i < nbatch; i++) 475 { 476 m_callback->m_batch_id = i; 477 m_callback->pre_training_batch(this, x_batches[i], y_batches[i]); 478 this->forward(x_batches[i]); 479 this->backprop(x_batches[i], y_batches[i]); 480 this->update(opt); 481 m_callback->post_training_batch(this, x_batches[i], y_batches[i]); 482 } 483 } 484 485 return true; 486 } 487 488 /// 489 /// Use the fitted model to make predictions 490 /// 491 /// \param x The predictors. Each column is an observation. 492 /// predict(const Matrix & x)493 Matrix predict(const Matrix& x) 494 { 495 const int nlayer = num_layers(); 496 497 if (nlayer <= 0) 498 { 499 return Matrix(); 500 } 501 502 this->forward(x); 503 return m_layers[nlayer - 1]->output(); 504 } 505 506 /// 507 /// Export the network to files. 508 /// 509 /// \param folder The folder where the network is saved. 510 /// \param fileName The filename for the network. 511 /// export_net(const std::string & folder,const std::string & filename)512 void export_net(const std::string& folder, const std::string& filename) const 513 { 514 bool created = internal::create_directory(folder); 515 if (!created) 516 throw std::runtime_error("[class Network]: Folder creation failed"); 517 518 MetaInfo map = this->get_meta_info(); 519 internal::write_map(folder + "/" + filename, map); 520 std::vector< std::vector<Scalar> > params = this->get_parameters(); 521 internal::write_parameters(folder, filename, params); 522 } 523 524 /// 525 /// Read in a network from files. 526 /// 527 /// \param folder The folder where the network is saved. 528 /// \param fileName The filename for the network. 529 /// read_net(const std::string & folder,const std::string & filename)530 void read_net(const std::string& folder, const std::string& filename) 531 { 532 MetaInfo map; 533 internal::read_map(folder + "/" + filename, map); 534 int nlayer = map.find("Nlayers")->second; 535 std::vector< std::vector<Scalar> > params = internal::read_parameters(folder, filename, nlayer); 536 m_layers.clear(); 537 538 for (int i = 0; i < nlayer; i++) 539 { 540 this->add_layer(internal::create_layer(map, i)); 541 } 542 543 this->set_parameters(params); 544 this->set_output(internal::create_output(map)); 545 } 546 }; 547 548 549 } // namespace MiniDNN 550 551 552 #endif /* NETWORK_H_ */ 553