1 // OpenNN: Open Neural Networks Library 2 // www.opennn.net 3 // 4 // G R A D I E N T D E S C E N T C L A S S H E A D E R 5 // 6 // Artificial Intelligence Techniques SL 7 // artelnics@artelnics.com 8 9 #ifndef GRADIENTDESCENT_H 10 #define GRADIENTDESCENT_H 11 12 // System includes 13 14 #include <string> 15 #include <sstream> 16 #include <iostream> 17 #include <fstream> 18 #include <algorithm> 19 #include <functional> 20 #include <limits> 21 #include <limits.h> 22 #include <cmath> 23 #include <ctime> 24 25 // OpenNN includes 26 27 #include "loss_index.h" 28 29 #include "optimization_algorithm.h" 30 #include "learning_rate_algorithm.h" 31 #include "config.h" 32 33 34 namespace OpenNN 35 { 36 37 /// The process of making changes to weights and biases, 38 /// where the changes are propotyional to derivatives of network error with respect to those weights and biases. 39 /// This is done to minimize network error. 40 41 /// This concrete class represents the gradient descent optimization algorithm[1], used to minimize loss function. 42 /// 43 /// \cite 1 Neural Designer "5 Algorithms to Train a Neural Network." 44 /// \ref https://www.neuraldesigner.com/blog/5_algorithms_to_train_a_neural_network 45 46 class GradientDescent : public OptimizationAlgorithm 47 { 48 49 public: 50 51 struct GDOptimizationData : public OptimizationData 52 { 53 /// Default constructor. 54 GDOptimizationDataGDOptimizationData55 explicit GDOptimizationData() 56 { 57 } 58 GDOptimizationDataGDOptimizationData59 explicit GDOptimizationData(GradientDescent* new_gradient_descent_pointer) 60 { 61 set(new_gradient_descent_pointer); 62 } 63 ~GDOptimizationDataGDOptimizationData64 virtual ~GDOptimizationData() {} 65 setGDOptimizationData66 void set(GradientDescent* new_gradient_descent_pointer) 67 { 68 gradient_descent_pointer = new_gradient_descent_pointer; 69 70 LossIndex* loss_index_pointer = gradient_descent_pointer->get_loss_index_pointer(); 71 72 NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer(); 73 74 const Index parameters_number = neural_network_pointer->get_parameters_number(); 75 76 // Neural network data 77 78 parameters.resize(parameters_number); 79 parameters = neural_network_pointer->get_parameters(); 80 81 old_parameters.resize(parameters_number); 82 potential_parameters.resize(parameters_number); 83 84 parameters_increment.resize(parameters_number); 85 86 // Loss index data 87 88 old_gradient.resize(parameters_number); 89 90 // Optimization algorithm data 91 92 training_direction.resize(parameters_number); 93 } 94 printGDOptimizationData95 void print() const 96 { 97 cout << "Training Direction:" << endl; 98 cout << training_direction << endl; 99 100 cout << "Learning rate:" << endl; 101 cout << learning_rate << endl; 102 103 cout << "Parameters:" << endl; 104 cout << parameters << endl; 105 } 106 107 GradientDescent* gradient_descent_pointer = nullptr; 108 109 // Neural network data 110 111 Tensor<type, 1> old_parameters; 112 113 Tensor<type, 1> parameters_increment; 114 115 type parameters_increment_norm = 0; 116 117 // Loss index data 118 119 type old_training_loss = 0; 120 121 Tensor<type, 1> old_gradient; 122 123 Tensor<type, 2> inverse_hessian; 124 Tensor<type, 2> old_inverse_hessian; 125 126 // Optimization algorithm data 127 128 Index epoch = 0; 129 130 Tensor<type, 0> training_slope; 131 132 type learning_rate = 0; 133 type old_learning_rate = 0; 134 }; 135 136 // Constructors 137 138 explicit GradientDescent(); 139 140 explicit GradientDescent(LossIndex*); 141 142 // Destructor 143 144 virtual ~GradientDescent(); 145 146 const LearningRateAlgorithm& get_learning_rate_algorithm() const; 147 LearningRateAlgorithm* get_learning_rate_algorithm_pointer(); 148 149 string get_hardware_use() const; 150 151 // Stopping criteria 152 153 const type& get_minimum_parameters_increment_norm() const; 154 155 const type& get_minimum_loss_decrease() const; 156 const type& get_loss_goal() const; 157 const type& get_gradient_norm_goal() const; 158 const Index& get_maximum_selection_error_increases() const; 159 160 const Index& get_maximum_epochs_number() const; 161 const type& get_maximum_time() const; 162 163 const bool& get_choose_best_selection() const; 164 165 // Reserve training history 166 167 const bool& get_reserve_training_error_history() const; 168 const bool& get_reserve_selection_error_history() const; 169 170 // Set methods 171 172 void set_loss_index_pointer(LossIndex*); 173 174 void set_learning_rate_algorithm(const LearningRateAlgorithm&); 175 176 void set_default(); 177 178 void set_reserve_all_training_history(const bool&); 179 180 // Stopping criteria 181 182 void set_maximum_epochs_number(const Index&); 183 184 void set_minimum_parameters_increment_norm(const type&); 185 186 void set_minimum_loss_decrease(const type&); 187 void set_loss_goal(const type&); 188 void set_gradient_norm_goal(const type&); 189 void set_maximum_selection_error_increases(const Index&); 190 191 void set_maximum_time(const type&); 192 193 void set_choose_best_selection(const bool&); 194 195 // Reserve training history 196 197 void set_reserve_training_error_history(const bool&); 198 void set_reserve_selection_error_history(const bool&); 199 200 // Training methods 201 202 void calculate_training_direction(const Tensor<type, 1>&, Tensor<type, 1>&) const; 203 204 void update_epoch( 205 const DataSet::Batch& batch, 206 NeuralNetwork::ForwardPropagation& forward_propagation, 207 LossIndex::BackPropagation& back_propagation, 208 GDOptimizationData& optimization_data); 209 210 Results perform_training(); 211 212 void perform_training_void(); 213 214 string write_optimization_algorithm_type() const; 215 216 // Serialization methods 217 218 Tensor<string, 2> to_string_matrix() const; 219 220 void from_XML(const tinyxml2::XMLDocument&); 221 222 void write_XML(tinyxml2::XMLPrinter&) const; 223 224 private: 225 226 // TRAINING OPERATORS 227 228 /// Learning rate algorithm object for one-dimensional minimization. 229 230 LearningRateAlgorithm learning_rate_algorithm; 231 232 type first_learning_rate = static_cast<type>(0.01); 233 234 // Stopping criteria 235 236 /// Norm of the parameters increment vector at which training stops. 237 238 type minimum_parameters_increment_norm; 239 240 /// Minimum loss improvement between two successive iterations. It is used as a stopping criterion. 241 242 type minimum_loss_decrease; 243 244 /// Goal value for the loss. It is used as a stopping criterion. 245 246 type training_loss_goal; 247 248 /// Goal value for the norm of the error function gradient. It is used as a stopping criterion. 249 250 type gradient_norm_goal; 251 252 /// Maximum number of epochs at which the selection error increases. 253 /// This is an early stopping method for improving selection. 254 255 Index maximum_selection_error_increases; 256 257 /// Maximum epochs number 258 259 Index maximum_epochs_number; 260 261 /// Maximum training time. It is used as a stopping criterion. 262 263 type maximum_time; 264 265 /// True if the final model will be the neural network with the minimum selection error, false otherwise. 266 267 bool choose_best_selection = false; 268 269 // TRAINING HISTORY 270 271 /// True if the loss history vector is to be reserved, false otherwise. 272 273 bool reserve_training_error_history; 274 275 /// True if the selection error history vector is to be reserved, false otherwise. 276 277 bool reserve_selection_error_history; 278 279 }; 280 281 } 282 283 #endif 284