1 // OpenNN: Open Neural Networks Library 2 // www.opennn.net 3 // 4 // A D A P T I V E M O M E N T E S T I M A T I O N 5 // 6 // Artificial Intelligence Techniques SL 7 // artelnics@artelnics.com 8 9 #ifndef ADAPTIVEMOMENTESTIMATION_H 10 #define ADAPTIVEMOMENTESTIMATION_H 11 12 // System includes 13 14 #include <string> 15 #include <sstream> 16 #include <iostream> 17 #include <fstream> 18 #include <algorithm> 19 #include <functional> 20 #include <limits> 21 #include <cmath> 22 #include <ctime> 23 #include <chrono> 24 #include <time.h> 25 #include <iostream> 26 #include <ctime> 27 #include <ratio> 28 #include <chrono> 29 30 // OpenNN includes 31 32 #include "loss_index.h" 33 #include "optimization_algorithm.h" 34 #include "config.h" 35 36 namespace OpenNN 37 { 38 39 /// This concrete class represents the adaptive moment estimation(Adam) training algorithm, 40 /// based on adaptive estimates of lower-order moments. 41 42 /// 43 /// For more information visit: 44 /// 45 /// \cite 1 C. Barranquero "High performance optimization algorithms for neural networks." 46 /// \ref https://www.opennn.net/files/high_performance_optimization_algorithms_for_neural_networks.pdf . 47 /// 48 /// \cite 2 D. P. Kingma and J. L. Ba, "ADAM: A Method for Stochastic Optimization." arXiv preprint arXiv:1412.6980v8 (2014). 49 50 class AdaptiveMomentEstimation : public OptimizationAlgorithm 51 { 52 53 public: 54 55 struct OptimizationData 56 { 57 /// Default constructor. 58 59 explicit OptimizationData(); 60 61 explicit OptimizationData(AdaptiveMomentEstimation* new_stochastic_gradient_descent_pointer); 62 63 virtual ~OptimizationData(); 64 65 void set(AdaptiveMomentEstimation* new_adaptive_moment_estimation_pointer); 66 67 void print() const; 68 69 AdaptiveMomentEstimation* adaptive_moment_estimation_pointer = nullptr; 70 71 Index learning_rate_iteration = 0; 72 73 Tensor<type, 1> parameters; 74 Tensor<type, 1> minimal_selection_parameters; 75 76 Tensor<type, 1> gradient_exponential_decay; 77 Tensor<type, 1> square_gradient_exponential_decay; 78 79 Tensor<type, 1> aux; 80 81 Index iteration; 82 }; 83 84 85 // Constructors 86 87 explicit AdaptiveMomentEstimation(); 88 89 explicit AdaptiveMomentEstimation(LossIndex*); 90 91 virtual ~AdaptiveMomentEstimation(); 92 93 // Training operators 94 95 const type& get_initial_learning_rate() const; 96 const type& get_beta_1() const; 97 const type& get_beta_2() const; 98 const type& get_epsilon() const; 99 100 // Stopping criteria 101 102 const type& get_loss_goal() const; 103 const type& get_maximum_time() const; 104 const bool& get_choose_best_selection() const; 105 106 // Reserve training history 107 108 const bool& get_reserve_training_error_history() const; 109 const bool& get_reserve_selection_error_history() const; 110 111 // Set methods 112 113 void set_loss_index_pointer(LossIndex*); 114 115 void set_reserve_all_training_history(const bool&); 116 117 void set_batch_samples_number(const Index& new_batch_samples_number); 118 119 // Training operators 120 121 void set_initial_learning_rate(const type&); 122 void set_beta_1(const type&); 123 void set_beta_2(const type&); 124 void set_epsilon(const type&); 125 126 // Training parameters 127 128 void set_maximum_epochs_number(const Index&); 129 130 // Stopping criteria 131 132 void set_loss_goal(const type&); 133 void set_maximum_time(const type&); 134 void set_choose_best_selection(const bool&); 135 136 // Reserve training history 137 138 void set_reserve_training_error_history(const bool&); 139 void set_reserve_selection_error_history(const bool&); 140 141 // Training methods 142 143 Results perform_training(); 144 145 /// Perform Neural Network training. 146 147 void perform_training_void(); 148 149 /// Return the algorithm optimum for your model. 150 151 string write_optimization_algorithm_type() const; 152 153 // Serialization methods 154 155 Tensor<string, 2> to_string_matrix() const; 156 157 void from_XML(const tinyxml2::XMLDocument&); 158 159 void write_XML(tinyxml2::XMLPrinter&) const; 160 161 void update_iteration(const LossIndex::BackPropagation& back_propagation, 162 OptimizationData& optimization_data); 163 164 private: 165 166 // TRAINING OPERATORS 167 168 /// Initial learning rate 169 170 type initial_learning_rate = static_cast<type>(0.001); 171 172 /// Learning rate decay over each update. 173 174 type initial_decay = 0; 175 176 /// Exponential decay over gradient estimates. 177 178 type beta_1 = static_cast<type>(0.9); 179 180 /// Exponential decay over square gradient estimates. 181 182 type beta_2 = static_cast<type>(0.999); 183 184 /// Small number to prevent any division by zero 185 186 type epsilon =static_cast<type>(1.e-7); 187 188 // Stopping criteria 189 190 /// Goal value for the loss. It is used as a stopping criterion. 191 192 type training_loss_goal = 0; 193 194 /// gradient norm goal. It is used as a stopping criterion. 195 196 type gradient_norm_goal = 0; 197 198 /// Maximum epochs number 199 200 Index maximum_epochs_number = 10000; 201 202 /// Maximum selection error allowed 203 204 Index maximum_selection_error_increases = 1000; 205 206 /// Maximum training time. It is used as a stopping criterion. 207 208 type maximum_time = 3600; 209 210 /// True if the final model will be the neural network with the minimum selection error, false otherwise. 211 212 bool choose_best_selection = false; 213 214 // TRAINING HISTORY 215 216 /// True if the error history vector is to be reserved, false otherwise. 217 218 bool reserve_training_error_history = true; 219 220 /// True if the selection error history vector is to be reserved, false otherwise. 221 222 bool reserve_selection_error_history = true; 223 224 /// Training and selection batch size. 225 226 Index batch_samples_number = 1000; 227 228 /// Hardware use. 229 230 string hardware_use = "Multi-core"; 231 232 #ifdef OPENNN_CUDA 233 #include "../../opennn-cuda/opennn_cuda/adaptive_moment_estimation_cuda.h" 234 #endif 235 236 #ifdef OPENNN_MKL 237 #include "../../opennn-mkl/opennn_mkl/adaptive_moment_estimation_mkl.h" 238 #endif 239 240 }; 241 242 } 243 244 #endif 245