1 //   OpenNN: Open Neural Networks Library
2 //   www.opennn.net
3 //
4 //   A D A P T I V E   M O M E N T   E S T I M A T I O N
5 //
6 //   Artificial Intelligence Techniques SL
7 //   artelnics@artelnics.com
8 
9 #ifndef ADAPTIVEMOMENTESTIMATION_H
10 #define ADAPTIVEMOMENTESTIMATION_H
11 
12 // System includes
13 
14 #include <string>
15 #include <sstream>
16 #include <iostream>
17 #include <fstream>
18 #include <algorithm>
19 #include <functional>
20 #include <limits>
21 #include <cmath>
22 #include <ctime>
23 #include <chrono>
24 #include <time.h>
25 #include <iostream>
26 #include <ctime>
27 #include <ratio>
28 #include <chrono>
29 
30 // OpenNN includes
31 
32 #include "loss_index.h"
33 #include "optimization_algorithm.h"
34 #include "config.h"
35 
36 namespace OpenNN
37 {
38 
/// This concrete class represents the adaptive moment estimation (Adam) training algorithm,
/// based on adaptive estimates of lower-order moments.
41 
42 ///
43 /// For more information visit:
44 ///
45 /// \cite 1 C. Barranquero "High performance optimization algorithms for neural networks."
46 /// \ref https://www.opennn.net/files/high_performance_optimization_algorithms_for_neural_networks.pdf .
47 ///
48 /// \cite 2 D. P. Kingma and J. L. Ba, "ADAM: A Method for Stochastic Optimization." arXiv preprint arXiv:1412.6980v8 (2014).
49 
50 class AdaptiveMomentEstimation : public OptimizationAlgorithm
51 {
52 
53 public:
54 
55     struct OptimizationData
56     {
57         /// Default constructor.
58 
59         explicit OptimizationData();
60 
61         explicit OptimizationData(AdaptiveMomentEstimation* new_stochastic_gradient_descent_pointer);
62 
63         virtual ~OptimizationData();
64 
65         void set(AdaptiveMomentEstimation* new_adaptive_moment_estimation_pointer);
66 
67         void print() const;
68 
69         AdaptiveMomentEstimation* adaptive_moment_estimation_pointer = nullptr;
70 
71         Index learning_rate_iteration = 0;
72 
73         Tensor<type, 1> parameters;
74         Tensor<type, 1> minimal_selection_parameters;
75 
76         Tensor<type, 1> gradient_exponential_decay;
77         Tensor<type, 1> square_gradient_exponential_decay;
78 
79         Tensor<type, 1> aux;
80 
81         Index iteration;
82     };
83 
84 
85    // Constructors
86 
87    explicit AdaptiveMomentEstimation();
88 
89    explicit AdaptiveMomentEstimation(LossIndex*);
90 
91    virtual ~AdaptiveMomentEstimation();
92 
93    // Training operators
94 
95    const type& get_initial_learning_rate() const;
96    const type& get_beta_1() const;
97    const type& get_beta_2() const;
98    const type& get_epsilon() const;
99 
100    // Stopping criteria
101 
102    const type& get_loss_goal() const;
103    const type& get_maximum_time() const;
104    const bool& get_choose_best_selection() const;
105 
106    // Reserve training history
107 
108    const bool& get_reserve_training_error_history() const;
109    const bool& get_reserve_selection_error_history() const;
110 
111    // Set methods
112 
113    void set_loss_index_pointer(LossIndex*);
114 
115    void set_reserve_all_training_history(const bool&);
116 
117    void set_batch_samples_number(const Index& new_batch_samples_number);
118 
119    // Training operators
120 
121    void set_initial_learning_rate(const type&);
122    void set_beta_1(const type&);
123    void set_beta_2(const type&);
124    void set_epsilon(const type&);
125 
126    // Training parameters
127 
128    void set_maximum_epochs_number(const Index&);
129 
130    // Stopping criteria
131 
132    void set_loss_goal(const type&);
133    void set_maximum_time(const type&);
134    void set_choose_best_selection(const bool&);
135 
136    // Reserve training history
137 
138    void set_reserve_training_error_history(const bool&);
139    void set_reserve_selection_error_history(const bool&);
140 
141    // Training methods
142 
143    Results perform_training();
144 
145    /// Perform Neural Network training.
146 
147    void perform_training_void();
148 
149    /// Return the algorithm optimum for your model.
150 
151    string write_optimization_algorithm_type() const;
152 
153    // Serialization methods
154 
155    Tensor<string, 2> to_string_matrix() const;
156 
157    void from_XML(const tinyxml2::XMLDocument&);
158 
159    void write_XML(tinyxml2::XMLPrinter&) const;
160 
161    void update_iteration(const LossIndex::BackPropagation& back_propagation,
162                                  OptimizationData& optimization_data);
163 
164 private:
165 
166    // TRAINING OPERATORS
167 
168    /// Initial learning rate
169 
170    type initial_learning_rate = static_cast<type>(0.001);
171 
172    /// Learning rate decay over each update.
173 
174    type initial_decay = 0;
175 
176    /// Exponential decay over gradient estimates.
177 
178    type beta_1 = static_cast<type>(0.9);
179 
180    /// Exponential decay over square gradient estimates.
181 
182    type beta_2 = static_cast<type>(0.999);
183 
184    /// Small number to prevent any division by zero
185 
186    type epsilon =static_cast<type>(1.e-7);
187 
188     // Stopping criteria
189 
190    /// Goal value for the loss. It is used as a stopping criterion.
191 
192    type training_loss_goal = 0;
193 
194    /// gradient norm goal. It is used as a stopping criterion.
195 
196    type gradient_norm_goal = 0;
197 
198    /// Maximum epochs number
199 
200    Index maximum_epochs_number = 10000;
201 
202    /// Maximum selection error allowed
203 
204    Index maximum_selection_error_increases = 1000;
205 
206    /// Maximum training time. It is used as a stopping criterion.
207 
208    type maximum_time = 3600;
209 
210    /// True if the final model will be the neural network with the minimum selection error, false otherwise.
211 
212    bool choose_best_selection = false;
213 
214    // TRAINING HISTORY
215 
216    /// True if the error history vector is to be reserved, false otherwise.
217 
218    bool reserve_training_error_history = true;
219 
220    /// True if the selection error history vector is to be reserved, false otherwise.
221 
222    bool reserve_selection_error_history = true;
223 
224    /// Training and selection batch size.
225 
226    Index batch_samples_number = 1000;
227 
228    /// Hardware use.
229 
230    string hardware_use = "Multi-core";
231 
232 #ifdef OPENNN_CUDA
233     #include "../../opennn-cuda/opennn_cuda/adaptive_moment_estimation_cuda.h"
234 #endif
235 
236 #ifdef OPENNN_MKL
237     #include "../../opennn-mkl/opennn_mkl/adaptive_moment_estimation_mkl.h"
238 #endif
239 
240 };
241 
242 }
243 
244 #endif
245