//   OpenNN: Open Neural Networks Library
//   www.opennn.net
//
//   G R A D I E N T   D E S C E N T   C L A S S   H E A D E R
//
//   Artificial Intelligence Techniques SL
//   artelnics@artelnics.com

#ifndef GRADIENTDESCENT_H
#define GRADIENTDESCENT_H

// System includes

#include <string>
#include <sstream>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <functional>
#include <limits>
#include <limits.h>
#include <cmath>
#include <ctime>

// OpenNN includes

#include "loss_index.h"

#include "optimization_algorithm.h"
#include "learning_rate_algorithm.h"
#include "config.h"


namespace OpenNN
{

/// Gradient descent is the process of making changes to the weights and biases,
/// where the changes are proportional to the derivatives of the network error with respect to those weights and biases.
/// This is done to minimize the network error.

/// This concrete class represents the gradient descent optimization algorithm [1], used to minimize the loss function.
///
/// \cite 1 Neural Designer. "5 Algorithms to Train a Neural Network."
/// \ref https://www.neuraldesigner.com/blog/5_algorithms_to_train_a_neural_network
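///
/// A minimal usage sketch based only on the interface declared in this header;
/// the MeanSquaredError class and the neural_network and data_set objects are
/// assumptions about a typical OpenNN setup, not part of this file:
///
/// \code
/// // Assume a NeuralNetwork and a DataSet have been configured elsewhere.
/// MeanSquaredError mean_squared_error(&neural_network, &data_set);
///
/// GradientDescent gradient_descent(&mean_squared_error);
///
/// gradient_descent.set_loss_goal(static_cast<type>(1.0e-3));
/// gradient_descent.set_maximum_epochs_number(1000);
///
/// const OptimizationAlgorithm::Results results = gradient_descent.perform_training();
/// \endcode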

class GradientDescent : public OptimizationAlgorithm
{

public:

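    /// This structure contains the optimization data for the gradient descent algorithm.
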
    struct GDOptimizationData : public OptimizationData
    {
        /// Default constructor.

        explicit GDOptimizationData()
        {
        }

        explicit GDOptimizationData(GradientDescent* new_gradient_descent_pointer)
        {
            set(new_gradient_descent_pointer);
        }

        virtual ~GDOptimizationData() {}

        void set(GradientDescent* new_gradient_descent_pointer)
        {
            gradient_descent_pointer = new_gradient_descent_pointer;

            LossIndex* loss_index_pointer = gradient_descent_pointer->get_loss_index_pointer();

            NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();

            const Index parameters_number = neural_network_pointer->get_parameters_number();

            // Neural network data

            parameters.resize(parameters_number);
            parameters = neural_network_pointer->get_parameters();

            old_parameters.resize(parameters_number);
            potential_parameters.resize(parameters_number);

            parameters_increment.resize(parameters_number);

            // Loss index data

            old_gradient.resize(parameters_number);

            // Optimization algorithm data

            training_direction.resize(parameters_number);
        }

        void print() const
        {
            cout << "Training Direction:" << endl;
            cout << training_direction << endl;

            cout << "Learning rate:" << endl;
            cout << learning_rate << endl;

            cout << "Parameters:" << endl;
            cout << parameters << endl;
        }

        GradientDescent* gradient_descent_pointer = nullptr;

        // Neural network data

        Tensor<type, 1> old_parameters;

        Tensor<type, 1> parameters_increment;

        type parameters_increment_norm = 0;

        // Loss index data

        type old_training_loss = 0;

        Tensor<type, 1> old_gradient;

        Tensor<type, 2> inverse_hessian;
        Tensor<type, 2> old_inverse_hessian;

        // Optimization algorithm data

        Index epoch = 0;

        Tensor<type, 0> training_slope;

        type learning_rate = 0;
        type old_learning_rate = 0;
    };

   // Constructors

   explicit GradientDescent();

   explicit GradientDescent(LossIndex*);

   // Destructor

   virtual ~GradientDescent();

   const LearningRateAlgorithm& get_learning_rate_algorithm() const;
   LearningRateAlgorithm* get_learning_rate_algorithm_pointer();

   string get_hardware_use() const;

   // Stopping criteria

   const type& get_minimum_parameters_increment_norm() const;

   const type& get_minimum_loss_decrease() const;
   const type& get_loss_goal() const;
   const type& get_gradient_norm_goal() const;
   const Index& get_maximum_selection_error_increases() const;

   const Index& get_maximum_epochs_number() const;
   const type& get_maximum_time() const;

   const bool& get_choose_best_selection() const;

   // Reserve training history

   const bool& get_reserve_training_error_history() const;
   const bool& get_reserve_selection_error_history() const;

   // Set methods

   void set_loss_index_pointer(LossIndex*);

   void set_learning_rate_algorithm(const LearningRateAlgorithm&);

   void set_default();

   void set_reserve_all_training_history(const bool&);

   // Stopping criteria

   void set_maximum_epochs_number(const Index&);

   void set_minimum_parameters_increment_norm(const type&);

   void set_minimum_loss_decrease(const type&);
   void set_loss_goal(const type&);
   void set_gradient_norm_goal(const type&);
   void set_maximum_selection_error_increases(const Index&);

   void set_maximum_time(const type&);

   void set_choose_best_selection(const bool&);

   // Reserve training history

   void set_reserve_training_error_history(const bool&);
   void set_reserve_selection_error_history(const bool&);

   // Training methods

   void calculate_training_direction(const Tensor<type, 1>&, Tensor<type, 1>&) const;

   void update_epoch(
           const DataSet::Batch& batch,
           NeuralNetwork::ForwardPropagation& forward_propagation,
           LossIndex::BackPropagation& back_propagation,
           GDOptimizationData& optimization_data);
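
   // A rough sketch of what update_epoch() amounts to, assuming the standard
   // gradient descent rule; the actual implementation lives in the source file:
   //
   //   calculate_training_direction(gradient, training_direction);  // steepest descent: -gradient
   //   learning_rate = step size from the line search along training_direction (LearningRateAlgorithm)
   //   parameters += learning_rate * training_direction;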

   Results perform_training();

   void perform_training_void();

   string write_optimization_algorithm_type() const;

   // Serialization methods

   Tensor<string, 2> to_string_matrix() const;

   void from_XML(const tinyxml2::XMLDocument&);

   void write_XML(tinyxml2::XMLPrinter&) const;

private:

   // TRAINING OPERATORS

   /// Learning rate algorithm object for one-dimensional minimization.

   LearningRateAlgorithm learning_rate_algorithm;

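   /// Initial learning rate.
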
   type first_learning_rate = static_cast<type>(0.01);

   // Stopping criteria

   /// Norm of the parameters increment vector at which training stops.

   type minimum_parameters_increment_norm;

   /// Minimum loss improvement between two successive iterations. It is used as a stopping criterion.

   type minimum_loss_decrease;

   /// Goal value for the loss. It is used as a stopping criterion.

   type training_loss_goal;

   /// Goal value for the norm of the error function gradient. It is used as a stopping criterion.

   type gradient_norm_goal;

   /// Maximum number of epochs for which the selection error can increase.
   /// This is an early stopping method used to improve generalization.

   Index maximum_selection_error_increases;

   /// Maximum number of training epochs. It is used as a stopping criterion.

   Index maximum_epochs_number;

   /// Maximum training time. It is used as a stopping criterion.

   type maximum_time;

   /// True if the final model will be the neural network with the minimum selection error, false otherwise.

   bool choose_best_selection = false;

   // TRAINING HISTORY

   /// True if the loss history vector is to be reserved, false otherwise.

   bool reserve_training_error_history;

   /// True if the selection error history vector is to be reserved, false otherwise.

   bool reserve_selection_error_history;

};

}

#endif