1 //   OpenNN: Open Neural Networks Library
2 //   www.opennn.net
3 //
4 //   Q U A S I - N E W T O N   M E T H O D   C L A S S
5 //
6 //   Artificial Intelligence Techniques SL
7 //   artelnics@artelnics.com
8 
9 #include "quasi_newton_method.h"
10 
11 namespace OpenNN
12 {
13 
14 /// Default constructor.
15 /// It creates a quasi-Newton method optimization algorithm not associated to any loss index.
16 /// It also initializes the class members to their default values.
17 
QuasiNewtonMethod::QuasiNewtonMethod()
19     : OptimizationAlgorithm()
20 {
21     set_default();
22 }
23 
24 
25 /// Loss index constructor.
26 /// It creates a quasi-Newton method optimization algorithm associated to a loss index.
27 /// It also initializes the class members to their default values.
28 /// @param new_loss_index_pointer Pointer to a loss index object.
29 
QuasiNewtonMethod::QuasiNewtonMethod(LossIndex* new_loss_index_pointer)
31     : OptimizationAlgorithm(new_loss_index_pointer)
32 {
33     learning_rate_algorithm.set_loss_index_pointer(new_loss_index_pointer);
34 
35     set_default();
36 }
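
// A minimal usage sketch (not part of the library itself): it assumes a DataSet and a
// NeuralNetwork have already been loaded and configured elsewhere, and uses a
// MeanSquaredError loss index to connect them to the quasi-Newton method.
//
//   DataSet data_set;                                  // assumed to be loaded and split
//   NeuralNetwork neural_network;                      // assumed to have a suitable architecture
//   MeanSquaredError mean_squared_error(&neural_network, &data_set);
//
//   QuasiNewtonMethod quasi_newton_method(&mean_squared_error);
//   quasi_newton_method.set_inverse_hessian_approximation_method(QuasiNewtonMethod::BFGS);
//   quasi_newton_method.set_maximum_epochs_number(1000);
//
//   const OptimizationAlgorithm::Results results = quasi_newton_method.perform_training();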
37 
38 
39 /// Destructor.
40 /// It does not delete any object.
41 
QuasiNewtonMethod::~QuasiNewtonMethod()
43 {
44 }
45 
46 
47 /// Returns a constant reference to the learning rate algorithm object inside the quasi-Newton method object.
48 
const LearningRateAlgorithm& QuasiNewtonMethod::get_learning_rate_algorithm() const
50 {
51     return learning_rate_algorithm;
52 }
53 
54 
55 /// Returns a pointer to the learning rate algorithm object inside the quasi-Newton method object.
56 
LearningRateAlgorithm* QuasiNewtonMethod::get_learning_rate_algorithm_pointer()
58 {
59     return &learning_rate_algorithm;
60 }
61 
62 
63 /// Returns the method for approximating the inverse hessian matrix to be used when training.
64 
const QuasiNewtonMethod::InverseHessianApproximationMethod& QuasiNewtonMethod::get_inverse_hessian_approximation_method() const
66 {
67     return inverse_hessian_approximation_method;
68 }
69 
70 
71 /// Returns the name of the method for the approximation of the inverse hessian.
72 
string QuasiNewtonMethod::write_inverse_hessian_approximation_method() const
74 {
75     switch(inverse_hessian_approximation_method)
76     {
77     case DFP:
78         return "DFP";
79 
80     case BFGS:
81         return "BFGS";
82     }
83 
84     ostringstream buffer;
85 
86     buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
87            << "string write_inverse_hessian_approximation_method() const method.\n"
88            << "Unknown inverse hessian approximation method.\n";
89 
90     throw logic_error(buffer.str());
91 }
92 
93 
const Index& QuasiNewtonMethod::get_epochs_number() const
95 {
96     return epochs_number;
97 }
98 
99 
/// Returns the minimum norm of the parameter increment vector used as a stopping criterion when training.
101 
const type& QuasiNewtonMethod::get_minimum_parameters_increment_norm() const
103 {
104     return minimum_parameters_increment_norm;
105 }
106 
107 
108 /// Returns the minimum loss improvement during training.
109 
const type& QuasiNewtonMethod::get_minimum_loss_decrease() const
111 {
112     return minimum_loss_decrease;
113 }
114 
115 
116 /// Returns the goal value for the loss.
117 /// This is used as a stopping criterion when training a neural network
118 
const type& QuasiNewtonMethod::get_loss_goal() const
120 {
121     return training_loss_goal;
122 }
123 
124 
125 /// Returns the goal value for the norm of the error function gradient.
126 /// This is used as a stopping criterion when training a neural network
127 
const type& QuasiNewtonMethod::get_gradient_norm_goal() const
129 {
130     return gradient_norm_goal;
131 }
132 
133 
134 /// Returns the maximum number of selection error increases during the training process.
135 
const Index& QuasiNewtonMethod::get_maximum_selection_error_increases() const
137 {
138     return maximum_selection_error_increases;
139 }
140 
141 
142 /// Returns the maximum number of epochs for training.
143 
const Index& QuasiNewtonMethod::get_maximum_epochs_number() const
145 {
146     return maximum_epochs_number;
147 }
148 
149 
150 /// Returns the maximum training time.
151 
const type& QuasiNewtonMethod::get_maximum_time() const
153 {
154     return maximum_time;
155 }
156 
157 
158 /// Returns true if the final model will be the neural network with the minimum selection error, false otherwise.
159 
const bool& QuasiNewtonMethod::get_choose_best_selection() const
161 {
162     return choose_best_selection;
163 }
164 
165 
166 /// Returns true if the error history vector is to be reserved, and false otherwise.
167 
const bool& QuasiNewtonMethod::get_reserve_training_error_history() const
169 {
170     return reserve_training_error_history;
171 }
172 
173 
174 /// Returns true if the selection error history vector is to be reserved, and false otherwise.
175 
const bool& QuasiNewtonMethod::get_reserve_selection_error_history() const
177 {
178     return reserve_selection_error_history;
179 }
180 
181 
182 /// Sets a pointer to a loss index object to be associated to the quasi-Newton method object.
183 /// It also sets that loss index to the learning rate algorithm.
184 /// @param new_loss_index_pointer Pointer to a loss index object.
185 
void QuasiNewtonMethod::set_loss_index_pointer(LossIndex* new_loss_index_pointer)
187 {
188     loss_index_pointer = new_loss_index_pointer;
189 
190     learning_rate_algorithm.set_loss_index_pointer(new_loss_index_pointer);
191 }
192 
193 
/// Sets a new inverse hessian approximation method value.
195 /// @param new_inverse_hessian_approximation_method Inverse hessian approximation method value.
196 
void QuasiNewtonMethod::set_inverse_hessian_approximation_method(
198     const QuasiNewtonMethod::InverseHessianApproximationMethod& new_inverse_hessian_approximation_method)
199 {
200     inverse_hessian_approximation_method = new_inverse_hessian_approximation_method;
201 }
202 
203 
204 /// Sets a new method for approximating the inverse of the hessian matrix from a string containing the name.
205 /// Possible values are:
206 /// <ul>
207 /// <li> "DFP"
208 /// <li> "BFGS"
209 /// </ul>
210 /// @param new_inverse_hessian_approximation_method_name Name of inverse hessian approximation method.
211 
void QuasiNewtonMethod::set_inverse_hessian_approximation_method(const string& new_inverse_hessian_approximation_method_name)
213 {
214     if(new_inverse_hessian_approximation_method_name == "DFP")
215     {
216         inverse_hessian_approximation_method = DFP;
217     }
218     else if(new_inverse_hessian_approximation_method_name == "BFGS")
219     {
220         inverse_hessian_approximation_method = BFGS;
221     }
222     else
223     {
224         ostringstream buffer;
225 
226         buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
227                << "void set_inverse_hessian_approximation_method(const string&) method.\n"
228                << "Unknown inverse hessian approximation method: " << new_inverse_hessian_approximation_method_name << ".\n";
229 
230         throw logic_error(buffer.str());
231     }
232 }
233 
234 
/// Sets whether the training history of all variables is to be reserved in memory.
236 /// @param new_reserve_all_training_history True if the training history of all variables is to be reserved,
237 /// false otherwise.
238 
void QuasiNewtonMethod::set_reserve_all_training_history(const bool& new_reserve_all_training_history)
240 {
241     reserve_training_error_history = new_reserve_all_training_history;
242 
243     reserve_selection_error_history = new_reserve_all_training_history;
244 }
245 
246 
247 /// Sets a new display value.
248 /// If it is set to true messages from this class are to be displayed on the screen;
249 /// if it is set to false messages from this class are not to be displayed on the screen.
250 /// @param new_display Display value.
251 
void QuasiNewtonMethod::set_display(const bool& new_display)
253 {
254     display = new_display;
255 }
256 
257 
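/// Sets the members of the quasi-Newton method object to their default values:
/// BFGS inverse hessian approximation, default learning rate algorithm,
/// default stopping criteria and default training history and display settings.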
void QuasiNewtonMethod::set_default()
259 {
260     inverse_hessian_approximation_method = BFGS;
261 
262     learning_rate_algorithm.set_default();
263 
264     // Stopping criteria
265 
266     minimum_parameters_increment_norm = static_cast<type>(1.0e-3);
267 
268     minimum_loss_decrease = static_cast<type>(0.0);
269     training_loss_goal = 0;
270     gradient_norm_goal = 0;
271     maximum_selection_error_increases = 1000000;
272 
273     maximum_epochs_number = 1000;
274     maximum_time = 3600.0;
275 
276     choose_best_selection = false;
277 
278     // TRAINING HISTORY
279 
280     reserve_training_error_history = true;
281     reserve_selection_error_history = true;
282 
283     // UTILITIES
284 
285     display = true;
286     display_period = 5;
287 }
288 
289 
290 /// Sets a new value for the minimum parameters increment norm stopping criterion.
291 /// @param new_minimum_parameters_increment_norm Value of norm of parameters increment norm used to stop training.
292 
void QuasiNewtonMethod::set_minimum_parameters_increment_norm(const type& new_minimum_parameters_increment_norm)
294 {
295 #ifdef __OPENNN_DEBUG__
296 
297     if(new_minimum_parameters_increment_norm < static_cast<type>(0.0))
298     {
299         ostringstream buffer;
300 
301         buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
               << "void set_minimum_parameters_increment_norm(const type&) method.\n"
303                << "Minimum parameters increment norm must be equal or greater than 0.\n";
304 
305         throw logic_error(buffer.str());
306     }
307 
308 #endif
309 
    // Set minimum parameters increment norm
311 
312     minimum_parameters_increment_norm = new_minimum_parameters_increment_norm;
313 }
314 
315 
316 /// Sets a new minimum loss improvement during training.
317 /// @param new_minimum_loss_decrease Minimum improvement in the loss between two epochs.
318 
void QuasiNewtonMethod::set_minimum_loss_decrease(const type& new_minimum_loss_decrease)
320 {
321 #ifdef __OPENNN_DEBUG__
322 
323     if(new_minimum_loss_decrease < static_cast<type>(0.0))
324     {
325         ostringstream buffer;
326 
327         buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
328                << "void set_minimum_loss_decrease(const type&) method.\n"
329                << "Minimum loss improvement must be equal or greater than 0.\n";
330 
331         throw logic_error(buffer.str());
332     }
333 
334 #endif
335 
336     // Set minimum loss improvement
337 
338     minimum_loss_decrease = new_minimum_loss_decrease;
339 }
340 
341 
342 /// Sets a new goal value for the loss.
343 /// This is used as a stopping criterion when training a neural network
344 /// @param new_loss_goal Goal value for the loss.
345 
void QuasiNewtonMethod::set_loss_goal(const type& new_loss_goal)
347 {
348     training_loss_goal = new_loss_goal;
349 }
350 
351 
/// Sets a new goal value for the norm of the error function gradient.
353 /// This is used as a stopping criterion when training a neural network
354 /// @param new_gradient_norm_goal Goal value for the norm of the error function gradient.
355 
void QuasiNewtonMethod::set_gradient_norm_goal(const type& new_gradient_norm_goal)
357 {
358 #ifdef __OPENNN_DEBUG__
359 
360     if(new_gradient_norm_goal < static_cast<type>(0.0))
361     {
362         ostringstream buffer;
363 
364         buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
365                << "void set_gradient_norm_goal(const type&) method.\n"
366                << "Gradient norm goal must be equal or greater than 0.\n";
367 
368         throw logic_error(buffer.str());
369     }
370 
371 #endif
372 
373     // Set gradient norm goal
374 
375     gradient_norm_goal = new_gradient_norm_goal;
376 }
377 
378 
379 /// Sets a new maximum number of selection error increases.
/// @param new_maximum_selection_error_increases Maximum number of epochs in which the selection error increases.
381 
void QuasiNewtonMethod::set_maximum_selection_error_increases(const Index& new_maximum_selection_error_increases)
383 {
384     maximum_selection_error_increases = new_maximum_selection_error_increases;
385 }
386 
387 
/// Sets a new maximum number of training epochs.
/// @param new_maximum_epochs_number Maximum number of epochs for training.
390 
void QuasiNewtonMethod::set_maximum_epochs_number(const Index& new_maximum_epochs_number)
392 {
393     maximum_epochs_number = new_maximum_epochs_number;
394 }
395 
396 
397 /// Sets a new maximum training time.
398 /// @param new_maximum_time Maximum training time.
399 
void QuasiNewtonMethod::set_maximum_time(const type& new_maximum_time)
401 {
402 #ifdef __OPENNN_DEBUG__
403 
404     if(new_maximum_time < static_cast<type>(0.0))
405     {
406         ostringstream buffer;
407 
408         buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
409                << "void set_maximum_time(const type&) method.\n"
410                << "Maximum time must be equal or greater than 0.\n";
411 
412         throw logic_error(buffer.str());
413     }
414 
415 #endif
416 
417     // Set maximum time
418 
419     maximum_time = new_maximum_time;
420 }
421 
422 
/// Sets whether the final model is the neural network with the minimum selection error over all the epochs.
424 /// @param new_choose_best_selection True if the final model will be the neural network with the minimum selection error,
425 /// false otherwise.
426 
void QuasiNewtonMethod::set_choose_best_selection(const bool& new_choose_best_selection)
428 {
429     choose_best_selection = new_choose_best_selection;
430 }
431 
432 
/// Sets whether the training error history vector is to be reserved in memory.
/// @param new_reserve_training_error_history True if the training error history is to be reserved, false otherwise.
435 
void QuasiNewtonMethod::set_reserve_training_error_history(const bool& new_reserve_training_error_history)
437 {
438     reserve_training_error_history = new_reserve_training_error_history;
439 }
440 
441 
/// Sets whether the selection error history vector is to be reserved in memory.
/// @param new_reserve_selection_error_history True if the selection error history is to be reserved, false otherwise.
445 
void QuasiNewtonMethod::set_reserve_selection_error_history(const bool& new_reserve_selection_error_history)
447 {
448     reserve_selection_error_history = new_reserve_selection_error_history;
449 }
450 
451 
452 
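/// Initializes the inverse hessian approximation to the identity matrix.
/// @param optimization_data Optimization data whose inverse hessian member is initialized.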
void QuasiNewtonMethod::initialize_inverse_hessian_approximation(QNMOptimizationData& optimization_data) const
454 {
455     optimization_data.inverse_hessian.setZero();
456 
457     const Index parameters_number = optimization_data.parameters.size();
458 
459     for(Index i = 0; i < parameters_number; i++) optimization_data.inverse_hessian(i,i) = 1.0;
460 
461 }
462 
/// Calculates an approximation of the inverse hessian, according to the method used.
/// @param back_propagation Back-propagation data containing the gradient at the current point.
/// @param optimization_data Optimization data containing the previous and current parameters, gradients and inverse hessian.
469 
void QuasiNewtonMethod::calculate_inverse_hessian_approximation(const LossIndex::BackPropagation& back_propagation,
471                                                                 QNMOptimizationData& optimization_data) const
472 {
473     switch(inverse_hessian_approximation_method)
474     {
475     case DFP:
476         calculate_DFP_inverse_hessian(back_propagation, optimization_data);
477 
478         return;
479 
480     case BFGS:
481         calculate_BFGS_inverse_hessian(back_propagation, optimization_data);
482 
483         return;
484     }
485 
486     ostringstream buffer;
487 
488     buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
           << "void calculate_inverse_hessian_approximation(const LossIndex::BackPropagation&, QNMOptimizationData&) method.\n"
491            << "Unknown inverse hessian approximation method.\n";
492 
493     throw logic_error(buffer.str());
494 }
495 
496 
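/// Calculates the Kronecker product of two vectors of the same size,
/// that is, their outer product, and returns it as a square matrix.
/// @param left_matrix Vector on the left of the product.
/// @param right_matrix Vector on the right of the product.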
const Tensor<type, 2> QuasiNewtonMethod::kronecker_product(Tensor<type, 1>& left_matrix, Tensor<type, 1>& right_matrix) const
498 {
499     // Transform Tensors into Dense matrix
500 
501     auto ml = Eigen::Map<Eigen::Matrix<type,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor >>
502             (left_matrix.data(),left_matrix.dimension(0), 1);
503 
504     auto mr = Eigen::Map<Eigen::Matrix<type,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor>>
505             (right_matrix.data(),right_matrix.dimension(0), 1);
506 
507     // Kronecker Product
508 
509     auto product = kroneckerProduct(ml,mr).eval();
510 
511     // Matrix into a Tensor
512 
513     TensorMap< Tensor<type, 2> > direct_matrix(product.data(), left_matrix.size(), left_matrix.size());
514 
515     return direct_matrix;
516 }
517 
518 
/// Calculates the Kronecker product of two matrices.
/// It returns the resulting matrix.
/// @param left_matrix Left matrix of the product.
/// @param right_matrix Right matrix of the product.
523 
const Tensor<type, 2> QuasiNewtonMethod::kronecker_product(Tensor<type, 2>& left_matrix, Tensor<type, 2>& right_matrix) const
525 {
526     // Transform Tensors into Dense matrix
527 
528     auto ml = Eigen::Map<Eigen::Matrix<type,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor >>
529             (left_matrix.data(),left_matrix.dimension(0),left_matrix.dimension(1));
530 
531     auto mr = Eigen::Map<Eigen::Matrix<type,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor>>
532             (right_matrix.data(),right_matrix.dimension(0),right_matrix.dimension(1));
533 
534     // Kronecker Product
535 
536     auto product = kroneckerProduct(ml,mr).eval();
537 
538     // Matrix into a Tensor
539 
540     TensorMap< Tensor<type, 2> > direct_matrix(product.data(), product.rows(), product.cols());
541 
542     return direct_matrix;
543 
544 }
545 
546 
/// Updates the approximation of the inverse hessian matrix according to the Davidon-Fletcher-Powell
/// (DFP) algorithm.
/// @param back_propagation Back-propagation data containing the gradient at the current point.
/// @param optimization_data Optimization data containing the parameters and gradient differences and the old inverse hessian.
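///
/// For reference, the update implemented below is the standard DFP inverse hessian formula,
/// with \f$ s \f$ the parameters difference, \f$ y \f$ the gradient difference and
/// \f$ H \f$ the old inverse hessian approximation:
/// \f$ H_{new} = H + \frac{s s^T}{s^T y} - \frac{H y y^T H}{y^T H y} \f$.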
554 
void QuasiNewtonMethod::calculate_DFP_inverse_hessian(const LossIndex::BackPropagation& back_propagation,
556                                                       QNMOptimizationData& optimization_data) const
557 {
558     const NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();
559 
560     const Index parameters_number = neural_network_pointer->get_parameters_number();
561 
562     // Dots
563 
564     Tensor<type, 0> parameters_difference_dot_gradient_difference;
565 
566     parameters_difference_dot_gradient_difference.device(*thread_pool_device)
567             = optimization_data.parameters_difference.contract(optimization_data.gradient_difference, AT_B); // Ok
568 
569     optimization_data.old_inverse_hessian_dot_gradient_difference.device(*thread_pool_device)
570             = optimization_data.old_inverse_hessian.contract(optimization_data.gradient_difference, A_B); // Ok
571 
572     Tensor<type, 0> gradient_dot_hessian_dot_gradient;
573 
574     gradient_dot_hessian_dot_gradient.device(*thread_pool_device)
575             = optimization_data.gradient_difference.contract(optimization_data.old_inverse_hessian_dot_gradient_difference, AT_B); // Ok , auto?
576 
577     // Calculates Approximation
578 
579     optimization_data.inverse_hessian = optimization_data.old_inverse_hessian; // TensorMap?
580 
581     optimization_data.inverse_hessian
582             += kronecker_product(optimization_data.parameters_difference, optimization_data.parameters_difference)
583             /parameters_difference_dot_gradient_difference(0); // Ok
584 
585     optimization_data.inverse_hessian
586             -= kronecker_product(optimization_data.old_inverse_hessian_dot_gradient_difference, optimization_data.old_inverse_hessian_dot_gradient_difference)
587             / gradient_dot_hessian_dot_gradient(0); // Ok
588 }
589 
590 
/// Updates the approximation of the inverse hessian matrix according to the
/// Broyden-Fletcher-Goldfarb-Shanno (BFGS) algorithm.
/// @param back_propagation Back-propagation data containing the gradient at the current point.
/// @param optimization_data Optimization data containing the parameters and gradient differences and the old inverse hessian.
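///
/// For reference, the update implemented below is the standard BFGS inverse hessian formula,
/// with \f$ s \f$ the parameters difference, \f$ y \f$ the gradient difference,
/// \f$ H \f$ the old inverse hessian approximation and \f$ u = \frac{s}{s^T y} - \frac{H y}{y^T H y} \f$:
/// \f$ H_{new} = H + \frac{s s^T}{s^T y} - \frac{H y y^T H}{y^T H y} + (y^T H y)\, u u^T \f$.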
598 
void QuasiNewtonMethod::calculate_BFGS_inverse_hessian(const LossIndex::BackPropagation& back_propagation,
600                                                        QNMOptimizationData& optimization_data) const
601 {
602     const NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();
603 
604     const Index parameters_number = neural_network_pointer->get_parameters_number();
605 
606     Tensor<type, 0> parameters_difference_dot_gradient_difference;
607 
608     parameters_difference_dot_gradient_difference.device(*thread_pool_device)
609             = optimization_data.parameters_difference.contract(optimization_data.gradient_difference, AT_B);
610 
611 
612     optimization_data.old_inverse_hessian_dot_gradient_difference.device(*thread_pool_device)
613             = optimization_data.old_inverse_hessian.contract(optimization_data.gradient_difference, A_B);
614 
615     Tensor<type, 0> gradient_dot_hessian_dot_gradient;
616 
617     gradient_dot_hessian_dot_gradient.device(*thread_pool_device)
618             = optimization_data.gradient_difference.contract(optimization_data.old_inverse_hessian_dot_gradient_difference, AT_B);
619 
620     Tensor<type, 1> BFGS(parameters_number);
621 
622     BFGS.device(*thread_pool_device)
623             = optimization_data.parameters_difference/parameters_difference_dot_gradient_difference(0)
624             - optimization_data.old_inverse_hessian_dot_gradient_difference/gradient_dot_hessian_dot_gradient(0);
625 
626     // Calculates Approximation
627 
628     optimization_data.inverse_hessian = optimization_data.old_inverse_hessian;
629 
630     optimization_data.inverse_hessian
631             += kronecker_product(optimization_data.parameters_difference, optimization_data.parameters_difference)
632             / parameters_difference_dot_gradient_difference(0); // Ok
633 
634     optimization_data.inverse_hessian
635             -= kronecker_product(optimization_data.old_inverse_hessian_dot_gradient_difference, optimization_data.old_inverse_hessian_dot_gradient_difference)
636             / gradient_dot_hessian_dot_gradient(0); // Ok
637 
638     optimization_data.inverse_hessian
639             += kronecker_product(BFGS, BFGS)*(gradient_dot_hessian_dot_gradient(0)); // Ok
640 }
641 
642 
643 
/// Performs a single quasi-Newton update of the neural network parameters:
/// it computes the parameters and gradient differences, updates the inverse hessian approximation,
/// obtains the training direction as the product of minus the inverse hessian and the gradient,
/// performs a line search along that direction and updates the parameters.
/// @param batch Batch of training samples.
/// @param forward_propagation Forward propagation data structure.
/// @param back_propagation Back propagation data structure.
/// @param optimization_data Quasi-Newton method optimization data structure.
void QuasiNewtonMethod::update_epoch(
650         const DataSet::Batch& batch,
651         NeuralNetwork::ForwardPropagation& forward_propagation,
652         LossIndex::BackPropagation& back_propagation,
653         QNMOptimizationData& optimization_data)
654 {
655     #ifdef __OPENNN_DEBUG__
656 
657         check();
658 
659     #endif
660 
661     optimization_data.old_training_loss = back_propagation.loss;
662 
663     optimization_data.parameters_difference.device(*thread_pool_device)
664             = optimization_data.parameters - optimization_data.old_parameters;
665 
666     optimization_data.gradient_difference.device(*thread_pool_device)
667             = back_propagation.gradient - optimization_data.old_gradient;
668 
669     if(optimization_data.epoch == 0
670     || is_zero(optimization_data.parameters_difference)
671     || is_zero(optimization_data.gradient_difference))
672     {
673 //        if(is_zero(optimization_data.parameters_difference)) cout << "parameters_difference" << endl;
674 //        if(is_zero(optimization_data.gradient_difference)) cout << "gradient_difference" << endl;
675 
676         initialize_inverse_hessian_approximation(optimization_data);
677     }
678     else
679     {
680         calculate_inverse_hessian_approximation(back_propagation, optimization_data);
681     }
682 
683     // Optimization algorithm
684 
685     optimization_data.training_direction.device(*thread_pool_device)
686             = -optimization_data.inverse_hessian.contract(back_propagation.gradient, A_B);
687 
688     // Calculate training slope
689 
690     optimization_data.training_slope.device(*thread_pool_device)
691             = back_propagation.gradient.contract(optimization_data.training_direction, AT_B);
692 
693     // Check for a descent direction
694 
695     if(optimization_data.training_slope(0) >= 0)
696     {
        cout << "Training slope is greater than or equal to zero." << endl;
698 
699         optimization_data.training_direction.device(*thread_pool_device) = -back_propagation.gradient;
700     }
701 
702     // Get initial learning rate
703 
704     optimization_data.initial_learning_rate = 0;
705 
706     optimization_data.epoch == 0
707             ? optimization_data.initial_learning_rate = first_learning_rate
708             : optimization_data.initial_learning_rate = optimization_data.old_learning_rate;
709 
710     pair<type,type> directional_point = learning_rate_algorithm.calculate_directional_point(
711              batch,
712              forward_propagation,
713              back_propagation,
714              optimization_data);
715 
716     optimization_data.learning_rate = directional_point.first;
717 
718     /// @todo ?
719     // Reset training direction when learning rate is 0
720 
721     if(optimization_data.epoch != 0 && abs(optimization_data.learning_rate) < numeric_limits<type>::min())
722     {
723         optimization_data.training_direction.device(*thread_pool_device) = -back_propagation.gradient;
724 
725         directional_point = learning_rate_algorithm.calculate_directional_point(
726                     batch,
727                     forward_propagation,
728                     back_propagation,
729                     optimization_data);
730 
731         optimization_data.learning_rate = directional_point.first;
732     }
733 
734     optimization_data.parameters_increment.device(*thread_pool_device)
735             = optimization_data.training_direction*optimization_data.learning_rate;
736 
737     optimization_data.parameters_increment_norm = l2_norm(optimization_data.parameters_increment);
738 
739     optimization_data.old_parameters = optimization_data.parameters;
740 
741     optimization_data.parameters.device(*thread_pool_device) += optimization_data.parameters_increment;
742 
743     // Update stuff
744 
745     optimization_data.old_gradient = back_propagation.gradient;
746 
747     optimization_data.old_inverse_hessian = optimization_data.inverse_hessian;
748 
749     optimization_data.old_learning_rate = optimization_data.learning_rate;
750 
751     back_propagation.loss = directional_point.second;
752 }
753 
754 
755 /// Trains a neural network with an associated loss index according to the quasi-Newton method.
756 /// Training occurs according to the training operators, training parameters and stopping criteria.
757 
OptimizationAlgorithm::Results QuasiNewtonMethod::perform_training()
759 {
760 #ifdef __OPENNN_DEBUG__
761 
762     check();
763 
764 #endif
765 
766     // Start training
767 
768     if(display) cout << "Training with quasi-Newton method...\n";
769 
770     Results results;
771 
772     results.resize_training_history(maximum_epochs_number);
773 
774     // Data set
775 
776     DataSet* data_set_pointer = loss_index_pointer->get_data_set_pointer();
777 
778     const Index training_samples_number = data_set_pointer->get_training_samples_number();
779 
780     const Index selection_samples_number = data_set_pointer->get_selection_samples_number();
781     const bool has_selection = data_set_pointer->has_selection();
782 
783     Tensor<Index, 1> training_samples_indices = data_set_pointer->get_training_samples_indices();
784     Tensor<Index, 1> selection_samples_indices = data_set_pointer->get_selection_samples_indices();
785     Tensor<Index, 1> inputs_indices = data_set_pointer->get_input_variables_indices();
786     Tensor<Index, 1> target_indices = data_set_pointer->get_target_variables_indices();
787 
788     DataSet::Batch training_batch(training_samples_number, data_set_pointer);
789     DataSet::Batch selection_batch(selection_samples_number, data_set_pointer);
790 
791     training_batch.fill(training_samples_indices, inputs_indices, target_indices);
792     selection_batch.fill(selection_samples_indices, inputs_indices, target_indices);
793 
794     training_samples_indices.resize(0);
795     selection_samples_indices.resize(0);
796     inputs_indices.resize(0);
797     target_indices.resize(0);
798 
799     // Neural network
800 
801     NeuralNetwork* neural_network_pointer = loss_index_pointer->get_neural_network_pointer();
802 
803     type parameters_norm = 0;
804 
805     NeuralNetwork::ForwardPropagation training_forward_propagation(training_samples_number, neural_network_pointer);
806     NeuralNetwork::ForwardPropagation selection_forward_propagation(selection_samples_number, neural_network_pointer);
807 
808     // Loss index
809 
810     type gradient_norm = 0;
811 
812     type old_selection_error = numeric_limits<type>::max();
813 
814     LossIndex::BackPropagation training_back_propagation(training_samples_number, loss_index_pointer);
815     LossIndex::BackPropagation selection_back_propagation(selection_samples_number, loss_index_pointer);
816 
817     // Optimization algorithm
818 
819     Tensor<type, 1> minimal_selection_parameters;
820 
821     type minimum_selection_error = numeric_limits<type>::max();
822 
823     bool stop_training = false;
824 
825     Index selection_failures = 0;
826 
827     time_t beginning_time, current_time;
828     time(&beginning_time);
829     type elapsed_time;
830 
831     QNMOptimizationData optimization_data(this);
832 
833     if(has_selection) results.resize_selection_history(maximum_epochs_number);
834 
835     // Main loop
836 
837     for(Index epoch = 0; epoch < maximum_epochs_number; epoch++)
838     {
839         optimization_data.epoch = epoch;
840 
841         // Neural network
842 
843         parameters_norm = l2_norm(optimization_data.parameters);
844 
845         neural_network_pointer->forward_propagate(training_batch, training_forward_propagation);
846 
847         loss_index_pointer->back_propagate(training_batch, training_forward_propagation, training_back_propagation);
848 
849         gradient_norm = l2_norm(training_back_propagation.gradient);
850 
851         // Selection error
852 
853         if(has_selection)
854         {
855             neural_network_pointer->forward_propagate(selection_batch, selection_forward_propagation);
856 
857             // Loss Index
858 
859             loss_index_pointer->calculate_error(selection_batch, selection_forward_propagation, selection_back_propagation);
860 
861             if(selection_back_propagation.error > old_selection_error)
862             {
863                 selection_failures++;
864             }
865             else if(selection_back_propagation.error < minimum_selection_error)
866             {
867                 minimum_selection_error = selection_back_propagation.error;
868 
869                 minimal_selection_parameters = optimization_data.parameters;
870             }
871 
872             if(reserve_selection_error_history) results.selection_error_history(epoch) = selection_back_propagation.error;
873         }
874 
875         // Optimization data
876 
877         update_epoch(training_batch, training_forward_propagation, training_back_propagation, optimization_data);
878 
879         #ifdef __OPENNN_DEBUG__
880 
881         if(::isnan(training_back_propagation.error)){
882             ostringstream buffer;
883 
884             buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
                   << "OptimizationAlgorithm::Results perform_training() method.\n"
886                    << "Error is NAN.\n";
887 
888             throw logic_error(buffer.str());
889         }
890         #endif
891 
892         neural_network_pointer->set_parameters(optimization_data.parameters);
893 
894         // Training history
895 
896         if(reserve_training_error_history) results.training_error_history(epoch) = training_back_propagation.error;
897 
898         // Stopping Criteria
899 
900         time(&current_time);
901         elapsed_time = static_cast<type>(difftime(current_time, beginning_time));
902 
903         if(optimization_data.parameters_increment_norm <= minimum_parameters_increment_norm)
904         {
905             if(display)
906             {
907                cout << "Epoch " << epoch+1 << ": Minimum parameters increment norm reached.\n"
908                     << "Parameters increment norm: " << optimization_data.parameters_increment_norm << endl;
909             }
910 
911             stop_training = true;
912 
913             results.stopping_condition = MinimumParametersIncrementNorm;
914         }
915         else if(epoch != 0 &&
916                 training_back_propagation.loss - optimization_data.old_training_loss >= minimum_loss_decrease)
917         {
918             if(display)
919             {
920                cout << "Epoch " << epoch+1 << ": Minimum loss decrease (" << minimum_loss_decrease << ") reached.\n"
921                     << "Loss decrease: " << training_back_propagation.loss - optimization_data.old_training_loss <<  endl;
922             }
923 
924             stop_training = true;
925 
926             results.stopping_condition = MinimumLossDecrease;
927         }
928         else if(training_back_propagation.loss <= training_loss_goal)
929         {
930             if(display)
931             {
932                 cout << "Epoch " << epoch+1 << ": Loss goal reached.\n";
933             }
934 
935             stop_training = true;
936 
937             results.stopping_condition = LossGoal;
938         }
939         else if(gradient_norm <= gradient_norm_goal)
940         {
941             if(display)
942             {
                cout << "Epoch " << epoch+1 << ": Gradient norm goal reached.\n";
944             }
945 
946             stop_training = true;
947 
948             results.stopping_condition = GradientNormGoal;
949         }
950         else if(selection_failures >= maximum_selection_error_increases)
951         {
952             if(display)
953             {
954                 cout << "Epoch " << epoch+1 << ": Maximum selection error increases reached.\n"
                     << "Selection error increases: " << selection_failures << endl;
956             }
957 
958             stop_training = true;
959 
960             results.stopping_condition = MaximumSelectionErrorIncreases;
961         }
        else if(epoch == maximum_epochs_number-1)
963         {
964             if(display)
965             {
966                 cout << "Epoch " << epoch+1 << ": Maximum number of epochs reached.\n";
967             }
968 
969             stop_training = true;
970 
971             results.stopping_condition = MaximumEpochsNumber;
972         }
973         else if(elapsed_time >= maximum_time)
974         {
975             if(display)
976             {
977                 cout << "Epoch " << epoch+1 << ": Maximum training time reached.\n";
978             }
979 
980             stop_training = true;
981 
982             results.stopping_condition = MaximumTime;
983         }
984 
985         if(epoch != 0 && epoch % save_period == 0)
986         {
987             neural_network_pointer->save(neural_network_file_name);
988         }
989 
990         if(stop_training)
991         {
992             results.final_parameters = optimization_data.parameters;
993             results.final_parameters_norm = parameters_norm;
994             results.final_training_error = training_back_propagation.error;
995             results.final_selection_error = selection_back_propagation.error;
996 
997             results.final_gradient_norm = gradient_norm;
998 
999             results.elapsed_time = write_elapsed_time(elapsed_time);
1000 
1001             results.epochs_number = epoch;
1002 
1003             results.resize_training_error_history(epoch+1);
1004             if(has_selection) results.resize_selection_error_history(epoch+1);
1005 
1006             if(display)
1007             {
1008                 cout << "Parameters norm: " << parameters_norm << "\n"
1009                      << "Training error: " << training_back_propagation.error <<  "\n"
1010                      << "Gradient norm: " << gradient_norm <<  "\n"
1011                      << "Learning rate: " << optimization_data.learning_rate <<  "\n"
1012                      << "Elapsed time: " << write_elapsed_time(elapsed_time) << endl;
1013 
1014                 if(has_selection)
1015                 {
1016                     cout << "Selection error: " << selection_back_propagation.error << endl;
1017                 }
1018             }
1019 
1020             break;
1021         }
1022         else if((display && epoch == 0) || (display && (epoch+1) % display_period == 0))
1023         {
1024             cout << "Epoch " << epoch+1 << ";\n"
1025                  << "Parameters norm: " << parameters_norm << "\n"
1026                  << "Training error: " << training_back_propagation.error << "\n"
1027                  << "Gradient norm: " << gradient_norm << "\n"
1028                  << "Learning rate: " << optimization_data.learning_rate << "\n"
1029                  << "Elapsed time: " << write_elapsed_time(elapsed_time) << endl;
1030 
1031             if(has_selection)
1032             {
1033                 cout << "Selection error: " << selection_back_propagation.error << endl;
1034             }
1035         }
1036 
1037         old_selection_error = selection_back_propagation.error;
1038 
1039         if(stop_training) break;
1040     }
1041 
1042     if(choose_best_selection)
1043     {
1044         //optimization_data.parameters = minimal_selection_parameters;
1045         //parameters_norm = l2_norm(parameters);
1046 
1047         neural_network_pointer->set_parameters(minimal_selection_parameters);
1048 
1049         //neural_network_pointer->forward_propagate(training_batch, training_forward_propagation);
1050 
1051         //loss_index_pointer->back_propagate(training_batch, training_forward_propagation, training_back_propagation);
1052 
1053         //training_loss = training_back_propagation.loss;
1054 
1055         //selection_error = minimum_selection_error;
1056     }
1057 
1058     return results;
1059 }
1060 
1061 
void QuasiNewtonMethod::perform_training_void()
1063 {
1064     perform_training();
1065 }
1066 
1067 
string QuasiNewtonMethod::write_optimization_algorithm_type() const
1069 {
1070     return "QUASI_NEWTON_METHOD";
1071 }
1072 
1073 
/// Serializes the quasi-Newton method object into an XML document of the TinyXML library without keeping the DOM tree in memory.
1075 /// See the OpenNN manual for more information about the format of this document.
1076 
void QuasiNewtonMethod::write_XML(tinyxml2::XMLPrinter& file_stream) const
1078 {
1079     ostringstream buffer;
1080 
1081     file_stream.OpenElement("QuasiNewtonMethod");
1082 
1083     // Inverse hessian approximation method
1084 
1085     file_stream.OpenElement("InverseHessianApproximationMethod");
1086 
1087     file_stream.PushText(write_inverse_hessian_approximation_method().c_str());
1088 
1089     file_stream.CloseElement();
1090 
1091     // Learning rate algorithm
1092 
1093     learning_rate_algorithm.write_XML(file_stream);
1094 
1095     // Return minimum selection error neural network
1096 
1097     file_stream.OpenElement("ReturnMinimumSelectionErrorNN");
1098 
1099     buffer.str("");
1100     buffer << choose_best_selection;
1101 
1102     file_stream.PushText(buffer.str().c_str());
1103 
1104     file_stream.CloseElement();
1105 
1106     // Minimum parameters increment norm
1107 
1108     file_stream.OpenElement("MinimumParametersIncrementNorm");
1109 
1110     buffer.str("");
1111     buffer << minimum_parameters_increment_norm;
1112 
1113     file_stream.PushText(buffer.str().c_str());
1114 
1115     file_stream.CloseElement();
1116 
1117     // Minimum loss decrease
1118 
1119     file_stream.OpenElement("MinimumLossDecrease");
1120 
1121     buffer.str("");
1122     buffer << minimum_loss_decrease;
1123 
1124     file_stream.PushText(buffer.str().c_str());
1125 
1126     file_stream.CloseElement();
1127 
1128     // Loss goal
1129 
1130     file_stream.OpenElement("LossGoal");
1131 
1132     buffer.str("");
1133     buffer << training_loss_goal;
1134 
1135     file_stream.PushText(buffer.str().c_str());
1136 
1137     file_stream.CloseElement();
1138 
1139     // Gradient norm goal
1140 
1141     file_stream.OpenElement("GradientNormGoal");
1142 
1143     buffer.str("");
1144     buffer << gradient_norm_goal;
1145 
1146     file_stream.PushText(buffer.str().c_str());
1147 
1148     file_stream.CloseElement();
1149 
1150     // Maximum selection error increases
1151 
1152     file_stream.OpenElement("MaximumSelectionErrorIncreases");
1153 
1154     buffer.str("");
1155     buffer << maximum_selection_error_increases;
1156 
1157     file_stream.PushText(buffer.str().c_str());
1158 
1159     file_stream.CloseElement();
1160 
1161     // Maximum iterations number
1162 
1163     file_stream.OpenElement("MaximumEpochsNumber");
1164 
1165     buffer.str("");
1166     buffer << maximum_epochs_number;
1167 
1168     file_stream.PushText(buffer.str().c_str());
1169 
1170     file_stream.CloseElement();
1171 
1172     // Maximum time
1173 
1174     file_stream.OpenElement("MaximumTime");
1175 
1176     buffer.str("");
1177     buffer << maximum_time;
1178 
1179     file_stream.PushText(buffer.str().c_str());
1180 
1181     file_stream.CloseElement();
1182 
1183     // Reserve training error history
1184 
1185     file_stream.OpenElement("ReserveTrainingErrorHistory");
1186 
1187     buffer.str("");
1188     buffer << reserve_training_error_history;
1189 
1190     file_stream.PushText(buffer.str().c_str());
1191 
1192     file_stream.CloseElement();
1193 
1194     // Reserve selection error history
1195 
1196     file_stream.OpenElement("ReserveSelectionErrorHistory");
1197 
1198     buffer.str("");
1199     buffer << reserve_selection_error_history;
1200 
1201     file_stream.PushText(buffer.str().c_str());
1202 
1203     file_stream.CloseElement();
1204 
1205     // Hardware use
1206 
1207     file_stream.OpenElement("HardwareUse");
1208 
1209     buffer.str("");
1210     buffer << hardware_use;
1211 
1212     file_stream.PushText(buffer.str().c_str());
1213 
1214     file_stream.CloseElement();
1215 
1216     file_stream.CloseElement();
1217 }
1218 
1219 
/// Returns a matrix of strings with the most representative attributes of the object.
1221 
Tensor<string, 2> QuasiNewtonMethod::to_string_matrix() const
1223 {
1224     Tensor<string, 2> labels_values(12, 2);
1225 
1226     // Inverse hessian approximation method
1227 
1228     labels_values(0,0) = "Inverse hessian approximation method";
1229 
1230     const string inverse_hessian_approximation_method_string = write_inverse_hessian_approximation_method();
1231 
1232     labels_values(0,1) = inverse_hessian_approximation_method_string;
1233 
1234     // Learning rate method
1235 
1236     labels_values(1,0) = "Learning rate method";
1237 
1238     const string learning_rate_method = learning_rate_algorithm.write_learning_rate_method();
1239 
    labels_values(1,1) = learning_rate_method;
1241 
1242     // Loss tolerance
1243 
1244     labels_values(2,0) = "Learning rate tolerance";
1245 
1246     labels_values(2,1) = std::to_string(learning_rate_algorithm.get_learning_rate_tolerance());
1247 
1248     // Minimum parameters increment norm
1249 
1250     labels_values(3,0) = "Minimum parameters increment norm";
1251 
1252     labels_values(3,1) = std::to_string(minimum_parameters_increment_norm);
1253 
1254     // Minimum loss decrease
1255 
1256     labels_values(4,0) = "Minimum loss decrease";
1257 
1258     labels_values(4,1) = std::to_string(minimum_loss_decrease);
1259 
1260     // Loss goal
1261 
1262     labels_values(5,0) = "Loss goal";
1263 
1264     labels_values(5,1) = std::to_string(training_loss_goal);
1265 
1266     // Gradient norm goal
1267 
1268     labels_values(6,0) = "Gradient norm goal";
1269 
1270     labels_values(6,1) = std::to_string(gradient_norm_goal);
1271 
1272     // Maximum selection error increases
1273 
1274     labels_values(7,0) = "Maximum selection error increases";
1275 
1276     labels_values(7,1) = std::to_string(maximum_selection_error_increases);
1277 
1278     // Maximum epochs number
1279 
1280     labels_values(8,0) = "Maximum epochs number";
1281 
1282     labels_values(8,1) = std::to_string(maximum_epochs_number);
1283 
1284     // Maximum time
1285 
1286     labels_values(9,0) = "Maximum time";
1287 
1288     labels_values(9,1) = std::to_string(maximum_time);
1289 
1290     // Reserve training error history
1291 
1292     labels_values(10,0) = "Reserve training error history";
1293 
1294     if(reserve_training_error_history)
1295     {
1296         labels_values(10,1) = "true";
1297     }
1298     else
1299     {
1300         labels_values(10,1) = "false";
1301     }
1302 
1303     // Reserve selection error history
1304 
1305     labels_values(11,0) = "Reserve selection error history";
1306 
1307     if(reserve_selection_error_history)
1308     {
1309         labels_values(11,1) = "true";
1310     }
1311     else
1312     {
1313         labels_values(11,1) = "false";
1314     }
1315 
1316     return labels_values;
1317 }
1318 
1319 
void QuasiNewtonMethod::from_XML(const tinyxml2::XMLDocument& document)
1321 {
1322     const tinyxml2::XMLElement* root_element = document.FirstChildElement("QuasiNewtonMethod");
1323 
1324     if(!root_element)
1325     {
1326         ostringstream buffer;
1327 
1328         buffer << "OpenNN Exception: QuasiNewtonMethod class.\n"
1329                << "void from_XML(const tinyxml2::XMLDocument&) method.\n"
1330                << "Quasi-Newton method element is nullptr.\n";
1331 
1332         throw logic_error(buffer.str());
1333     }
1334 
1335 
1336     // Inverse hessian approximation method
1337     {
1338         const tinyxml2::XMLElement* element = root_element->FirstChildElement("InverseHessianApproximationMethod");
1339 
1340         if(element)
1341         {
1342             const string new_inverse_hessian_approximation_method = element->GetText();
1343 
1344             try
1345             {
1346                 set_inverse_hessian_approximation_method(new_inverse_hessian_approximation_method);
1347             }
1348             catch(const logic_error& e)
1349             {
1350                 cerr << e.what() << endl;
1351             }
1352         }
1353     }
1354 
1355     // Learning rate algorithm
1356     {
1357         const tinyxml2::XMLElement* element = root_element->FirstChildElement("LearningRateAlgorithm");
1358 
1359         if(element)
1360         {
1361             tinyxml2::XMLDocument learning_rate_algorithm_document;
1362             tinyxml2::XMLNode* element_clone;
1363 
1364             element_clone = element->DeepClone(&learning_rate_algorithm_document);
1365 
1366             learning_rate_algorithm_document.InsertFirstChild(element_clone);
1367 
1368             learning_rate_algorithm.from_XML(learning_rate_algorithm_document);
1369         }
1370     }
1371 
1372     // Return minimum selection error neural network
1373 
1374     const tinyxml2::XMLElement* choose_best_selection_element = root_element->FirstChildElement("ReturnMinimumSelectionErrorNN");
1375 
1376     if(choose_best_selection_element)
1377     {
1378         string new_choose_best_selection = choose_best_selection_element->GetText();
1379 
1380         try
1381         {
1382             set_choose_best_selection(new_choose_best_selection != "0");
1383         }
1384         catch(const logic_error& e)
1385         {
1386             cerr << e.what() << endl;
1387         }
1388     }
1389 
1390     // Minimum parameters increment norm
1391     {
1392         const tinyxml2::XMLElement* element = root_element->FirstChildElement("MinimumParametersIncrementNorm");
1393 
1394         if(element)
1395         {
1396             const type new_minimum_parameters_increment_norm = static_cast<type>(atof(element->GetText()));
1397 
1398             try
1399             {
1400                 set_minimum_parameters_increment_norm(new_minimum_parameters_increment_norm);
1401             }
1402             catch(const logic_error& e)
1403             {
1404                 cerr << e.what() << endl;
1405             }
1406         }
1407     }
1408 
1409     // Minimum loss decrease
1410     {
1411         const tinyxml2::XMLElement* element = root_element->FirstChildElement("MinimumLossDecrease");
1412 
1413         if(element)
1414         {
1415             const type new_minimum_loss_decrease = static_cast<type>(atof(element->GetText()));
1416 
1417             try
1418             {
1419                 set_minimum_loss_decrease(new_minimum_loss_decrease);
1420             }
1421             catch(const logic_error& e)
1422             {
1423                 cerr << e.what() << endl;
1424             }
1425         }
1426     }
1427 
1428     // Loss goal
1429     {
1430         const tinyxml2::XMLElement* element = root_element->FirstChildElement("LossGoal");
1431 
1432         if(element)
1433         {
1434             const type new_loss_goal = static_cast<type>(atof(element->GetText()));
1435 
1436             try
1437             {
1438                 set_loss_goal(new_loss_goal);
1439             }
1440             catch(const logic_error& e)
1441             {
1442                 cerr << e.what() << endl;
1443             }
1444         }
1445     }
1446 
1447     // Gradient norm goal
1448     {
1449         const tinyxml2::XMLElement* element = root_element->FirstChildElement("GradientNormGoal");
1450 
1451         if(element)
1452         {
1453             const type new_gradient_norm_goal = static_cast<type>(atof(element->GetText()));
1454 
1455             try
1456             {
1457                 set_gradient_norm_goal(new_gradient_norm_goal);
1458             }
1459             catch(const logic_error& e)
1460             {
1461                 cerr << e.what() << endl;
1462             }
1463         }
1464     }
1465 
1466     // Maximum selection error increases
1467     {
1468         const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumSelectionErrorIncreases");
1469 
1470         if(element)
1471         {
1472             const Index new_maximum_selection_error_increases = static_cast<Index>(atoi(element->GetText()));
1473 
1474             try
1475             {
1476                 set_maximum_selection_error_increases(new_maximum_selection_error_increases);
1477             }
1478             catch(const logic_error& e)
1479             {
1480                 cerr << e.what() << endl;
1481             }
1482         }
1483     }
1484 
1485     // Maximum epochs number
1486     {
1487         const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumEpochsNumber");
1488 
1489         if(element)
1490         {
1491             const Index new_maximum_epochs_number = static_cast<Index>(atoi(element->GetText()));
1492 
1493             try
1494             {
1495                 set_maximum_epochs_number(new_maximum_epochs_number);
1496             }
1497             catch(const logic_error& e)
1498             {
1499                 cerr << e.what() << endl;
1500             }
1501         }
1502     }
1503 
1504     // Maximum time
1505     {
1506         const tinyxml2::XMLElement* element = root_element->FirstChildElement("MaximumTime");
1507 
1508         if(element)
1509         {
1510             const type new_maximum_time = static_cast<type>(atof(element->GetText()));
1511 
1512             try
1513             {
1514                 set_maximum_time(new_maximum_time);
1515             }
1516             catch(const logic_error& e)
1517             {
1518                 cerr << e.what() << endl;
1519             }
1520         }
1521     }
1522 
1523     // Reserve training error history
1524     {
1525         const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveTrainingErrorHistory");
1526 
1527         if(element)
1528         {
1529             const string new_reserve_training_error_history = element->GetText();
1530 
1531             try
1532             {
1533                 set_reserve_training_error_history(new_reserve_training_error_history != "0");
1534             }
1535             catch(const logic_error& e)
1536             {
1537                 cerr << e.what() << endl;
1538             }
1539         }
1540     }
1541 
1542     // Reserve selection error history
1543     {
1544         const tinyxml2::XMLElement* element = root_element->FirstChildElement("ReserveSelectionErrorHistory");
1545 
1546         if(element)
1547         {
1548             const string new_reserve_selection_error_history = element->GetText();
1549 
1550             try
1551             {
1552                 set_reserve_selection_error_history(new_reserve_selection_error_history != "0");
1553             }
1554             catch(const logic_error& e)
1555             {
1556                 cerr << e.what() << endl;
1557             }
1558         }
1559     }
1560 
1561     // Hardware use
1562     {
1563         const tinyxml2::XMLElement* element = root_element->FirstChildElement("HardwareUse");
1564 
1565         if(element)
1566         {
1567             const string new_hardware_use = element->GetText();
1568 
1569             try
1570             {
1571                 set_hardware_use(new_hardware_use);
1572             }
1573             catch(const logic_error& e)
1574             {
1575                 cerr << e.what() << endl;
1576             }
1577         }
1578     }
1579 }
1580 
1581 }
1582 
1583 // OpenNN: Open Neural Networks Library.
1584 // Copyright(C) 2005-2020 Artificial Intelligence Techniques, SL.
1585 //
1586 // This library is free software; you can redistribute it and/or
1587 // modify it under the terms of the GNU Lesser General Public
1588 // License as published by the Free Software Foundation; either
1589 // version 2.1 of the License, or any later version.
1590 //
1591 // This library is distributed in the hope that it will be useful,
1592 // but WITHOUT ANY WARRANTY; without even the implied warranty of
1593 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1594 // Lesser General Public License for more details.
1595 
1596 // You should have received a copy of the GNU Lesser General Public
1597 // License along with this library; if not, write to the Free Software
1598 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1599 
1600