1 /* Minimizer.h -- class for minimizing the cost function of an optimizable object
2 
3     Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts
4 
5     Author: Robin Hogan <r.j.hogan@ecmwf.int>
6 
7     This file is part of the Adept library.
8 
9 */
10 
11 #ifndef AdeptMinimizer_H
12 #define AdeptMinimizer_H 1
13 
14 #include <adept/Optimizable.h>
15 
16 namespace adept {
17 
18   enum MinimizerAlgorithm {
19     MINIMIZER_ALGORITHM_LIMITED_MEMORY_BFGS = 0,
20     MINIMIZER_ALGORITHM_LEVENBERG,
21     MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT,
22     MINIMIZER_ALGORITHM_NUMBER_AVAILABLE
23   };
24 
25   enum MinimizerStatus {
26     MINIMIZER_STATUS_SUCCESS = 0,
27     MINIMIZER_STATUS_EMPTY_STATE,
28     MINIMIZER_STATUS_MAX_ITERATIONS_REACHED,
29     MINIMIZER_STATUS_FAILED_TO_CONVERGE,
30     MINIMIZER_STATUS_INVALID_COST_FUNCTION,
31     MINIMIZER_STATUS_INVALID_GRADIENT,
32     MINIMIZER_STATUS_INVALID_BOUNDS,
33     MINIMIZER_STATUS_NUMBER_AVAILABLE,
34     MINIMIZER_STATUS_NOT_YET_CONVERGED
35   };
36 
37   // Return a C string describing the minimizer status
38   const char* minimizer_status_string(MinimizerStatus status);
39 
40   // Return the order of a minimization algorithm: 0 indicates only
41   // the cost function is required, 1 indicates the first derivative
42   // is required, 2 indicates the second derivative is required, while
43   // -1 indicates that the algorithm is not recognized.
minimizer_algorithm_order(MinimizerAlgorithm algo)44   inline int minimizer_algorithm_order(MinimizerAlgorithm algo) {
45     switch (algo) {
46     case MINIMIZER_ALGORITHM_LIMITED_MEMORY_BFGS:
47       return 1;
48       break;
49     case MINIMIZER_ALGORITHM_LEVENBERG:
50     case MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT:
51       return 2;
52       break;
53     default:
54       return -1;
55     }
56   }
57 
58   // Convenience function for initializing vectors representing the
59   // lower and upper bounds on state variables
minimizer_initialize_bounds(int nx,adept::Vector & x_lower,adept::Vector & x_upper)60   inline void minimizer_initialize_bounds(int nx, adept::Vector& x_lower,
61 					  adept::Vector& x_upper) {
62     x_lower.resize(nx);
63     x_upper.resize(nx);
64     x_lower = -std::numeric_limits<Real>::max();
65     x_upper =  std::numeric_limits<Real>::max();
66   }
67 
68   // A class that can minimize a function using various algorithms
69   class Minimizer {
70 
71   public:
72 
73     // Tedious C++98 initializations
Minimizer(MinimizerAlgorithm algo)74     Minimizer(MinimizerAlgorithm algo)
75       : algorithm_(algo),
76 	max_iterations_(100), // <=0 means no limit
77 	max_step_size_(-1.0),
78 	converged_gradient_norm_(0.1),
79 	ensure_updated_state_(-1),
80 	levenberg_damping_min_(1.0/128.0),
81 	levenberg_damping_max_(100000.0),
82 	levenberg_damping_multiplier_(2.0),
83 	levenberg_damping_divider_(5.0),
84 	levenberg_damping_start_(0.0),
85 	levenberg_damping_restart_(1.0/4.0) { }
86 
Minimizer(const std::string & algo)87     Minimizer(const std::string& algo)
88       : max_iterations_(100), // <=0 means no limit
89 	max_step_size_(-1.0),
90 	converged_gradient_norm_(0.1),
91 	ensure_updated_state_(-1),
92 	levenberg_damping_min_(1.0/128.0),
93 	levenberg_damping_max_(100000.0),
94 	levenberg_damping_multiplier_(2.0),
95 	levenberg_damping_divider_(5.0),
96 	levenberg_damping_start_(0.0),
97 	levenberg_damping_restart_(1.0/4.0)
98     { set_algorithm(algo); }
99 
100     // Unconstrained minimization
101     MinimizerStatus minimize(Optimizable& optimizable, Vector x);
102     // Constrained minimization
103     MinimizerStatus minimize(Optimizable& optimizable, Vector x,
104 			     const Vector& x_lower, const Vector& x_upper);
105 
106     // Functions to set parameters defining the general behaviour of
107     // minimization algorithms
set_algorithm(MinimizerAlgorithm algo)108     void set_algorithm(MinimizerAlgorithm algo) { algorithm_ = algo; }
109     void set_algorithm(const std::string& algo);
set_max_iterations(int mi)110     void set_max_iterations(int mi)             { max_iterations_ = mi; }
set_converged_gradient_norm(Real cgn)111     void set_converged_gradient_norm(Real cgn)  { converged_gradient_norm_ = cgn; }
set_max_step_size(Real mss)112     void set_max_step_size(Real mss)            { max_step_size_ = mss; }
113     // Ensure that the last call to compute the cost function uses the
114     // "solution" state vector returned by minimize. This ensures that
115     // any variables in user classes that inherit from Optimizable are
116     // up to date with the returned state vector. The "order" argument
117     // indicates which the order of derivatives required (provided
118     // they are supported by the minimizing algorithm):
119     // 0=cost_function, 1=cost_function_gradient,
120     // 2=cost_function_gradient_hessian.
121     void ensure_updated_state(int order = 2)    { ensure_updated_state_ = order; }
122 
123     // Return parameters defining behaviour of minimization algorithms
algorithm()124     MinimizerAlgorithm algorithm() { return algorithm_; }
125     std::string algorithm_name();
max_iterations()126     int max_iterations() { return max_iterations_; }
converged_gradient_norm()127     Real converged_gradient_norm() { return converged_gradient_norm_; }
128 
129     // Functions to set parameters defining the behaviour of the
130     // Levenberg and Levenberg-Marquardt algorithm
131     void set_levenberg_damping_limits(Real damp_min, Real damp_max);
132     void set_levenberg_damping_start(Real damp_start);
133     void set_levenberg_damping_restart(Real damp_restart);
134     void set_levenberg_damping_multiplier(Real damp_multiply, Real damp_divide);
135 
136     // Query aspects of the algorithm progress after it has completed
n_iterations()137     int  n_iterations()        const { return n_iterations_; }
n_samples()138     int  n_samples()           const { return n_samples_; }
cost_function()139     Real cost_function()       const { return cost_function_; }
gradient_norm()140     Real gradient_norm()       const { return gradient_norm_; }
start_cost_function()141     Real start_cost_function() const { return start_cost_function_; }
status()142     MinimizerStatus status()   const { return status_; }
143 
144   protected:
145 
146     // Specific minimization algorithms
147 
148     MinimizerStatus
149     minimize_limited_memory_bfgs(Optimizable& optimizable, Vector x);
150 
151     // Call the Levenberg-Marquardt algorithm; if use_additive_damping
152     // is true then the Levenberg algorithm is used instead
153     MinimizerStatus
154     minimize_levenberg_marquardt(Optimizable& optimizable, Vector x,
155 				 bool use_additive_damping = false);
156     MinimizerStatus
157     minimize_levenberg_marquardt_bounded(Optimizable& optimizable, Vector x,
158 					 const Vector& min_x,
159 					 const Vector& max_x,
160 					 bool use_additive_damping = false);
161 
162     // DATA
163 
164     // Minimizer type
165     MinimizerAlgorithm algorithm_;
166 
167     // Variables controling the general behaviour of the minimizer,
168     // used by all gradient-based algorithms
169     int max_iterations_; // <=0 means no limit
170     Real max_step_size_;
171     Real converged_gradient_norm_;
172     int ensure_updated_state_;
173 
174     // Variables controling the specific behaviour of the
175     // Levenberg-Marquardt minimizer
176     Real levenberg_damping_min_;
177     Real levenberg_damping_max_;
178     Real levenberg_damping_multiplier_;
179     Real levenberg_damping_divider_;
180     Real levenberg_damping_start_;
181     Real levenberg_damping_restart_;
182 
183     // Variables set during the running of an algorithm and available
184     // to the user afterwards
185 
186     // Number of iterations that successfully reduced the cost function
187     int n_iterations_;
188 
189     // Number of calculations of the cost function
190     int n_samples_;
191 
192     Real start_cost_function_;
193     Real cost_function_;
194     Real gradient_norm_;
195     MinimizerStatus status_;
196   };
197 
198   // Implement inline member functions
199 
200   // Functions to set parameters defining the behaviour of the
201   // Levenberg and Levenberg-Marquardt algorithm
202   inline void
set_levenberg_damping_limits(Real damp_min,Real damp_max)203   Minimizer::set_levenberg_damping_limits(Real damp_min, Real damp_max) {
204     if (damp_min <= 0.0) {
205       throw optimization_exception("Minimum damping factor in Levenberg-Marquardt algorithm must be positive");
206     }
207     else if (damp_max <= damp_min) {
208       throw optimization_exception("Maximum damping factor must be greater than minimum in Levenberg-Marquardt algorithm");
209     }
210     levenberg_damping_min_ = damp_min;
211     levenberg_damping_max_ = damp_max;
212   }
213   inline void
set_levenberg_damping_start(Real damp_start)214   Minimizer::set_levenberg_damping_start(Real damp_start) {
215     if (damp_start < 0.0) {
216       throw optimization_exception("Start damping factor in Levenberg-Marquardt algorithm must be positive or zero");
217     }
218     levenberg_damping_start_ = damp_start;
219   }
220   inline void
set_levenberg_damping_restart(Real damp_restart)221   Minimizer::set_levenberg_damping_restart(Real damp_restart) {
222     if (damp_restart <= 0.0) {
223       throw optimization_exception("Restart damping factor in Levenberg-Marquardt algorithm must be positive");
224     }
225     levenberg_damping_restart_ = damp_restart;
226   }
227   inline void
set_levenberg_damping_multiplier(Real damp_multiply,Real damp_divide)228   Minimizer::set_levenberg_damping_multiplier(Real damp_multiply,
229 					      Real damp_divide) {
230     if (damp_multiply <= 1.0 || damp_divide <= 1.0) {
231       throw optimization_exception("Damping multipliers in Levenberg-Marquardt algorithm must be greater than one");
232     }
233     levenberg_damping_multiplier_ = damp_multiply;
234     levenberg_damping_divider_    = damp_divide;
235   }
236 
237 };
238 
239 #endif
240