1 /* -*- c++ -*- (enables emacs c++ mode) */
2 /*===========================================================================
3 
4  Copyright (C) 2004-2020 Yves Renard
5 
6  This file is a part of GetFEM
7 
8  GetFEM  is  free software;  you  can  redistribute  it  and/or modify it
9  under  the  terms  of the  GNU  Lesser General Public License as published
10  by  the  Free Software Foundation;  either version 3 of the License,  or
11  (at your option) any later version along with the GCC Runtime Library
12  Exception either version 3.1 or (at your option) any later version.
13  This program  is  distributed  in  the  hope  that it will be useful,  but
14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15  or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16  License and GCC Runtime Library Exception for more details.
17  You  should  have received a copy of the GNU Lesser General Public License
18  along  with  this program;  if not, write to the Free Software Foundation,
19  Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
20 
21  As a special exception, you  may use  this file  as it is a part of a free
22  software  library  without  restriction.  Specifically,  if   other  files
23  instantiate  templates  or  use macros or inline functions from this file,
24  or  you compile this  file  and  link  it  with other files  to produce an
25  executable, this file  does  not  by itself cause the resulting executable
26  to be covered  by the GNU Lesser General Public License.  This   exception
27  does not  however  invalidate  any  other  reasons why the executable file
28  might be covered by the GNU Lesser General Public License.
29 
30 ===========================================================================*/
31 
32 /**@file gmm_solver_bfgs.h
33    @author  Yves Renard <Yves.Renard@insa-lyon.fr>
34    @date October 14 2004.
35    @brief Implements BFGS (Broyden, Fletcher, Goldfarb, Shanno) algorithm.
36  */
37 #ifndef GMM_BFGS_H
38 #define GMM_BFGS_H
39 
40 #include "gmm_kernel.h"
41 #include "gmm_iter.h"
42 
43 namespace gmm {
44 
45   // BFGS algorithm (Broyden, Fletcher, Goldfarb, Shanno)
46   // Quasi Newton method for optimization problems.
47   // with Wolfe Line search.
48 
49 
50   // delta[k] = x[k+1] - x[k]
51   // gamma[k] = grad f(x[k+1]) - grad f(x[k])
52   // H[0] = I
53   // BFGS : zeta[k] = delta[k] - H[k] gamma[k]
54   // DFP  : zeta[k] = H[k] gamma[k]
55   // tau[k] = gamma[k]^T zeta[k]
56   // rho[k] = 1 / gamma[k]^T delta[k]
57   // BFGS : H[k+1] = H[k] + rho[k](zeta[k] delta[k]^T + delta[k] zeta[k]^T)
58   //                 - rho[k]^2 tau[k] delta[k] delta[k]^T
59   // DFP  : H[k+1] = H[k] + rho[k] delta[k] delta[k]^T
60   //                 - (1/tau[k])zeta[k] zeta[k]^T
61 
62   // Object representing the inverse of the Hessian
63   template <typename VECTOR> struct bfgs_invhessian {
64 
65     typedef typename linalg_traits<VECTOR>::value_type T;
66     typedef typename number_traits<T>::magnitude_type R;
67 
68     std::vector<VECTOR> delta, gamma, zeta;
69     std::vector<T> tau, rho;
70     int version;
71 
hmultbfgs_invhessian72     template<typename VEC1, typename VEC2> void hmult(const VEC1 &X, VEC2 &Y) {
73       copy(X, Y);
74       for (size_type k = 0 ; k < delta.size(); ++k) {
75 	T xdelta = vect_sp(X, delta[k]), xzeta = vect_sp(X, zeta[k]);
76 	switch (version) {
77 	case 0 : // BFGS
78 	  add(scaled(zeta[k], rho[k]*xdelta), Y);
79 	  add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y);
80 	  break;
81 	case 1 : // DFP
82 	  add(scaled(delta[k], rho[k]*xdelta), Y);
83 	  add(scaled(zeta[k], -xzeta/tau[k]), Y);
84 	  break;
85 	}
86       }
87     }
88 
restartbfgs_invhessian89     void restart(void) {
90       delta.resize(0); gamma.resize(0); zeta.resize(0);
91       tau.resize(0); rho.resize(0);
92     }
93 
94     template<typename VECT1, typename VECT2>
updatebfgs_invhessian95     void update(const VECT1 &deltak, const VECT2 &gammak) {
96       T vsp = vect_sp(deltak, gammak);
97       if (vsp == T(0)) return;
98       size_type N = vect_size(deltak), k = delta.size();
99       VECTOR Y(N);
100       hmult(gammak, Y);
101       delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1);
102       tau.resize(k+1); rho.resize(k+1);
103       resize(delta[k], N); resize(gamma[k], N); resize(zeta[k], N);
104       gmm::copy(deltak, delta[k]);
105       gmm::copy(gammak, gamma[k]);
106       rho[k] = R(1) / vsp;
107       if (version == 0)
108 	add(delta[k], scaled(Y, -1), zeta[k]);
109       else
110 	gmm::copy(Y, zeta[k]);
111       tau[k] = vect_sp(gammak,  zeta[k]);
112     }
113 
114     bfgs_invhessian(int v = 0) { version = v; }
115   };
116 
117 
118   template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
119   void bfgs(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
120 	    int restart, iteration& iter, int version = 0,
121 	    double lambda_init=0.001, double print_norm=1.0) {
122 
123     typedef typename linalg_traits<VECTOR>::value_type T;
124     typedef typename number_traits<T>::magnitude_type R;
125 
126     bfgs_invhessian<VECTOR> invhessian(version);
127     VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
128     grad(x, r);
129     R lambda = lambda_init, valx = f(x), valy;
130     int nb_restart(0);
131 
132     if (iter.get_noisy() >= 1) cout << "value " << valx / print_norm << " ";
133     while (! iter.finished_vect(r)) {
134 
135       invhessian.hmult(r, d); gmm::scale(d, T(-1));
136 
137       // Wolfe Line search
138       R derivative = gmm::vect_sp(r, d);
139       R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
140       bool unbounded = true, blocked = false, grad_computed = false;
141 
142       for(;;) {
143 	add(x, scaled(d, lambda), y);
144 	valy = f(y);
145 	if (iter.get_noisy() >= 2) {
146 	  cout.precision(15);
147 	  cout << "Wolfe line search, lambda = " << lambda
148  	       << " value = " << valy /print_norm << endl;
149 // 	       << " derivative = " << derivative
150 // 	       << " lambda min = " << lambda_min << " lambda max = "
151 // 	       << lambda_max << endl; getchar();
152 	}
153 	if (valy <= valx + m1 * lambda * derivative) {
154 	  grad(y, r2); grad_computed = true;
155 	  T derivative2 = gmm::vect_sp(r2, d);
156 	  if (derivative2 >= m2*derivative) break;
157 	  lambda_min = lambda;
158 	}
159 	else {
160 	  lambda_max = lambda;
161 	  unbounded = false;
162 	}
163 	if (unbounded) lambda *= R(10);
164 	else  lambda = (lambda_max + lambda_min) / R(2);
165 	if (lambda == lambda_max || lambda == lambda_min) break;
166 	// valy <= R(2)*valx replaced by
167 	// valy <= valx + gmm::abs(derivative)*lambda_init
168 	// for compatibility with negative values (08.24.07).
169 	if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
170 	    (lambda < R(lambda_init*1E-8) ||
171 	     (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
172 	{ blocked = true; lambda = lambda_init; break; }
173       }
174 
175       // Rank two update
176       ++iter;
177       if (!grad_computed) grad(y, r2);
178       gmm::add(scaled(r2, -1), r);
179       if ((iter.get_iteration() % restart) == 0 || blocked) {
180 	if (iter.get_noisy() >= 1) cout << "Restart\n";
181 	invhessian.restart();
182 	if (++nb_restart > 10) {
183 	  if (iter.get_noisy() >= 1) cout << "BFGS is blocked, exiting\n";
184 	  return;
185 	}
186       }
187       else {
188 	invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
189 	nb_restart = 0;
190       }
191       copy(r2, r); copy(y, x); valx = valy;
192       if (iter.get_noisy() >= 1)
193 	cout << "BFGS value " << valx/print_norm << "\t";
194     }
195 
196   }
197 
198 
199   template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
200   inline void dfp(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
201 	    int restart, iteration& iter, int version = 1) {
202     bfgs(f, grad, x, restart, iter, version);
203 
204   }
205 
206 
207 }
208 
209 #endif
210 
211