1 /* -*- c++ -*- (enables emacs c++ mode) */ 2 /*=========================================================================== 3 4 Copyright (C) 2004-2020 Yves Renard 5 6 This file is a part of GetFEM 7 8 GetFEM is free software; you can redistribute it and/or modify it 9 under the terms of the GNU Lesser General Public License as published 10 by the Free Software Foundation; either version 3 of the License, or 11 (at your option) any later version along with the GCC Runtime Library 12 Exception either version 3.1 or (at your option) any later version. 13 This program is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License and GCC Runtime Library Exception for more details. 17 You should have received a copy of the GNU Lesser General Public License 18 along with this program; if not, write to the Free Software Foundation, 19 Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 20 21 As a special exception, you may use this file as it is a part of a free 22 software library without restriction. Specifically, if other files 23 instantiate templates or use macros or inline functions from this file, 24 or you compile this file and link it with other files to produce an 25 executable, this file does not by itself cause the resulting executable 26 to be covered by the GNU Lesser General Public License. This exception 27 does not however invalidate any other reasons why the executable file 28 might be covered by the GNU Lesser General Public License. 29 30 ===========================================================================*/ 31 32 /**@file gmm_solver_bfgs.h 33 @author Yves Renard <Yves.Renard@insa-lyon.fr> 34 @date October 14 2004. 35 @brief Implements BFGS (Broyden, Fletcher, Goldfarb, Shanno) algorithm. 36 */ 37 #ifndef GMM_BFGS_H 38 #define GMM_BFGS_H 39 40 #include "gmm_kernel.h" 41 #include "gmm_iter.h" 42 43 namespace gmm { 44 45 // BFGS algorithm (Broyden, Fletcher, Goldfarb, Shanno) 46 // Quasi Newton method for optimization problems. 47 // with Wolfe Line search. 48 49 50 // delta[k] = x[k+1] - x[k] 51 // gamma[k] = grad f(x[k+1]) - grad f(x[k]) 52 // H[0] = I 53 // BFGS : zeta[k] = delta[k] - H[k] gamma[k] 54 // DFP : zeta[k] = H[k] gamma[k] 55 // tau[k] = gamma[k]^T zeta[k] 56 // rho[k] = 1 / gamma[k]^T delta[k] 57 // BFGS : H[k+1] = H[k] + rho[k](zeta[k] delta[k]^T + delta[k] zeta[k]^T) 58 // - rho[k]^2 tau[k] delta[k] delta[k]^T 59 // DFP : H[k+1] = H[k] + rho[k] delta[k] delta[k]^T 60 // - (1/tau[k])zeta[k] zeta[k]^T 61 62 // Object representing the inverse of the Hessian 63 template <typename VECTOR> struct bfgs_invhessian { 64 65 typedef typename linalg_traits<VECTOR>::value_type T; 66 typedef typename number_traits<T>::magnitude_type R; 67 68 std::vector<VECTOR> delta, gamma, zeta; 69 std::vector<T> tau, rho; 70 int version; 71 hmultbfgs_invhessian72 template<typename VEC1, typename VEC2> void hmult(const VEC1 &X, VEC2 &Y) { 73 copy(X, Y); 74 for (size_type k = 0 ; k < delta.size(); ++k) { 75 T xdelta = vect_sp(X, delta[k]), xzeta = vect_sp(X, zeta[k]); 76 switch (version) { 77 case 0 : // BFGS 78 add(scaled(zeta[k], rho[k]*xdelta), Y); 79 add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y); 80 break; 81 case 1 : // DFP 82 add(scaled(delta[k], rho[k]*xdelta), Y); 83 add(scaled(zeta[k], -xzeta/tau[k]), Y); 84 break; 85 } 86 } 87 } 88 restartbfgs_invhessian89 void restart(void) { 90 delta.resize(0); gamma.resize(0); zeta.resize(0); 91 tau.resize(0); rho.resize(0); 92 } 93 94 template<typename VECT1, typename VECT2> updatebfgs_invhessian95 void update(const VECT1 &deltak, const VECT2 &gammak) { 96 T vsp = vect_sp(deltak, gammak); 97 if (vsp == T(0)) return; 98 size_type N = vect_size(deltak), k = delta.size(); 99 VECTOR Y(N); 100 hmult(gammak, Y); 101 delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1); 102 tau.resize(k+1); rho.resize(k+1); 103 resize(delta[k], N); resize(gamma[k], N); resize(zeta[k], N); 104 gmm::copy(deltak, delta[k]); 105 gmm::copy(gammak, gamma[k]); 106 rho[k] = R(1) / vsp; 107 if (version == 0) 108 add(delta[k], scaled(Y, -1), zeta[k]); 109 else 110 gmm::copy(Y, zeta[k]); 111 tau[k] = vect_sp(gammak, zeta[k]); 112 } 113 114 bfgs_invhessian(int v = 0) { version = v; } 115 }; 116 117 118 template <typename FUNCTION, typename DERIVATIVE, typename VECTOR> 119 void bfgs(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x, 120 int restart, iteration& iter, int version = 0, 121 double lambda_init=0.001, double print_norm=1.0) { 122 123 typedef typename linalg_traits<VECTOR>::value_type T; 124 typedef typename number_traits<T>::magnitude_type R; 125 126 bfgs_invhessian<VECTOR> invhessian(version); 127 VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x)); 128 grad(x, r); 129 R lambda = lambda_init, valx = f(x), valy; 130 int nb_restart(0); 131 132 if (iter.get_noisy() >= 1) cout << "value " << valx / print_norm << " "; 133 while (! iter.finished_vect(r)) { 134 135 invhessian.hmult(r, d); gmm::scale(d, T(-1)); 136 137 // Wolfe Line search 138 R derivative = gmm::vect_sp(r, d); 139 R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57; 140 bool unbounded = true, blocked = false, grad_computed = false; 141 142 for(;;) { 143 add(x, scaled(d, lambda), y); 144 valy = f(y); 145 if (iter.get_noisy() >= 2) { 146 cout.precision(15); 147 cout << "Wolfe line search, lambda = " << lambda 148 << " value = " << valy /print_norm << endl; 149 // << " derivative = " << derivative 150 // << " lambda min = " << lambda_min << " lambda max = " 151 // << lambda_max << endl; getchar(); 152 } 153 if (valy <= valx + m1 * lambda * derivative) { 154 grad(y, r2); grad_computed = true; 155 T derivative2 = gmm::vect_sp(r2, d); 156 if (derivative2 >= m2*derivative) break; 157 lambda_min = lambda; 158 } 159 else { 160 lambda_max = lambda; 161 unbounded = false; 162 } 163 if (unbounded) lambda *= R(10); 164 else lambda = (lambda_max + lambda_min) / R(2); 165 if (lambda == lambda_max || lambda == lambda_min) break; 166 // valy <= R(2)*valx replaced by 167 // valy <= valx + gmm::abs(derivative)*lambda_init 168 // for compatibility with negative values (08.24.07). 169 if (valy <= valx + R(2)*gmm::abs(derivative)*lambda && 170 (lambda < R(lambda_init*1E-8) || 171 (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8)))) 172 { blocked = true; lambda = lambda_init; break; } 173 } 174 175 // Rank two update 176 ++iter; 177 if (!grad_computed) grad(y, r2); 178 gmm::add(scaled(r2, -1), r); 179 if ((iter.get_iteration() % restart) == 0 || blocked) { 180 if (iter.get_noisy() >= 1) cout << "Restart\n"; 181 invhessian.restart(); 182 if (++nb_restart > 10) { 183 if (iter.get_noisy() >= 1) cout << "BFGS is blocked, exiting\n"; 184 return; 185 } 186 } 187 else { 188 invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1)); 189 nb_restart = 0; 190 } 191 copy(r2, r); copy(y, x); valx = valy; 192 if (iter.get_noisy() >= 1) 193 cout << "BFGS value " << valx/print_norm << "\t"; 194 } 195 196 } 197 198 199 template <typename FUNCTION, typename DERIVATIVE, typename VECTOR> 200 inline void dfp(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x, 201 int restart, iteration& iter, int version = 1) { 202 bfgs(f, grad, x, restart, iter, version); 203 204 } 205 206 207 } 208 209 #endif 210 211