/*
  This file is part of MADNESS.

  Copyright (C) 2007,2010 Oak Ridge National Laboratory

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  For more information please contact:

  Robert J. Harrison
  Oak Ridge National Laboratory
  One Bethel Valley Road
  P.O. Box 2008, MS-6367

  email: harrisonrj@ornl.gov
  tel:   865-241-3937
  fax:   865-572-0680

  $Id$
*/
#ifndef MADNESS_LINALG_SOLVERS_H__INCLUDED
#define MADNESS_LINALG_SOLVERS_H__INCLUDED

#include <madness/tensor/tensor.h>
#include <madness/world/print.h>
#include <iostream>
#include <memory>
#include <string>
#include <madness/tensor/tensor_lapack.h>

/*!
  \file solvers.h
  \brief Defines interfaces for optimization and non-linear equation solvers
  \ingroup solvers
*/

namespace madness {

    /*!
      \brief Solves a non-linear equation using KAIN (returns coefficients to compute the next vector)

      \ingroup solvers

      The Krylov-accelerated inexact-Newton method employs directional
      derivatives to estimate the Jacobian in the subspace and
      separately computes updates in the subspace and its complement.

      We wish to solve the non-linear equations \f$ f(x)=0 \f$ where
      \f$ f \f$ and \f$ x \f$ are vectors of the same dimension (e.g.,
      consider both being MADNESS functions).

      Define the following matrices and vector (with \f$ i \f$ and \f$
      j \f$ denoting previous iterations in the Krylov subspace and
      \f$ m \f$ the current iteration):
      \f{eqnarray*}{
      Q_{i j} & = & \langle x_i \mid f_j \rangle \\
      A_{i j} & = & \langle x_i - x_m \mid f_j - f_m \rangle = Q_{i j} - Q_{m j} - Q_{i m} + Q_{m m} \\
      b_i & = & -\langle x_i - x_m \mid f_m \rangle = -Q_{i m} + Q_{m m}
      \f}
      The subspace equation is of dimension \f$ m \f$ (assuming iterations
      are indexed from zero) and is given by
      \f[
      A c = b
      \f]
      The interior and exterior updates may be combined into one simple expression
      as follows. First, define an additional element of the solution vector
      \f[
      c_m = 1 - \sum_{i<m} c_i
      \f]
      and then the new vector (the guess for the next iteration) is given by
      \f[
      x_{m+1} = \sum_{i \le m}{c_i ( x_i - f_i)}
      \f]

      To employ the solver, in each iteration
      -# Compute the additional row and column of the matrix \f$ Q \f$,
         i.e., the inner products between the solution vectors (\f$ x_i \f$)
         and the residuals (\f$ f_j \f$).
      -# Call this routine to compute the coefficients \f$ c \f$ and from
         these compute the next solution vector (see the sketch below).
      -# Employ step restriction or line search as necessary to ensure a
         stable/robust solution.

      @param[in] Q The matrix of inner products between subspace vectors and residuals.
      @param[in] rcond Threshold for discarding small singular values in the subspace equations.
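
      \par Example

      A minimal sketch of a driver loop (illustrative only; \c initial_guess,
      \c compute_residual, and \c maxiter are hypothetical user-supplied
      names, and \c Q is rebuilt in full each iteration for clarity rather
      than being extended by one row and column):
      \code
      std::vector<Tensor<double>> X, F;          // subspace solutions and residuals
      Tensor<double> x = initial_guess();
      for (int iter=0; iter<maxiter; ++iter) {
          X.push_back(copy(x));
          F.push_back(compute_residual(x));      // f(x)
          const long n = X.size();
          Tensor<double> Q(n,n);
          for (long i=0; i<n; ++i)
              for (long j=0; j<n; ++j)
                  Q(i,j) = X[i].trace(F[j]);     // <x_i | f_j>
          Tensor<double> c = KAIN(Q);
          Tensor<double> xnew(x.dim(0));         // x_{m+1} = sum_i c_i (x_i - f_i)
          for (long i=0; i<n; ++i) xnew += c(i)*(X[i] - F[i]);
          x = xnew;                              // restrict/line-search the step here if needed
      }
      \endcode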
      @return Vector for computing the next solution vector
    */
    template <typename T>
    Tensor<T> KAIN(const Tensor<T>& Q, double rcond=1e-12) {
        const int nvec = Q.dim(0);
        const int m = nvec-1;

        if (nvec == 1) {
            Tensor<T> c(1);
            c(0L) = 1.0;
            return c;
        }

        Tensor<T> A(m,m);
        Tensor<T> b(m);
        for (long i=0; i<m; ++i) {
            b(i) = Q(m,m) - Q(i,m);
            for (long j=0; j<m; ++j) {
                A(i,j) = Q(i,j) - Q(m,j) - Q(i,m) + Q(m,m);
            }
        }

        // print("Q");
        // print(Q);
        // print("A");
        // print(A);
        // print("b");
        // print(b);

        Tensor<T> x;
        Tensor<double> s, sumsq;
        long rank;
        gelss(A, b, rcond, x, s, rank, sumsq);
        // print("singular values", s);
        // print("rank", rank);
        // print("solution", x);

        Tensor<T> c(nvec);
        T sumC = 0.0;
        for (long i=0; i<m; ++i) sumC += x(i);
        c(Slice(0,m-1)) = x;
        // print("SUMC", nvec, m, sumC);
        c(m) = 1.0 - sumC;

        // print("returned C", c);

        return c;
    }


    /// The interface to be provided by targets for the non-linear equation solver

    /// \ingroup solvers
    struct SolverTargetInterface {
        /// Should return the residual (the vector F(x))
        virtual Tensor<double> residual(const Tensor<double>& x) = 0;

        /// Override this to return \c true if the Jacobian is implemented
        virtual bool provides_jacobian() const {return false;}

        /// Some solvers require the Jacobian or are faster if an analytic form is available

        /// J(i,j) = partial F[i] / partial x[j] where F(x) is the vector-valued residual
        virtual Tensor<double> jacobian(const Tensor<double>& x) {
            throw "not implemented";
        }

        /// Implement this if it is advantageous to compute the residual and Jacobian simultaneously
        virtual void residual_and_jacobian(const Tensor<double>& x,
                                           Tensor<double>& residual, Tensor<double>& jacobian) {
            residual = this->residual(x);
            jacobian = this->jacobian(x);
        }

        virtual ~SolverTargetInterface() {}
    };
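
    /*
      Example: a minimal implementation of SolverTargetInterface (an
      illustrative sketch, not part of the library). It encodes the
      equations F_i(x) = x_i^2 - a = 0 and supplies the (diagonal) Jacobian
      analytically so that provides_jacobian() can return true.

          struct SqrtTarget : public SolverTargetInterface {
              double a;
              SqrtTarget(double a) : a(a) {}
              Tensor<double> residual(const Tensor<double>& x) {
                  Tensor<double> f(x.dim(0));
                  for (long i=0; i<x.dim(0); ++i) f(i) = x(i)*x(i) - a;
                  return f;
              }
              bool provides_jacobian() const {return true;}
              Tensor<double> jacobian(const Tensor<double>& x) {
                  Tensor<double> J(x.dim(0), x.dim(0));   // J(i,j) = dF[i]/dx[j]
                  for (long i=0; i<x.dim(0); ++i) J(i,i) = 2.0*x(i);
                  return J;
              }
          };
    */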

    /// The interface to be provided by functions to be optimized

    /// \ingroup solvers
    struct OptimizationTargetInterface {
        /// Should return the value of the objective function
        virtual double value(const Tensor<double>& x) = 0;

        /// Override this to return \c true if the derivative is implemented
        virtual bool provides_gradient() const {return false;}

        /// Should return the derivative of the function
        virtual Tensor<double> gradient(const Tensor<double>& x) {
            throw "not implemented";
        }

        /// Reimplement if it is more efficient to evaluate both the value and the gradient in one call
        virtual void value_and_gradient(const Tensor<double>& x,
                                        double& value,
                                        Tensor<double>& gradient) {
            value = this->value(x);
            gradient = this->gradient(x);
        }

        /// Numerical test of the derivative ... optionally prints to stdout, returns max abs error
        double test_gradient(Tensor<double>& x, double value_precision, bool doprint=true);

        virtual ~OptimizationTargetInterface() {}
    };


    /// The interface to be provided by optimizers

    /// \ingroup solvers
    struct OptimizerInterface {
        virtual bool optimize(Tensor<double>& x) = 0;
        virtual bool converged() const = 0;
        virtual double value() const = 0;
        virtual double gradient_norm() const = 0;
        virtual ~OptimizerInterface() {}
    };


    /// Unconstrained minimization via steepest descent

    /// \ingroup solvers
    class SteepestDescent : public OptimizerInterface {
        std::shared_ptr<OptimizationTargetInterface> target;
        const double tol;
        double f;
        double gnorm;

    public:
        SteepestDescent(const std::shared_ptr<OptimizationTargetInterface>& tar,
                        double tol = 1e-6,
                        double value_precision = 1e-12,
                        double gradient_precision = 1e-12);

        bool optimize(Tensor<double>& x);

        bool converged() const;

        double gradient_norm() const;

        double value() const;

        virtual ~SteepestDescent() {}
    };
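
    /*
      Example: minimizing a simple quadratic with SteepestDescent (an
      illustrative sketch, not part of the library); QuasiNewton below is
      driven the same way. Parabola implements OptimizationTargetInterface
      with an analytic gradient.

          struct Parabola : public OptimizationTargetInterface {
              double value(const Tensor<double>& x) {
                  return 0.5*x.trace(x);              // f(x) = x.x/2
              }
              bool provides_gradient() const {return true;}
              Tensor<double> gradient(const Tensor<double>& x) {
                  return copy(x);                     // grad f(x) = x
              }
          };

          std::shared_ptr<OptimizationTargetInterface> target(new Parabola());
          SteepestDescent solver(target, 1e-8);
          Tensor<double> x(3L);
          x.fillrandom();                             // arbitrary starting point
          solver.optimize(x);                         // updates x in place
          // solver.converged(), solver.value(), solver.gradient_norm() report status
    */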

    /// Optimization via quasi-Newton (BFGS or SR1 update)

    /// \ingroup solvers
    /// This is presently not a low-memory algorithm ... we really need one!
    class QuasiNewton : public OptimizerInterface {
    protected:
        std::string update;              // One of BFGS or SR1
        std::shared_ptr<OptimizationTargetInterface> target;
        const int maxiter;
        const double tol;
        const double value_precision;    // Numerical precision of the value
        const double gradient_precision; // Numerical precision of each element of the residual
        double f;
        double gnorm;
        Tensor<double> h;
        int n;
        bool printtest;

    public:

        /// Made static so that other QN classes have access to it
        static double line_search(double a1, double f0, double dxgrad,
                                  const Tensor<double>& x, const Tensor<double>& dx,
                                  std::shared_ptr<OptimizationTargetInterface> target,
                                  double value_precision);

        /// Made static so that other QN classes have access to it
        static void hessian_update_sr1(const Tensor<double>& s, const Tensor<double>& y,
                                       Tensor<double>& hessian);

        /// Made static so that other QN classes have access to it
        static void hessian_update_bfgs(const Tensor<double>& dx,
                                        const Tensor<double>& dg, Tensor<double>& hessian);

        Tensor<double> new_search_direction(const Tensor<double>& g) const;

    public:
        QuasiNewton(const std::shared_ptr<OptimizationTargetInterface>& tar,
                    int maxiter = 20,
                    double tol = 1e-6,
                    double value_precision = 1e-12,
                    double gradient_precision = 1e-12);

        /// Choose the update method (currently only "BFGS" or "SR1")
        void set_update(const std::string& method);

        /// Enable or disable printing of the numerical gradient test
        void set_test(const bool& test_level);

        /// Runs the optimizer

        /// @return True if converged
        bool optimize(Tensor<double>& x);

        /// After running the optimizer returns true if converged

        /// @return True if converged
        bool converged() const;

        /// Value of the objective function

        /// @return Value of the objective function
        double value() const;

        /// Resets the Hessian to the default guess
        void reset_hessian() {h = Tensor<double>();}

        /// Sets the Hessian to the given matrix
        void set_hessian(const Tensor<double>& matrix) {h = madness::copy(matrix);}

        /// Value of the gradient norm

        /// @return Norm of the gradient of the objective function
        double gradient_norm() const;

        virtual ~QuasiNewton() {}
    };
}

#endif // MADNESS_LINALG_SOLVERS_H__INCLUDED