/*
  This file is part of MADNESS.

  Copyright (C) 2007,2010 Oak Ridge National Laboratory

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  For more information please contact:

  Robert J. Harrison
  Oak Ridge National Laboratory
  One Bethel Valley Road
  P.O. Box 2008, MS-6367

  email: harrisonrj@ornl.gov
  tel:   865-241-3937
  fax:   865-572-0680

  $Id$
*/
#ifndef MADNESS_LINALG_SOLVERS_H__INCLUDED
#define MADNESS_LINALG_SOLVERS_H__INCLUDED

#include <madness/tensor/tensor.h>
#include <madness/world/print.h>
#include <iostream>
#include <madness/tensor/tensor_lapack.h>
#include <memory>   // for std::shared_ptr used by the optimizer classes below

/*!
  \file solvers.h
  \brief Defines interfaces for optimization and non-linear equation solvers
  \ingroup solvers
*/

namespace madness {

    /*!
      \brief Solves a non-linear equation using KAIN (returns coefficients to compute the next vector)

      \ingroup solvers

      The Krylov-accelerated inexact-Newton method employs directional
      derivatives to estimate the Jacobian in the subspace and
      separately computes updates in the subspace and its complement.

      We wish to solve the non-linear equations \f$ f(x)=0 \f$ where
      \f$ f \f$ and \f$ x \f$ are vectors of the same dimension (e.g.,
      consider both being MADNESS functions).

      Define the following matrices and vector (with \f$ i \f$ and \f$
      j \f$ denoting previous iterations in the Krylov subspace and
      \f$ m \f$ the current iteration):
      \f{eqnarray*}{
      Q_{i j} & = & \langle x_i \mid f_j \rangle \\
      A_{i j} & = & \langle x_i - x_m \mid f_j - f_m \rangle = Q_{i j} - Q_{m j} - Q_{i m} + Q_{m m} \\
      b_i & = & -\langle x_i - x_m \mid f_m \rangle = -Q_{i m} + Q_{m m}
      \f}
      The subspace equation is of dimension \f$ m \f$ (assuming iterations
      are indexed from zero) and is given by
      \f[
      A c = b
      \f]
      The interior and exterior updates may be combined into one simple expression
      as follows. First, define an additional element of the solution vector
      \f[
      c_m = 1 - \sum_{i<m} c_i
      \f]
      and then the new vector (guess for the next iteration) is given by
      \f[
      x_{m+1} = \sum_{i \le m}{c_i ( x_i - f_i)}
      \f]

      To employ the solver, at each iteration
      -# compute the additional row and column of the matrix \f$ Q \f$,
      i.e., the inner products between the solution vectors (\f$ x_i \f$) and the
      residuals (\f$ f_j \f$);
      -# call this routine to compute the coefficients \f$ c \f$ and from these
      compute the next solution vector (a usage sketch is given after the function below); and
      -# employ step restriction or a line search as necessary to ensure a stable/robust solution.

      @param[in] Q The matrix of inner products between the subspace vectors and residuals.
      @param[in] rcond Threshold for discarding small singular values in the subspace equations.
      @return Vector of coefficients \f$ c \f$ used to compute the next solution vector
    */
    template <typename T>
    Tensor<T> KAIN(const Tensor<T>& Q, double rcond=1e-12) {
        const int nvec = Q.dim(0);
        const int m = nvec-1;

        // With only one vector in the subspace the next vector is simply x0 - f0
        if (nvec == 1) {
            Tensor<T> c(1);
            c(0L) = 1.0;
            return c;
        }

        // Form the reduced subspace system A c = b from the inner products in Q
        Tensor<T> A(m,m);
        Tensor<T> b(m);
        for (long i=0; i<m; ++i) {
            b(i) = Q(m,m) - Q(i,m);
            for (long j=0; j<m; ++j) {
                A(i,j) = Q(i,j) - Q(m,j) - Q(i,m) + Q(m,m);
            }
        }

        // print("Q"); print(Q);
        // print("A"); print(A);
        // print("b"); print(b);

        // Solve A c = b by SVD-based least squares; singular values below rcond are discarded
        Tensor<T> x;
        Tensor<double> s, sumsq;
        long rank;
        gelss(A, b, rcond, x, s, rank, sumsq);
        // print("singular values", s);
        // print("rank", rank);
        // print("solution", x);

        // The coefficient of the current iterate is fixed so that the coefficients sum to one
        Tensor<T> c(nvec);
        T sumC = 0.0;
        for (long i=0; i<m; ++i) sumC += x(i);
        c(Slice(0,m-1)) = x;
        // print("SUMC", nvec, m, sumC);
        c(m) = 1.0 - sumC;

        // print("returned C", c);

        return c;
    }
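
    /*
      Usage sketch for KAIN (illustrative only, not part of the library API).
      The residual routine my_residual(), the starting guess, the iteration
      limit, and the convergence tolerance below are hypothetical placeholders,
      and the solution is assumed to be a plain 1-d Tensor<double> so that the
      inner products <x_i|f_j> reduce to ordinary dot products.  Step
      restriction is omitted for brevity.

          std::vector<Tensor<double>> xs, fs;        // history of iterates and residuals
          Tensor<double> x = my_initial_guess();     // hypothetical starting point
          for (int iter=0; iter<50; ++iter) {
              Tensor<double> f = my_residual(x);     // residual F(x) at the current iterate
              if (f.normf() < 1e-8) break;           // illustrative convergence test

              xs.push_back(copy(x));
              fs.push_back(copy(f));

              const long n = xs.size();
              Tensor<double> Q(n,n);                 // Q(i,j) = <x_i | f_j>
              for (long i=0; i<n; ++i)
                  for (long j=0; j<n; ++j) {
                      double qij = 0.0;
                      for (long k=0; k<x.dim(0); ++k) qij += xs[i](k)*fs[j](k);
                      Q(i,j) = qij;
                  }

              Tensor<double> c = KAIN(Q);            // subspace coefficients

              Tensor<double> xnew(x.dim(0));         // x_{m+1} = sum_i c_i (x_i - f_i)
              for (long i=0; i<n; ++i) xnew += c(i)*(xs[i] - fs[i]);
              x = xnew;
          }
    */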
143 
144 
145     /// The interface to be provided by targets for non-linear equation solver
146 
147     /// \ingroup solvers
148     struct SolverTargetInterface {
149         /// Should return the resdiual (vector F(x))
150         virtual Tensor<double> residual(const Tensor<double>& x) = 0;
151 
152         /// Override this to return \c true if the Jacobian is implemented
provides_jacobianSolverTargetInterface153         virtual bool provides_jacobian() const {return false;}
154 
155         /// Some solvers require the jacobian or are faster if an analytic form is available
156 
157         /// J(i,j) = partial F[i] over partial x[j] where F(x) is the vector valued residual
jacobianSolverTargetInterface158         virtual Tensor<double> jacobian(const Tensor<double>& x) {
159             throw "not implemented";
160         }
161 
162         /// Implement this if advantageous to compute residual and jacobian simultaneously
residual_and_jacobianSolverTargetInterface163         virtual void residual_and_jacobian(const Tensor<double>& x,
164                                            Tensor<double>& residual, Tensor<double>& jacobian) {
165             residual = this->residual(x);
166             jacobian = this->jacobian(x);
167         }
168 
~SolverTargetInterfaceSolverTargetInterface169         virtual ~SolverTargetInterface() {}
170     };
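
    /*
      Sketch of a target implementation (illustrative only).  It encodes the
      small system F_i(x) = x_i^2 - a_i = 0; the class name and the data are
      hypothetical.  Since the Jacobian is analytically available it is
      provided as well.

          struct MySquareRootTarget : public SolverTargetInterface {
              Tensor<double> a;                          // solve x_i^2 = a_i
              MySquareRootTarget(const Tensor<double>& a) : a(copy(a)) {}

              Tensor<double> residual(const Tensor<double>& x) {
                  Tensor<double> f(x.dim(0));
                  for (long i=0; i<x.dim(0); ++i) f(i) = x(i)*x(i) - a(i);
                  return f;
              }

              bool provides_jacobian() const {return true;}

              Tensor<double> jacobian(const Tensor<double>& x) {
                  Tensor<double> J(x.dim(0), x.dim(0));  // dF_i/dx_j = 2 x_i delta_ij
                  for (long i=0; i<x.dim(0); ++i) J(i,i) = 2.0*x(i);
                  return J;
              }
          };
    */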


    /// The interface to be provided by functions to be optimized

    /// \ingroup solvers
    struct OptimizationTargetInterface {
        /// Should return the value of the objective function
        virtual double value(const Tensor<double>& x) = 0;

        /// Override this to return \c true if the derivative is implemented
        virtual bool provides_gradient() const {return false;}

        /// Should return the derivative (gradient) of the objective function
        virtual Tensor<double> gradient(const Tensor<double>& x) {
            throw "not implemented";
        }

        /// Reimplement this if it is more efficient to evaluate the value and gradient in one call
        virtual void value_and_gradient(const Tensor<double>& x,
                                        double& value,
                                        Tensor<double>& gradient) {
            value = this->value(x);
            gradient = this->gradient(x);
        }

        /// Numerical test of the derivative ... optionally prints to stdout, returns the maximum absolute error
        double test_gradient(Tensor<double>& x, double value_precision, bool doprint=true);

        virtual ~OptimizationTargetInterface() {}
    };
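
    /*
      Sketch of an optimization target (illustrative only): the convex
      quadratic f(x) = 1/2 x^T x - b^T x, whose gradient is x - b and whose
      minimizer is x = b.  The class name and the vector b are hypothetical.

          struct MyQuadratic : public OptimizationTargetInterface {
              Tensor<double> b;
              MyQuadratic(const Tensor<double>& b) : b(copy(b)) {}

              double value(const Tensor<double>& x) {
                  double v = 0.0;
                  for (long i=0; i<x.dim(0); ++i) v += 0.5*x(i)*x(i) - b(i)*x(i);
                  return v;
              }

              bool provides_gradient() const {return true;}

              Tensor<double> gradient(const Tensor<double>& x) {
                  return x - b;                      // grad f = x - b
              }
          };

      test_gradient() can be used to check the analytic gradient against a
      numerical one before handing the target to an optimizer.
    */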


    /// The interface to be provided by optimizers

    /// \ingroup solvers
    struct OptimizerInterface {
        virtual bool optimize(Tensor<double>& x) = 0;
        virtual bool converged() const = 0;
        virtual double value() const = 0;
        virtual double gradient_norm() const = 0;
        virtual ~OptimizerInterface() {}
    };


    /// Unconstrained minimization via steepest descent

    /// \ingroup solvers
    class SteepestDescent : public OptimizerInterface {
        std::shared_ptr<OptimizationTargetInterface> target;
        const double tol;
        double f;
        double gnorm;

    public:
        SteepestDescent(const std::shared_ptr<OptimizationTargetInterface>& tar,
                        double tol = 1e-6,
                        double value_precision = 1e-12,
                        double gradient_precision = 1e-12);

        bool optimize(Tensor<double>& x);

        bool converged() const;

        double gradient_norm() const;

        double value() const;

        virtual ~SteepestDescent() { }
    };

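    /*
      Usage sketch (illustrative): any OptimizerInterface implementation is
      driven the same way.  MyQuadratic is the hypothetical target sketched
      above, and the tolerance is an example only.

          std::shared_ptr<OptimizationTargetInterface> target(new MyQuadratic(b));
          SteepestDescent sd(target, 1e-6);
          Tensor<double> x(b.dim(0));                // starting guess (zeros)
          sd.optimize(x);                            // x is updated in place
          print("converged?", sd.converged(), "value", sd.value(), "|g|", sd.gradient_norm());
    */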

    /// Optimization via quasi-Newton (BFGS or SR1 update)

    /// \ingroup solvers
    /// This is presently not a low-memory algorithm ... we really need one!
    class QuasiNewton : public OptimizerInterface {
    protected:
        std::string update;              // One of "BFGS" or "SR1"
        std::shared_ptr<OptimizationTargetInterface> target;
        const int maxiter;
        const double tol;
        const double value_precision;    // Numerical precision of the value
        const double gradient_precision; // Numerical precision of each element of the gradient
        double f;
        double gnorm;
        Tensor<double> h;
        int n;
        bool printtest;

    public:

        /// Made static so that other QN classes have access to it
        static double line_search(double a1, double f0, double dxgrad,
                const Tensor<double>& x, const Tensor<double>& dx,
                std::shared_ptr<OptimizationTargetInterface> target,
                double value_precision);

        /// Made static so that other QN classes have access to it
        static void hessian_update_sr1(const Tensor<double>& s, const Tensor<double>& y,
                Tensor<double>& hessian);

        /// Made static so that other QN classes have access to it
        static void hessian_update_bfgs(const Tensor<double>& dx,
                     const Tensor<double>& dg, Tensor<double>& hessian);

        Tensor<double> new_search_direction(const Tensor<double>& g) const;

    public:
        QuasiNewton(const std::shared_ptr<OptimizationTargetInterface>& tar,
                    int maxiter = 20,
                    double tol = 1e-6,
                    double value_precision = 1e-12,
                    double gradient_precision = 1e-12);

        /// Choose the update method (currently only "BFGS" or "SR1")
        void set_update(const std::string& method);

        /// Set the \c printtest flag (enables a numerical test of the gradient)
        void set_test(const bool& test_level);

        /// Runs the optimizer

        /// @return True if converged
        bool optimize(Tensor<double>& x);

        /// After running the optimizer returns true if converged

        /// @return True if converged
        bool converged() const;

        /// Value of the objective function

        /// @return Value of the objective function
        double value() const;

        /// Resets the Hessian to the default guess
        void reset_hessian() {h = Tensor<double>();}

        /// Sets the Hessian to the given matrix
        void set_hessian(const Tensor<double>& matrix) {h = madness::copy(matrix);}

        /// Norm of the gradient

        /// @return Norm of the gradient of the objective function
        double gradient_norm() const;

        virtual ~QuasiNewton() {}
    };
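
    /*
      Usage sketch for QuasiNewton (illustrative).  MyQuadratic is the
      hypothetical target sketched above; the iteration limit and tolerance
      are examples only.

          std::shared_ptr<OptimizationTargetInterface> target(new MyQuadratic(b));
          QuasiNewton qn(target, 20, 1e-6);
          qn.set_update("BFGS");                     // or "SR1"
          // qn.set_hessian(h0);                     // optionally seed with a guess h0
          Tensor<double> x(b.dim(0));                // starting guess (zeros)
          bool converged = qn.optimize(x);
          print("converged?", converged, "value", qn.value(), "|g|", qn.gradient_norm());
    */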
}

#endif // MADNESS_LINALG_SOLVERS_H__INCLUDED