1 //
2 // gdiis.cc
3 //
4 // Copyright (C) 1996 Limit Point Systems, Inc.
5 //
6 // Author: Edward Seidl <seidl@janed.com>
7 // Maintainer: LPS
8 //
9 // This file is part of the SC Toolkit.
10 //
11 // The SC Toolkit is free software; you can redistribute it and/or modify
12 // it under the terms of the GNU Library General Public License as published by
13 // the Free Software Foundation; either version 2, or (at your option)
14 // any later version.
15 //
16 // The SC Toolkit is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 // GNU Library General Public License for more details.
20 //
21 // You should have received a copy of the GNU Library General Public License
22 // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to
23 // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24 //
25 // The U.S. Government is granted a limited license as per AL 91-7.
26 //
27 
28 #ifdef __GNUC__
29 #pragma implementation
30 #endif
31 
32 #include <math.h>
33 
34 #include <util/state/stateio.h>
35 #include <math/optimize/gdiis.h>
36 #include <util/keyval/keyval.h>
37 #include <math/scmat/local.h>
38 #include <util/misc/formio.h>
39 
40 using namespace std;
41 using namespace sc;
42 
43 /////////////////////////////////////////////////////////////////////////
44 // GDIISOpt
45 
46 static ClassDesc GDIISOpt_cd(
47   typeid(GDIISOpt),"GDIISOpt",1,"public Optimize",
48   0, create<GDIISOpt>, create<GDIISOpt>);
49 
GDIISOpt(const Ref<KeyVal> & keyval)50 GDIISOpt::GDIISOpt(const Ref<KeyVal>&keyval):
51   Optimize(keyval),
52   diis_iter(0),
53   maxabs_gradient(-1.0)
54 {
55   nsave = keyval->intvalue("ngdiis");
56   if (keyval->error() != KeyVal::OK) nsave = 5;
57 
58   update_ << keyval->describedclassvalue("update");
59   update_->set_inverse();
60 
61   convergence_ = keyval->doublevalue("convergence");
62   if (keyval->error() != KeyVal::OK) convergence_ = 1.0e-6;
63 
64   accuracy_ = keyval->doublevalue("accuracy");
65   if (keyval->error() != KeyVal::OK) accuracy_ = 0.0001;
66 
67   RefSymmSCMatrix hessian(dimension(),matrixkit());
68   // get a guess hessian from the function
69   function()->guess_hessian(hessian);
70 
71   // see if any hessian matrix elements have been given in the input
72   if (keyval->exists("hessian")) {
73     int n = hessian.n();
74     for (int i=0; i<n; i++) {
75       if (keyval->exists("hessian",i)) {
76         for (int j=0; j<=i; j++) {
77           double tmp = keyval->doublevalue("hessian",i,j);
78           if (keyval->error() == KeyVal::OK) hessian(i,j) = tmp;
79         }
80       }
81     }
82   }
83   ihessian_ = function()->inverse_hessian(hessian);
84 
85   coords_ = new RefSCVector[nsave];
86   grad_ = new RefSCVector[nsave];
87   error_ = new RefSCVector[nsave];
88 
89   for (int i=0; i < nsave; i++) {
90     coords_[i] = matrixkit()->vector(dimension()); coords_[i]->assign(0.0);
91     grad_[i] = matrixkit()->vector(dimension()); grad_[i]->assign(0.0);
92     error_[i] = matrixkit()->vector(dimension()); error_[i]->assign(0.0);
93   }
94 }
95 
GDIISOpt(StateIn & s)96 GDIISOpt::GDIISOpt(StateIn&s):
97   SavableState(s),
98   Optimize(s)
99 {
100   s.get(nsave);
101   s.get(diis_iter);
102   ihessian_ = matrixkit()->symmmatrix(dimension());
103   ihessian_.restore(s);
104   update_ << SavableState::restore_state(s);
105   s.get(convergence_);
106   s.get(accuracy_);
107   s.get(maxabs_gradient);
108   coords_ = new RefSCVector[nsave];
109   grad_ = new RefSCVector[nsave];
110   error_ = new RefSCVector[nsave];
111   for (int i=0; i < nsave; i++) {
112     coords_[i] = matrixkit()->vector(dimension());
113     grad_[i] = matrixkit()->vector(dimension());
114     error_[i] = matrixkit()->vector(dimension());
115     coords_[i].restore(s);
116     grad_[i].restore(s);
117     error_[i].restore(s);
118   }
119 }
120 
~GDIISOpt()121 GDIISOpt::~GDIISOpt()
122 {
123   delete[] coords_;
124   delete[] grad_;
125   delete[] error_;
126 }
127 
128 void
save_data_state(StateOut & s)129 GDIISOpt::save_data_state(StateOut&s)
130 {
131   Optimize::save_data_state(s);
132   s.put(nsave);
133   s.put(diis_iter);
134   ihessian_.save(s);
135   SavableState::save_state(update_.pointer(),s);
136   s.put(convergence_);
137   s.put(accuracy_);
138   s.put(maxabs_gradient);
139   for (int i=0; i < nsave; i++) {
140     coords_[i].save(s);
141     grad_[i].save(s);
142     error_[i].save(s);
143   }
144 }
145 
146 void
init()147 GDIISOpt::init()
148 {
149   Optimize::init();
150   maxabs_gradient = -1.0;
151 }
152 
153 int
update()154 GDIISOpt::update()
155 {
156   int i,j,ii,jj;
157 
158   // these are good candidates to be input options
159   const double maxabs_gradient_to_desired_accuracy = 0.05;
160   const double maxabs_gradient_to_next_desired_accuracy = 0.005;
161   const double roundoff_error_factor = 1.1;
162 
163   // the gradient convergence criterion.
164   double old_maxabs_gradient = maxabs_gradient;
165   RefSCVector xcurrent;
166   RefSCVector gcurrent;
167 
168   // get the next gradient at the required level of accuracy.
169   // usually only one pass is needed, unless we happen to find
170   // that the accuracy was set too low.
171   int accurate_enough;
172   do {
173     // compute the current point
174     function()->set_desired_gradient_accuracy(accuracy_);
175 
176     xcurrent = function()->get_x();
177     gcurrent = function()->gradient().copy();
178 
179     // compute the gradient convergence criterion now so i can see if
180     // the accuracy needs to be tighter
181     maxabs_gradient = gcurrent.maxabs();
182     // compute the required accuracy
183     accuracy_ = maxabs_gradient * maxabs_gradient_to_desired_accuracy;
184 
185     // The roundoff_error_factor is thrown in to allow for round off making
186     // the current gcurrent.maxabs() a bit smaller than the previous,
187     // which would make the current required accuracy less than the
188     // gradient's actual accuracy and cause everything to be recomputed.
189     accurate_enough = (function()->actual_gradient_accuracy() <=
190                        accuracy_*roundoff_error_factor);
191 
192     if (!accurate_enough) {
193       ExEnv::out0() << indent
194            << "NOTICE: function()->actual_gradient_accuracy() > accuracy_:\n"
195            << indent
196            << scprintf(
197              "        function()->actual_gradient_accuracy() = %15.8e",
198              function()->actual_gradient_accuracy()) << endl << indent
199            << scprintf(
200              "                                     accuracy_ = %15.8e",
201              accuracy_) << endl;
202     }
203   } while(!accurate_enough);
204 
205   if (old_maxabs_gradient >= 0.0 && old_maxabs_gradient < maxabs_gradient) {
206     ExEnv::out0() << indent
207          << scprintf("NOTICE: maxabs_gradient increased from %8.4e to %8.4e",
208                      old_maxabs_gradient, maxabs_gradient) << endl;
209   }
210 
211   // update the hessian
212   if (update_.nonnull()) {
213     update_->update(ihessian_,function(),xcurrent,gcurrent);
214   }
215 
216   diis_iter++;
217 
218   int howmany = (diis_iter < nsave) ? diis_iter : nsave;
219 
220   if (diis_iter <= nsave) {
221     coords_[diis_iter-1] = xcurrent;
222     grad_[diis_iter-1] = gcurrent;
223   } else {
224     for (i=0; i < nsave-1; i++) {
225       coords_[i] = coords_[i+1];
226       grad_[i] = grad_[i+1];
227     }
228     coords_[nsave-1] = xcurrent;
229     grad_[nsave-1] = gcurrent;
230   }
231 
232   // take the step
233   if (diis_iter==1 || maxabs_gradient > 0.05) {
234     // just take the Newton-Raphson step first iteration
235     RefSCVector xdisp = -1.0*(ihessian_ * gcurrent);
236     // try steepest descent
237     // RefSCVector xdisp = -1.0*gcurrent;
238 
239     // scale displacement vector if it's too large
240     double tot = sqrt(xdisp.scalar_product(xdisp));
241     if (tot > max_stepsize_) {
242       double scal = max_stepsize_/tot;
243       ExEnv::out0() << endl << indent
244            << scprintf("stepsize of %f is too big, scaling by %f",tot,scal)
245            << endl;
246       xdisp.scale(scal);
247       tot *= scal;
248     }
249 
250     RefSCVector xnext = xcurrent + xdisp;
251 
252     conv_->reset();
253     conv_->get_grad(function());
254     conv_->get_x(function());
255     conv_->set_nextx(xnext);
256 
257     // check for conergence before resetting the geometry
258     int converged = conv_->converged();
259     if (converged)
260       return converged;
261 
262     ExEnv::out0() << endl << indent
263          << scprintf("taking step of size %f", tot) << endl;
264 
265     function()->set_x(xnext);
266 
267     // make the next gradient computed more accurate, since it will
268     // be smaller
269     accuracy_ = maxabs_gradient * maxabs_gradient_to_next_desired_accuracy;
270 
271     return converged;
272   }
273 
274   // form the error vectors
275   for (i=0; i < howmany; i++)
276     error_[i] = -1.0*(ihessian_ * grad_[i]);
277 
278   // and form the A matrix
279   RefSCMatrix A;
280   RefSCVector coeff;
281   int ntry=0;
282 
283   do {
284     int num = howmany-ntry;
285 
286     RefSCDimension size = new SCDimension(num+1);
287     Ref<SCMatrixKit> lkit = new LocalSCMatrixKit;
288     A = lkit->matrix(size,size);
289     coeff = lkit->vector(size);
290 
291     for (ii=0, i=ntry; i < howmany; i++,ii++) {
292       coeff(ii) = 0;
293       for (j=ntry,jj=0; j <= i; j++,jj++) {
294         A(ii,jj) = error_[i].scalar_product(error_[j]);
295         A(jj,ii) = A(ii,jj);
296       }
297     }
298 
299     A->scale(1.0/A(0,0));
300 
301     coeff(num) = 1.0;
302     for (i=0; i < num; i++)
303       A(num,i) = A(i,num) = 1.0;
304 
305     A(num,num) = 0;
306 
307     ntry++;
308 
309   } while (fabs(A.solve_lin(coeff)) < 1.0e-12);
310 
311   RefSCVector xstar = matrixkit()->vector(dimension());
312   RefSCVector delstar = matrixkit()->vector(dimension());
313 
314   xstar.assign(0.0);
315   delstar.assign(0.0);
316 
317   for (i=0,ii=ntry-1; ii < howmany; i++,ii++) {
318     RefSCVector tmp = grad_[ii].copy(); tmp.scale(coeff[i]);
319     delstar.accumulate(tmp);
320     tmp = coords_[ii].copy(); tmp.scale(coeff[i]);
321     xstar.accumulate(tmp);
322   }
323 
324   RefSCVector xdisp = xstar - xcurrent - ihessian_*delstar;
325   // scale displacement vector if it's too large
326   double tot = sqrt(xdisp.scalar_product(xdisp));
327   if (tot > max_stepsize_) {
328     double scal = max_stepsize_/tot;
329     ExEnv::out0() << endl << indent
330          << scprintf("stepsize of %f is too big, scaling by %f",tot,scal)
331          << endl;
332     xdisp.scale(scal);
333     tot *= scal;
334   }
335 
336   RefSCVector xnext = xcurrent + xdisp;
337 
338   conv_->reset();
339   conv_->get_grad(function());
340   conv_->get_x(function());
341   conv_->set_nextx(xnext);
342 
343   // check for conergence before resetting the geometry
344   int converged = conv_->converged();
345   if (converged)
346     return converged;
347 
348   ExEnv::out0() << endl << indent
349        << scprintf("taking step of size %f", tot) << endl;
350 
351   function()->set_x(xnext);
352 
353   // make the next gradient computed more accurate, since it will
354   // be smaller
355   accuracy_ = maxabs_gradient * maxabs_gradient_to_next_desired_accuracy;
356 
357   return converged;
358 }
359 
360 /////////////////////////////////////////////////////////////////////////////
361 
362 // Local Variables:
363 // mode: c++
364 // c-file-style: "ETS"
365 // End:
366