1 /*
2   This file is part of MADNESS.
3 
4   Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2 of the License, or
9   (at your option) any later version.
10 
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   GNU General Public License for more details.
15 
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20   For more information please contact:
21 
22   Robert J. Harrison
23   Oak Ridge National Laboratory
24   One Bethel Valley Road
25   P.O. Box 2008, MS-6367
26 
27   email: harrisonrj@ornl.gov
28   tel:   865-241-3937
29   fax:   865-572-0680
30 
31 */
32 
33 
34 /// \file chem/molecular_optimizer.h
35 /// \brief optimize the geometrical structure of a molecule
36 #ifndef MADNESS_CHEM_MOLECULAR_OPTIMIZER_H__INCLUDED
37 #define MADNESS_CHEM_MOLECULAR_OPTIMIZER_H__INCLUDED
38 
39 #include <madness/tensor/solvers.h>
40 #include <chem/molecule.h>
41 
42 namespace madness {
43 
44 
45 struct MolecularOptimizationTargetInterface : public OptimizationTargetInterface {
46 
47     /// return the molecule of the target
moleculeMolecularOptimizationTargetInterface48     virtual Molecule& molecule() {
49         MADNESS_EXCEPTION("you need to return a molecule",1);
50         return *(new Molecule());   // this is a memory leak silencing warnings
51     }
52 };
53 
54 
55 
56 /// Molecular optimizer derived from the QuasiNewton optimizer
57 
58 /// Essentially the QuasiNewton optimizer, but with the additional feature
59 /// of projecting out rotational and translational degrees of freedom
60 class MolecularOptimizer : public OptimizerInterface {
61 
62 public:
63     /// same ctor as the QuasiNewton optimizer
64     MolecularOptimizer(const std::shared_ptr<MolecularOptimizationTargetInterface>& tar,
65             int maxiter = 20, double tol = 1e-6, double value_precision = 1e-12,
66             double gradient_precision = 1e-12)
67         : update("BFGS")
68         , target(tar)
69         , maxiter(maxiter)
70         , tol(tol)
71         , value_precision(value_precision)
72         , gradient_precision(gradient_precision)
73         , f(tol*1e16)
74         , gnorm(tol*1e16)
75         , printtest(false)
76         , cg_method("polak_ribiere") {
77     }
78 
79     /// optimize the underlying molecule
80 
81     /// @param[in]  x   the coordinates to compute energy and gradient
optimize(Tensor<double> & x)82     bool optimize(Tensor<double>& x) {
83         bool converge;
84         converge=optimize_quasi_newton(x);
85 //        converge=optimize_conjugate_gradients(x);
86         return converge;
87     }
88 
converged()89     bool converged() const {return gradient_norm()<tol;}
90 
value()91     double value() const {return 0.0;}
92 
gradient_norm()93     double gradient_norm() const {return gnorm;}
94 
95     /// set an (initial) hessian
set_hessian(const Tensor<double> & hess)96     void set_hessian(const Tensor<double>& hess) {
97         h=copy(hess);
98     }
99 
100 private:
101 
102     /// How to update the hessian: BFGS or SR1
103     std::string update;
104     std::shared_ptr<MolecularOptimizationTargetInterface> target;
105     const int maxiter;
106     const double tol;       // the gradient convergence threshold
107     const double value_precision;  // Numerical precision of value
108     const double gradient_precision; // Numerical precision of each element of residual
109     double f;
110     double gnorm;
111     Tensor<double> h;
112     bool printtest;
113 
114     /// conjugate_gradients method
115     std::string cg_method;
116 
optimize_quasi_newton(Tensor<double> & x)117     bool optimize_quasi_newton(Tensor<double>& x) {
118 
119         if(printtest)  target->test_gradient(x, value_precision);
120 
121         bool h_is_identity = (h.size() == 0);
122         if (h_is_identity) {
123             int n = x.dim(0);
124             h = Tensor<double>(n,n);
125 
126             // mass-weight the initial hessian
127             for (int i=0; i<target->molecule().natom(); ++i) {
128                 h(3*i  ,3*i  )=1.0/(target->molecule().get_atom(i).mass);
129                 h(3*i+1,3*i+1)=1.0/(target->molecule().get_atom(i).mass);
130                 h(3*i+2,3*i+2)=1.0/(target->molecule().get_atom(i).mass);
131             }
132             h*=10.0;
133             madness::print("using the identity as initial Hessian");
134         } else {
135             Tensor<double> normalmodes;
136             Tensor<double> freq=MolecularOptimizer::compute_frequencies(
137                     target->molecule(),h,normalmodes);
138             madness::print("\ngopt: projected vibrational frequencies (cm-1)\n");
139             printf("frequency in cm-1   ");
140             for (int i=0; i<freq.size(); ++i) {
141                 printf("%10.3f",constants::au2invcm*freq(i));
142             }
143 
144         }
145 
146         remove_external_dof(h,target->molecule());
147 
148         // the previous gradient
149         Tensor<double> gp;
150         // the displacement
151         Tensor<double> dx;
152 
153         for (int iter=0; iter<maxiter; ++iter) {
154             Tensor<double> gradient;
155 
156             target->value_and_gradient(x, f, gradient);
157             print("gopt: new energy",f);
158             const double rawgnorm = gradient.normf()/sqrt(gradient.size());
159             print("gopt: raw gradient norm ",rawgnorm);
160 
161             // remove external degrees of freedom (translation and rotation)
162             Tensor<double> project_ext=projector_external_dof(target->molecule());
163             gradient=inner(gradient,project_ext);
164             gnorm = gradient.normf()/sqrt(gradient.size());
165             print("gopt: projected gradient norm ",gnorm);
166             const double gradratio=rawgnorm/gnorm;
167 
168             printf(" QuasiNewton iteration %2d value %.12e gradient %.2e  %.2e\n",
169                     iter,f,gnorm,gradratio);
170             if (converged()) break;
171 
172 //            if (iter == 1 && h_is_identity) {
173 //                // Default initial Hessian is scaled identity but
174 //                // prefer to reuse any existing approximation.
175 //                h.scale(gradient.trace(gp)/gp.trace(dx));
176 //            }
177 
178             if (iter > 0) {
179                 if (update == "BFGS") QuasiNewton::hessian_update_bfgs(dx, gradient-gp,h);
180                 else QuasiNewton::hessian_update_sr1(dx, gradient-gp,h);
181             }
182 
183 
184             remove_external_dof(h,target->molecule());
185             Tensor<double> v, e;
186             syev(h, v, e);
187             Tensor<double> normalmodes;
188             Tensor<double> freq=MolecularOptimizer::compute_frequencies(
189                     target->molecule(),h,normalmodes);
190             madness::print("\ngopt: projected vibrational frequencies (cm-1)\n");
191             printf("frequency in cm-1   ");
192             for (int i=0; i<freq.size(); ++i) {
193                 printf("%10.3f",constants::au2invcm*freq(i));
194             }
195             printf("\n");
196 
197             // this will invert the hessian, multiply with the gradient and
198             // return the displacements
199             dx = new_search_direction2(gradient,h);
200 
201             // line search only for large displacements > 0.01 bohr = 2pm
202             double step=1.0;
203             double maxdx=dx.absmax();
204             if (h_is_identity and (maxdx>0.01)) step = QuasiNewton::line_search(1.0, f,
205                     dx.trace(gradient), x, dx,target, value_precision);
206 
207             dx.scale(step);
208             x += dx;
209             gp = gradient;
210         }
211 
212         if (printtest) {
213             print("final hessian");
214             print(h);
215         }
216         return converged();
217     }
218 
optimize_conjugate_gradients(Tensor<double> & x)219     bool optimize_conjugate_gradients(Tensor<double>& x) {
220 
221 //        Tensor<double> project_ext=projector_external_dof(target->molecule());
222 
223 
224         // initial energy and gradient gradient
225         double energy=0.0;
226         Tensor<double> gradient;
227 
228         // first step is steepest descent
229         Tensor<double> displacement(x.size());
230         Tensor<double> oldgradient;
231         Tensor<double> old_displacement(x.size());
232         old_displacement.fill(0.0);
233 
234         for (int iter=1; iter<maxiter; ++iter) {
235 
236             // displace coordinates
237             if (iter>1) x+=displacement;
238 
239             // compute energy and gradient
240             target->value_and_gradient(x, energy, gradient);
241             print("gopt: new energy",energy);
242             gnorm = gradient.normf()/sqrt(gradient.size());
243             print("gopt: raw gradient norm ",gnorm);
244 
245             // remove external degrees of freedom (translation and rotation)
246             Tensor<double> project_ext=projector_external_dof(target->molecule());
247             gradient=inner(gradient,project_ext);
248             gnorm = gradient.normf()/sqrt(gradient.size());
249             print("gopt: projected gradient norm ",gnorm);
250 
251             // compute new displacement
252             if (iter==1) {
253                 displacement=-gradient;
254             } else {
255                 double beta=0.0;
256                 if (cg_method=="fletcher_reeves")
257                     beta=gradient.normf()/oldgradient.normf();
258                 if (cg_method=="polak_ribiere")
259                     beta=gradient.normf()/(gradient-oldgradient).normf();
260                 displacement=-gradient + beta * old_displacement;
261             }
262 
263             // save displacement for the next step
264             old_displacement=displacement;
265 
266             if (converged() and (displacement.normf()/displacement.size()<tol)) break;
267         }
268 
269         return converged();
270     }
271 
272     /// effectively invert the hessian and multiply with the gradient
new_search_direction2(const Tensor<double> & g,const Tensor<double> & hessian)273     Tensor<double> new_search_direction2(const Tensor<double>& g,
274             const Tensor<double>& hessian) const {
275         Tensor<double> dx, s;
276         double tol = gradient_precision;
277         double trust = 1.0; // This applied in spectral basis
278 
279         // diagonalize the hessian:
280         // VT H V = lambda
281         // H^-1   = V lambda^-1 VT
282         Tensor<double> v, e;
283         syev(hessian, v, e);
284 
285         // Transform gradient into spectral basis
286         // H^-1 g = V lambda^-1 VT g
287         Tensor<double> gv = inner(g,v); // this is VT g == gT V == gv
288 
289         // Take step applying restriction
290         int nneg=0, nsmall=0, nrestrict=0;
291         for (int i=0; i<e.size(); ++i) {
292             if (e[i] < -tol) {
293                 if (printtest) printf(
294                         "   forcing negative eigenvalue to be positive %d %.1e\n", i, e[i]);
295                 nneg++;
296                 //e[i] = -2.0*e[i]; // Enforce positive search direction
297                 e[i] = -0.1*e[i]; // Enforce positive search direction
298             }
299             else if (e[i] < tol) {
300                 if (printtest) printf(
301                         "   forcing small eigenvalue to be zero %d %.1e\n", i, e[i]);
302                 nsmall++;
303                 e[i] = tol;
304                 gv[i]=0.0;   // effectively removing this direction
305             }
306 
307             // this is the step -lambda^-1 gv
308             gv[i] = -gv[i] / e[i];
309             if (std::abs(gv[i]) > trust) { // Step restriction
310                 double gvnew = trust*std::abs(gv(i))/gv[i];
311                 if (printtest) printf(
312                         "   restricting step in spectral direction %d %.1e --> %.1e\n",
313                         i, gv[i], gvnew);
314                 nrestrict++;
315                 gv[i] = gvnew;
316             }
317         }
318         if (nneg || nsmall || nrestrict)
319             printf("   nneg=%d nsmall=%d nrestrict=%d\n", nneg, nsmall, nrestrict);
320 
321         // Transform back from spectral basis to give the displacements
322         // disp = -V lambda^-1 VT g = V lambda^-1 gv
323         return inner(v,gv);
324     }
325 
326 public:
327 
328     /// compute the projector to remove transl. and rot. degrees of freedom
329 
330     /// taken from http://www.gaussian.com/g_whitepap/vib.htm
331     /// I don't really understand the concept behind the projectors, but it
332     /// seems to work, and it is not written down explicitly anywhere.
333     /// NOTE THE ERROR IN THE FORMULAS ON THE WEBPAGE !
projector_external_dof(const Molecule & mol)334     static Tensor<double> projector_external_dof(const Molecule& mol) {
335 
336         // compute the translation vectors
337         Tensor<double> transx(3*mol.natom());
338         Tensor<double> transy(3*mol.natom());
339         Tensor<double> transz(3*mol.natom());
340         for (int i=0; i<mol.natom(); ++i) {
341             transx[3*i  ]=sqrt(mol.get_atom(i).get_mass_in_au());
342             transy[3*i+1]=sqrt(mol.get_atom(i).get_mass_in_au());
343             transz[3*i+2]=sqrt(mol.get_atom(i).get_mass_in_au());
344         }
345 
346         // compute the rotation vectors
347 
348         // move the molecule to its center of mass and compute
349         // the moment of inertia tensor
350         Tensor<double> com=mol.center_of_mass();
351         Molecule mol2=mol;
352         mol2.translate(-1.0*com);
353         Tensor<double> I=mol2.moment_of_inertia();
354         I.scale(constants::atomic_mass_in_au);
355 
356         // diagonalize the moment of inertia
357         Tensor<double> v,e;
358         syev(I, v, e);  // v being the "X" tensor on the web site
359         v=transpose(v);
360 
361 //        Tensor<double> B(e.size());
362 //        for (long i=0; i<e.size(); ++i) B(i)=1.0/(2.0*e(i));
363 //        print("rotational constants in cm-1");
364 //        print(constants::au2invcm*B);
365 
366         // rotation vectors
367         Tensor<double> rotx(3*mol.natom());
368         Tensor<double> roty(3*mol.natom());
369         Tensor<double> rotz(3*mol.natom());
370 
371         for (int iatom=0; iatom<mol.natom(); ++iatom) {
372 
373             // coordinates wrt the center of mass ("R" on the web site)
374             Tensor<double> coord(3);
375             coord(0l)=mol.get_atom(iatom).x-com(0l);
376             coord(1l)=mol.get_atom(iatom).y-com(1l);
377             coord(2l)=mol.get_atom(iatom).z-com(2l);
378 
379             // note the wrong formula on the Gaussian website:
380             // multiply with sqrt(mass), do not divide!
381             coord.scale(sqrt(mol.get_atom(iatom).get_mass_in_au()));
382 
383             // p is the dot product of R and X on the web site
384             Tensor<double> p=inner(coord,v);
385 
386             // Eq. (5)
387             rotx(3*iatom + 0)=p(1)*v(0,2)-p(2)*v(0,1);
388             rotx(3*iatom + 1)=p(1)*v(1,2)-p(2)*v(1,1);
389             rotx(3*iatom + 2)=p(1)*v(2,2)-p(2)*v(2,1);
390 
391             roty(3*iatom + 0)=p(2)*v(0,0)-p(0l)*v(0,2);
392             roty(3*iatom + 1)=p(2)*v(1,0)-p(0l)*v(1,2);
393             roty(3*iatom + 2)=p(2)*v(2,0)-p(0l)*v(2,2);
394 
395             rotz(3*iatom + 0)=p(0l)*v(0,1)-p(1)*v(0,0);
396             rotz(3*iatom + 1)=p(0l)*v(1,1)-p(1)*v(1,0);
397             rotz(3*iatom + 2)=p(0l)*v(2,1)-p(1)*v(2,0);
398 
399         }
400 
401         // move the translational and rotational vectors to a common tensor
402         Tensor<double> ext_dof(6,3*mol.natom());
403         ext_dof(0l,_)=transx;
404         ext_dof(1l,_)=transy;
405         ext_dof(2l,_)=transz;
406         ext_dof(3l,_)=rotx;
407         ext_dof(4l,_)=roty;
408         ext_dof(5l,_)=rotz;
409 
410         // normalize
411         for (int i=0; i<6; ++i) {
412             double norm=ext_dof(i,_).normf();
413             if (norm>1.e-14) ext_dof(i,_).scale(1.0/norm);
414             else ext_dof(i,_)=0.0;
415         }
416 
417         // compute overlap to orthonormalize the projectors
418         Tensor<double> ovlp=inner(ext_dof,ext_dof,1,1);
419         syev(ovlp,v,e);
420         ext_dof=inner(v,ext_dof,0,0);
421 
422         // normalize or remove the dof if necessary (e.g. linear molecules)
423         for (int i=0; i<6; ++i) {
424             if (e(i)<1.e-14) {
425                 ext_dof(i,_).scale(0.0);      // take out this degree of freedom
426             } else {
427                 ext_dof(i,_).scale(1.0/sqrt(e(i)));   // normalize
428             }
429         }
430 
431         // construct projector on the complement of the rotations
432         Tensor<double> projector(3*mol.natom(),3*mol.natom());
433         for (int i=0; i<3*mol.natom(); ++i) projector(i,i)=1.0;
434 
435         // compute the outer products of the projectors
436         // 1- \sum_i | t_i >< t_i |
437         projector-=inner(ext_dof,ext_dof,0,0);
438 
439         // construct random tensor for orthogonalization
440         Tensor<double> D(3*mol.natom(),3*mol.natom());
441         D.fillrandom();
442         for (int i=0; i<6; ++i) D(i,_)=ext_dof(i,_);
443 
444         // this works only for non-linear molecules -- projector seems simpler
445 //        ovlp=inner(D,D,1,1);
446 //        cholesky(ovlp); // destroys ovlp
447 //        Tensor<double> L=transpose(ovlp);
448 //        Tensor<double> Linv=inverse(L);
449 //        D=inner(Linv,D,1,0);
450 //        ovlp=inner(D,D,1,1);
451 //
452 //        for (int i=0; i<6; ++i) D(i,_)=0.0;
453 //        D=copy(D(Slice(6,8),_));
454 
455 //        return transpose(D);
456 
457         return projector;
458 
459     }
460 
461     /// remove translational degrees of freedom from the hessian
remove_external_dof(Tensor<double> & hessian,const Molecule & mol)462     static void remove_external_dof(Tensor<double>& hessian,
463             const Molecule& mol) {
464 
465         // compute the translation of the center of mass
466         Tensor<double> projector_ext=projector_external_dof(mol);
467 
468         // this is P^T * H * P
469         hessian=inner(projector_ext,inner(hessian,projector_ext),0,0);
470     }
471 
472 
473     /// returns the vibrational frequencies
474 
475     /// @param[in]  hessian the hessian matrix (not mass-weighted)
476     /// @param[out] normalmodes the normal modes
477     /// @param[in]  project_tr whether to project out translation and rotation
478     /// @param[in]  print_hessian   whether to print the hessian matrix
479     /// @return the frequencies in atomic units
480     static Tensor<double> compute_frequencies(const Molecule& molecule,
481             const Tensor<double>& hessian, Tensor<double>& normalmodes,
482             const bool project_tr=true, const bool print_hessian=false) {
483 
484         // compute mass-weighing matrices
485         Tensor<double> M=molecule.massweights();
486         Tensor<double> Minv(3*molecule.natom(),3*molecule.natom());
487         for (int i=0; i<3*molecule.natom(); ++i) Minv(i,i)=1.0/M(i,i);
488 
489         // mass-weight the hessian
490         Tensor<double> mwhessian=inner(M,inner(hessian,M));
491 
492         // remove translation and rotation
493         if (project_tr) MolecularOptimizer::remove_external_dof(mwhessian,molecule);
494 
495         if (print_hessian) {
496             if (project_tr) {
497                 print("mass-weighted hessian with translation and rotation projected out");
498             } else {
499                 print("mass-weighted unprojected hessian");
500             }
501             Tensor<double> mmhessian=inner(Minv,inner(mwhessian,Minv));
502             print(mwhessian);
503             print("mass-weighted unprojected hessian; mass-weighing undone");
504             print(mmhessian);
505         }
506 
507         Tensor<double> freq;
508         syev(mwhessian,normalmodes,freq);
509         for (long i=0; i<freq.size(); ++i) {
510             if (freq(i)>0.0) freq(i)=sqrt(freq(i)); // real frequencies
511             else freq(i)=-sqrt(-freq(i));           // imaginary frequencies
512         }
513         return freq;
514     }
515 
516 
compute_reduced_mass(const Molecule & molecule,const Tensor<double> & normalmodes)517     static Tensor<double> compute_reduced_mass(const Molecule& molecule,
518             const Tensor<double>& normalmodes) {
519 
520         Tensor<double> M=molecule.massweights();
521         Tensor<double> D=MolecularOptimizer::projector_external_dof(molecule);
522         Tensor<double> L=copy(normalmodes);
523         Tensor<double> DL=inner(D,L);
524         Tensor<double> MDL=inner(M,DL);
525         Tensor<double> mu(3*molecule.natom());
526 
527         for (int i=0; i<3*molecule.natom(); ++i) {
528             double mu1=0.0;
529             for (int j=0; j<3*molecule.natom(); ++j) mu1+=MDL(j,i)*MDL(j,i);
530             if (mu1>1.e-14) mu(i)=1.0/(mu1*constants::atomic_mass_in_au);
531         }
532         return mu;
533     }
534 
535 };
536 
537 }
538 
539 #endif //MADNESS_CHEM_MOLECULAR_OPTIMIZER_H__INCLUDED
540