/* This file is part of MADNESS. Copyright (C) 2007,2010 Oak Ridge National Laboratory This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA For more information please contact: Robert J. Harrison Oak Ridge National Laboratory One Bethel Valley Road P.O. Box 2008, MS-6367 email: harrisonrj@ornl.gov tel: 865-241-3937 fax: 865-572-0680 */ /// \file chem/molecular_optimizer.h /// \brief optimize the geometrical structure of a molecule #ifndef MADNESS_CHEM_MOLECULAR_OPTIMIZER_H__INCLUDED #define MADNESS_CHEM_MOLECULAR_OPTIMIZER_H__INCLUDED #include #include namespace madness { struct MolecularOptimizationTargetInterface : public OptimizationTargetInterface { /// return the molecule of the target virtual Molecule& molecule() { MADNESS_EXCEPTION("you need to return a molecule",1); return *(new Molecule()); // this is a memory leak silencing warnings } }; /// Molecular optimizer derived from the QuasiNewton optimizer /// Essentially the QuasiNewton optimizer, but with the additional feature /// of projecting out rotational and translational degrees of freedom class MolecularOptimizer : public OptimizerInterface { public: /// same ctor as the QuasiNewton optimizer MolecularOptimizer(const std::shared_ptr& tar, int maxiter = 20, double tol = 1e-6, double value_precision = 1e-12, double gradient_precision = 1e-12) : update("BFGS") , target(tar) , maxiter(maxiter) , tol(tol) , value_precision(value_precision) , gradient_precision(gradient_precision) , f(tol*1e16) , gnorm(tol*1e16) , printtest(false) , cg_method("polak_ribiere") { } /// optimize the underlying molecule /// @param[in] x the coordinates to compute energy and gradient bool optimize(Tensor& x) { bool converge; converge=optimize_quasi_newton(x); // converge=optimize_conjugate_gradients(x); return converge; } bool converged() const {return gradient_norm()& hess) { h=copy(hess); } private: /// How to update the hessian: BFGS or SR1 std::string update; std::shared_ptr target; const int maxiter; const double tol; // the gradient convergence threshold const double value_precision; // Numerical precision of value const double gradient_precision; // Numerical precision of each element of residual double f; double gnorm; Tensor h; bool printtest; /// conjugate_gradients method std::string cg_method; bool optimize_quasi_newton(Tensor& x) { if(printtest) target->test_gradient(x, value_precision); bool h_is_identity = (h.size() == 0); if (h_is_identity) { int n = x.dim(0); h = Tensor(n,n); // mass-weight the initial hessian for (int i=0; imolecule().natom(); ++i) { h(3*i ,3*i )=1.0/(target->molecule().get_atom(i).mass); h(3*i+1,3*i+1)=1.0/(target->molecule().get_atom(i).mass); h(3*i+2,3*i+2)=1.0/(target->molecule().get_atom(i).mass); } h*=10.0; madness::print("using the identity as initial Hessian"); } else { Tensor normalmodes; Tensor freq=MolecularOptimizer::compute_frequencies( target->molecule(),h,normalmodes); madness::print("\ngopt: projected vibrational frequencies (cm-1)\n"); printf("frequency in cm-1 "); for (int i=0; imolecule()); // the previous gradient Tensor gp; // the displacement Tensor dx; for (int iter=0; iter gradient; target->value_and_gradient(x, f, gradient); print("gopt: new energy",f); const double rawgnorm = gradient.normf()/sqrt(gradient.size()); print("gopt: raw gradient norm ",rawgnorm); // remove external degrees of freedom (translation and rotation) Tensor project_ext=projector_external_dof(target->molecule()); gradient=inner(gradient,project_ext); gnorm = gradient.normf()/sqrt(gradient.size()); print("gopt: projected gradient norm ",gnorm); const double gradratio=rawgnorm/gnorm; printf(" QuasiNewton iteration %2d value %.12e gradient %.2e %.2e\n", iter,f,gnorm,gradratio); if (converged()) break; // if (iter == 1 && h_is_identity) { // // Default initial Hessian is scaled identity but // // prefer to reuse any existing approximation. // h.scale(gradient.trace(gp)/gp.trace(dx)); // } if (iter > 0) { if (update == "BFGS") QuasiNewton::hessian_update_bfgs(dx, gradient-gp,h); else QuasiNewton::hessian_update_sr1(dx, gradient-gp,h); } remove_external_dof(h,target->molecule()); Tensor v, e; syev(h, v, e); Tensor normalmodes; Tensor freq=MolecularOptimizer::compute_frequencies( target->molecule(),h,normalmodes); madness::print("\ngopt: projected vibrational frequencies (cm-1)\n"); printf("frequency in cm-1 "); for (int i=0; i 0.01 bohr = 2pm double step=1.0; double maxdx=dx.absmax(); if (h_is_identity and (maxdx>0.01)) step = QuasiNewton::line_search(1.0, f, dx.trace(gradient), x, dx,target, value_precision); dx.scale(step); x += dx; gp = gradient; } if (printtest) { print("final hessian"); print(h); } return converged(); } bool optimize_conjugate_gradients(Tensor& x) { // Tensor project_ext=projector_external_dof(target->molecule()); // initial energy and gradient gradient double energy=0.0; Tensor gradient; // first step is steepest descent Tensor displacement(x.size()); Tensor oldgradient; Tensor old_displacement(x.size()); old_displacement.fill(0.0); for (int iter=1; iter1) x+=displacement; // compute energy and gradient target->value_and_gradient(x, energy, gradient); print("gopt: new energy",energy); gnorm = gradient.normf()/sqrt(gradient.size()); print("gopt: raw gradient norm ",gnorm); // remove external degrees of freedom (translation and rotation) Tensor project_ext=projector_external_dof(target->molecule()); gradient=inner(gradient,project_ext); gnorm = gradient.normf()/sqrt(gradient.size()); print("gopt: projected gradient norm ",gnorm); // compute new displacement if (iter==1) { displacement=-gradient; } else { double beta=0.0; if (cg_method=="fletcher_reeves") beta=gradient.normf()/oldgradient.normf(); if (cg_method=="polak_ribiere") beta=gradient.normf()/(gradient-oldgradient).normf(); displacement=-gradient + beta * old_displacement; } // save displacement for the next step old_displacement=displacement; if (converged() and (displacement.normf()/displacement.size() new_search_direction2(const Tensor& g, const Tensor& hessian) const { Tensor dx, s; double tol = gradient_precision; double trust = 1.0; // This applied in spectral basis // diagonalize the hessian: // VT H V = lambda // H^-1 = V lambda^-1 VT Tensor v, e; syev(hessian, v, e); // Transform gradient into spectral basis // H^-1 g = V lambda^-1 VT g Tensor gv = inner(g,v); // this is VT g == gT V == gv // Take step applying restriction int nneg=0, nsmall=0, nrestrict=0; for (int i=0; i trust) { // Step restriction double gvnew = trust*std::abs(gv(i))/gv[i]; if (printtest) printf( " restricting step in spectral direction %d %.1e --> %.1e\n", i, gv[i], gvnew); nrestrict++; gv[i] = gvnew; } } if (nneg || nsmall || nrestrict) printf(" nneg=%d nsmall=%d nrestrict=%d\n", nneg, nsmall, nrestrict); // Transform back from spectral basis to give the displacements // disp = -V lambda^-1 VT g = V lambda^-1 gv return inner(v,gv); } public: /// compute the projector to remove transl. and rot. degrees of freedom /// taken from http://www.gaussian.com/g_whitepap/vib.htm /// I don't really understand the concept behind the projectors, but it /// seems to work, and it is not written down explicitly anywhere. /// NOTE THE ERROR IN THE FORMULAS ON THE WEBPAGE ! static Tensor projector_external_dof(const Molecule& mol) { // compute the translation vectors Tensor transx(3*mol.natom()); Tensor transy(3*mol.natom()); Tensor transz(3*mol.natom()); for (int i=0; i com=mol.center_of_mass(); Molecule mol2=mol; mol2.translate(-1.0*com); Tensor I=mol2.moment_of_inertia(); I.scale(constants::atomic_mass_in_au); // diagonalize the moment of inertia Tensor v,e; syev(I, v, e); // v being the "X" tensor on the web site v=transpose(v); // Tensor B(e.size()); // for (long i=0; i rotx(3*mol.natom()); Tensor roty(3*mol.natom()); Tensor rotz(3*mol.natom()); for (int iatom=0; iatom coord(3); coord(0l)=mol.get_atom(iatom).x-com(0l); coord(1l)=mol.get_atom(iatom).y-com(1l); coord(2l)=mol.get_atom(iatom).z-com(2l); // note the wrong formula on the Gaussian website: // multiply with sqrt(mass), do not divide! coord.scale(sqrt(mol.get_atom(iatom).get_mass_in_au())); // p is the dot product of R and X on the web site Tensor p=inner(coord,v); // Eq. (5) rotx(3*iatom + 0)=p(1)*v(0,2)-p(2)*v(0,1); rotx(3*iatom + 1)=p(1)*v(1,2)-p(2)*v(1,1); rotx(3*iatom + 2)=p(1)*v(2,2)-p(2)*v(2,1); roty(3*iatom + 0)=p(2)*v(0,0)-p(0l)*v(0,2); roty(3*iatom + 1)=p(2)*v(1,0)-p(0l)*v(1,2); roty(3*iatom + 2)=p(2)*v(2,0)-p(0l)*v(2,2); rotz(3*iatom + 0)=p(0l)*v(0,1)-p(1)*v(0,0); rotz(3*iatom + 1)=p(0l)*v(1,1)-p(1)*v(1,0); rotz(3*iatom + 2)=p(0l)*v(2,1)-p(1)*v(2,0); } // move the translational and rotational vectors to a common tensor Tensor ext_dof(6,3*mol.natom()); ext_dof(0l,_)=transx; ext_dof(1l,_)=transy; ext_dof(2l,_)=transz; ext_dof(3l,_)=rotx; ext_dof(4l,_)=roty; ext_dof(5l,_)=rotz; // normalize for (int i=0; i<6; ++i) { double norm=ext_dof(i,_).normf(); if (norm>1.e-14) ext_dof(i,_).scale(1.0/norm); else ext_dof(i,_)=0.0; } // compute overlap to orthonormalize the projectors Tensor ovlp=inner(ext_dof,ext_dof,1,1); syev(ovlp,v,e); ext_dof=inner(v,ext_dof,0,0); // normalize or remove the dof if necessary (e.g. linear molecules) for (int i=0; i<6; ++i) { if (e(i)<1.e-14) { ext_dof(i,_).scale(0.0); // take out this degree of freedom } else { ext_dof(i,_).scale(1.0/sqrt(e(i))); // normalize } } // construct projector on the complement of the rotations Tensor projector(3*mol.natom(),3*mol.natom()); for (int i=0; i<3*mol.natom(); ++i) projector(i,i)=1.0; // compute the outer products of the projectors // 1- \sum_i | t_i >< t_i | projector-=inner(ext_dof,ext_dof,0,0); // construct random tensor for orthogonalization Tensor D(3*mol.natom(),3*mol.natom()); D.fillrandom(); for (int i=0; i<6; ++i) D(i,_)=ext_dof(i,_); // this works only for non-linear molecules -- projector seems simpler // ovlp=inner(D,D,1,1); // cholesky(ovlp); // destroys ovlp // Tensor L=transpose(ovlp); // Tensor Linv=inverse(L); // D=inner(Linv,D,1,0); // ovlp=inner(D,D,1,1); // // for (int i=0; i<6; ++i) D(i,_)=0.0; // D=copy(D(Slice(6,8),_)); // return transpose(D); return projector; } /// remove translational degrees of freedom from the hessian static void remove_external_dof(Tensor& hessian, const Molecule& mol) { // compute the translation of the center of mass Tensor projector_ext=projector_external_dof(mol); // this is P^T * H * P hessian=inner(projector_ext,inner(hessian,projector_ext),0,0); } /// returns the vibrational frequencies /// @param[in] hessian the hessian matrix (not mass-weighted) /// @param[out] normalmodes the normal modes /// @param[in] project_tr whether to project out translation and rotation /// @param[in] print_hessian whether to print the hessian matrix /// @return the frequencies in atomic units static Tensor compute_frequencies(const Molecule& molecule, const Tensor& hessian, Tensor& normalmodes, const bool project_tr=true, const bool print_hessian=false) { // compute mass-weighing matrices Tensor M=molecule.massweights(); Tensor Minv(3*molecule.natom(),3*molecule.natom()); for (int i=0; i<3*molecule.natom(); ++i) Minv(i,i)=1.0/M(i,i); // mass-weight the hessian Tensor mwhessian=inner(M,inner(hessian,M)); // remove translation and rotation if (project_tr) MolecularOptimizer::remove_external_dof(mwhessian,molecule); if (print_hessian) { if (project_tr) { print("mass-weighted hessian with translation and rotation projected out"); } else { print("mass-weighted unprojected hessian"); } Tensor mmhessian=inner(Minv,inner(mwhessian,Minv)); print(mwhessian); print("mass-weighted unprojected hessian; mass-weighing undone"); print(mmhessian); } Tensor freq; syev(mwhessian,normalmodes,freq); for (long i=0; i0.0) freq(i)=sqrt(freq(i)); // real frequencies else freq(i)=-sqrt(-freq(i)); // imaginary frequencies } return freq; } static Tensor compute_reduced_mass(const Molecule& molecule, const Tensor& normalmodes) { Tensor M=molecule.massweights(); Tensor D=MolecularOptimizer::projector_external_dof(molecule); Tensor L=copy(normalmodes); Tensor DL=inner(D,L); Tensor MDL=inner(M,DL); Tensor mu(3*molecule.natom()); for (int i=0; i<3*molecule.natom(); ++i) { double mu1=0.0; for (int j=0; j<3*molecule.natom(); ++j) mu1+=MDL(j,i)*MDL(j,i); if (mu1>1.e-14) mu(i)=1.0/(mu1*constants::atomic_mass_in_au); } return mu; } }; } #endif //MADNESS_CHEM_MOLECULAR_OPTIMIZER_H__INCLUDED