1 /* 2 This file is part of MADNESS. 3 4 Copyright (C) 2007,2010 Oak Ridge National Laboratory 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 20 For more information please contact: 21 22 Robert J. Harrison 23 Oak Ridge National Laboratory 24 One Bethel Valley Road 25 P.O. Box 2008, MS-6367 26 27 email: harrisonrj@ornl.gov 28 tel: 865-241-3937 29 fax: 865-572-0680 30 31 */ 32 33 34 /// \file chem/molecular_optimizer.h 35 /// \brief optimize the geometrical structure of a molecule 36 #ifndef MADNESS_CHEM_MOLECULAR_OPTIMIZER_H__INCLUDED 37 #define MADNESS_CHEM_MOLECULAR_OPTIMIZER_H__INCLUDED 38 39 #include <madness/tensor/solvers.h> 40 #include <chem/molecule.h> 41 42 namespace madness { 43 44 45 struct MolecularOptimizationTargetInterface : public OptimizationTargetInterface { 46 47 /// return the molecule of the target moleculeMolecularOptimizationTargetInterface48 virtual Molecule& molecule() { 49 MADNESS_EXCEPTION("you need to return a molecule",1); 50 return *(new Molecule()); // this is a memory leak silencing warnings 51 } 52 }; 53 54 55 56 /// Molecular optimizer derived from the QuasiNewton optimizer 57 58 /// Essentially the QuasiNewton optimizer, but with the additional feature 59 /// of projecting out rotational and translational degrees of freedom 60 class MolecularOptimizer : public OptimizerInterface { 61 62 public: 63 /// same ctor as the QuasiNewton optimizer 64 MolecularOptimizer(const std::shared_ptr<MolecularOptimizationTargetInterface>& tar, 65 int maxiter = 20, double tol = 1e-6, double value_precision = 1e-12, 66 double gradient_precision = 1e-12) 67 : update("BFGS") 68 , target(tar) 69 , maxiter(maxiter) 70 , tol(tol) 71 , value_precision(value_precision) 72 , gradient_precision(gradient_precision) 73 , f(tol*1e16) 74 , gnorm(tol*1e16) 75 , printtest(false) 76 , cg_method("polak_ribiere") { 77 } 78 79 /// optimize the underlying molecule 80 81 /// @param[in] x the coordinates to compute energy and gradient optimize(Tensor<double> & x)82 bool optimize(Tensor<double>& x) { 83 bool converge; 84 converge=optimize_quasi_newton(x); 85 // converge=optimize_conjugate_gradients(x); 86 return converge; 87 } 88 converged()89 bool converged() const {return gradient_norm()<tol;} 90 value()91 double value() const {return 0.0;} 92 gradient_norm()93 double gradient_norm() const {return gnorm;} 94 95 /// set an (initial) hessian set_hessian(const Tensor<double> & hess)96 void set_hessian(const Tensor<double>& hess) { 97 h=copy(hess); 98 } 99 100 private: 101 102 /// How to update the hessian: BFGS or SR1 103 std::string update; 104 std::shared_ptr<MolecularOptimizationTargetInterface> target; 105 const int maxiter; 106 const double tol; // the gradient convergence threshold 107 const double value_precision; // Numerical precision of value 108 const double gradient_precision; // Numerical precision of each element of residual 109 double f; 110 double gnorm; 111 Tensor<double> h; 112 bool printtest; 113 114 /// conjugate_gradients method 115 std::string cg_method; 116 optimize_quasi_newton(Tensor<double> & x)117 bool optimize_quasi_newton(Tensor<double>& x) { 118 119 if(printtest) target->test_gradient(x, value_precision); 120 121 bool h_is_identity = (h.size() == 0); 122 if (h_is_identity) { 123 int n = x.dim(0); 124 h = Tensor<double>(n,n); 125 126 // mass-weight the initial hessian 127 for (int i=0; i<target->molecule().natom(); ++i) { 128 h(3*i ,3*i )=1.0/(target->molecule().get_atom(i).mass); 129 h(3*i+1,3*i+1)=1.0/(target->molecule().get_atom(i).mass); 130 h(3*i+2,3*i+2)=1.0/(target->molecule().get_atom(i).mass); 131 } 132 h*=10.0; 133 madness::print("using the identity as initial Hessian"); 134 } else { 135 Tensor<double> normalmodes; 136 Tensor<double> freq=MolecularOptimizer::compute_frequencies( 137 target->molecule(),h,normalmodes); 138 madness::print("\ngopt: projected vibrational frequencies (cm-1)\n"); 139 printf("frequency in cm-1 "); 140 for (int i=0; i<freq.size(); ++i) { 141 printf("%10.3f",constants::au2invcm*freq(i)); 142 } 143 144 } 145 146 remove_external_dof(h,target->molecule()); 147 148 // the previous gradient 149 Tensor<double> gp; 150 // the displacement 151 Tensor<double> dx; 152 153 for (int iter=0; iter<maxiter; ++iter) { 154 Tensor<double> gradient; 155 156 target->value_and_gradient(x, f, gradient); 157 print("gopt: new energy",f); 158 const double rawgnorm = gradient.normf()/sqrt(gradient.size()); 159 print("gopt: raw gradient norm ",rawgnorm); 160 161 // remove external degrees of freedom (translation and rotation) 162 Tensor<double> project_ext=projector_external_dof(target->molecule()); 163 gradient=inner(gradient,project_ext); 164 gnorm = gradient.normf()/sqrt(gradient.size()); 165 print("gopt: projected gradient norm ",gnorm); 166 const double gradratio=rawgnorm/gnorm; 167 168 printf(" QuasiNewton iteration %2d value %.12e gradient %.2e %.2e\n", 169 iter,f,gnorm,gradratio); 170 if (converged()) break; 171 172 // if (iter == 1 && h_is_identity) { 173 // // Default initial Hessian is scaled identity but 174 // // prefer to reuse any existing approximation. 175 // h.scale(gradient.trace(gp)/gp.trace(dx)); 176 // } 177 178 if (iter > 0) { 179 if (update == "BFGS") QuasiNewton::hessian_update_bfgs(dx, gradient-gp,h); 180 else QuasiNewton::hessian_update_sr1(dx, gradient-gp,h); 181 } 182 183 184 remove_external_dof(h,target->molecule()); 185 Tensor<double> v, e; 186 syev(h, v, e); 187 Tensor<double> normalmodes; 188 Tensor<double> freq=MolecularOptimizer::compute_frequencies( 189 target->molecule(),h,normalmodes); 190 madness::print("\ngopt: projected vibrational frequencies (cm-1)\n"); 191 printf("frequency in cm-1 "); 192 for (int i=0; i<freq.size(); ++i) { 193 printf("%10.3f",constants::au2invcm*freq(i)); 194 } 195 printf("\n"); 196 197 // this will invert the hessian, multiply with the gradient and 198 // return the displacements 199 dx = new_search_direction2(gradient,h); 200 201 // line search only for large displacements > 0.01 bohr = 2pm 202 double step=1.0; 203 double maxdx=dx.absmax(); 204 if (h_is_identity and (maxdx>0.01)) step = QuasiNewton::line_search(1.0, f, 205 dx.trace(gradient), x, dx,target, value_precision); 206 207 dx.scale(step); 208 x += dx; 209 gp = gradient; 210 } 211 212 if (printtest) { 213 print("final hessian"); 214 print(h); 215 } 216 return converged(); 217 } 218 optimize_conjugate_gradients(Tensor<double> & x)219 bool optimize_conjugate_gradients(Tensor<double>& x) { 220 221 // Tensor<double> project_ext=projector_external_dof(target->molecule()); 222 223 224 // initial energy and gradient gradient 225 double energy=0.0; 226 Tensor<double> gradient; 227 228 // first step is steepest descent 229 Tensor<double> displacement(x.size()); 230 Tensor<double> oldgradient; 231 Tensor<double> old_displacement(x.size()); 232 old_displacement.fill(0.0); 233 234 for (int iter=1; iter<maxiter; ++iter) { 235 236 // displace coordinates 237 if (iter>1) x+=displacement; 238 239 // compute energy and gradient 240 target->value_and_gradient(x, energy, gradient); 241 print("gopt: new energy",energy); 242 gnorm = gradient.normf()/sqrt(gradient.size()); 243 print("gopt: raw gradient norm ",gnorm); 244 245 // remove external degrees of freedom (translation and rotation) 246 Tensor<double> project_ext=projector_external_dof(target->molecule()); 247 gradient=inner(gradient,project_ext); 248 gnorm = gradient.normf()/sqrt(gradient.size()); 249 print("gopt: projected gradient norm ",gnorm); 250 251 // compute new displacement 252 if (iter==1) { 253 displacement=-gradient; 254 } else { 255 double beta=0.0; 256 if (cg_method=="fletcher_reeves") 257 beta=gradient.normf()/oldgradient.normf(); 258 if (cg_method=="polak_ribiere") 259 beta=gradient.normf()/(gradient-oldgradient).normf(); 260 displacement=-gradient + beta * old_displacement; 261 } 262 263 // save displacement for the next step 264 old_displacement=displacement; 265 266 if (converged() and (displacement.normf()/displacement.size()<tol)) break; 267 } 268 269 return converged(); 270 } 271 272 /// effectively invert the hessian and multiply with the gradient new_search_direction2(const Tensor<double> & g,const Tensor<double> & hessian)273 Tensor<double> new_search_direction2(const Tensor<double>& g, 274 const Tensor<double>& hessian) const { 275 Tensor<double> dx, s; 276 double tol = gradient_precision; 277 double trust = 1.0; // This applied in spectral basis 278 279 // diagonalize the hessian: 280 // VT H V = lambda 281 // H^-1 = V lambda^-1 VT 282 Tensor<double> v, e; 283 syev(hessian, v, e); 284 285 // Transform gradient into spectral basis 286 // H^-1 g = V lambda^-1 VT g 287 Tensor<double> gv = inner(g,v); // this is VT g == gT V == gv 288 289 // Take step applying restriction 290 int nneg=0, nsmall=0, nrestrict=0; 291 for (int i=0; i<e.size(); ++i) { 292 if (e[i] < -tol) { 293 if (printtest) printf( 294 " forcing negative eigenvalue to be positive %d %.1e\n", i, e[i]); 295 nneg++; 296 //e[i] = -2.0*e[i]; // Enforce positive search direction 297 e[i] = -0.1*e[i]; // Enforce positive search direction 298 } 299 else if (e[i] < tol) { 300 if (printtest) printf( 301 " forcing small eigenvalue to be zero %d %.1e\n", i, e[i]); 302 nsmall++; 303 e[i] = tol; 304 gv[i]=0.0; // effectively removing this direction 305 } 306 307 // this is the step -lambda^-1 gv 308 gv[i] = -gv[i] / e[i]; 309 if (std::abs(gv[i]) > trust) { // Step restriction 310 double gvnew = trust*std::abs(gv(i))/gv[i]; 311 if (printtest) printf( 312 " restricting step in spectral direction %d %.1e --> %.1e\n", 313 i, gv[i], gvnew); 314 nrestrict++; 315 gv[i] = gvnew; 316 } 317 } 318 if (nneg || nsmall || nrestrict) 319 printf(" nneg=%d nsmall=%d nrestrict=%d\n", nneg, nsmall, nrestrict); 320 321 // Transform back from spectral basis to give the displacements 322 // disp = -V lambda^-1 VT g = V lambda^-1 gv 323 return inner(v,gv); 324 } 325 326 public: 327 328 /// compute the projector to remove transl. and rot. degrees of freedom 329 330 /// taken from http://www.gaussian.com/g_whitepap/vib.htm 331 /// I don't really understand the concept behind the projectors, but it 332 /// seems to work, and it is not written down explicitly anywhere. 333 /// NOTE THE ERROR IN THE FORMULAS ON THE WEBPAGE ! projector_external_dof(const Molecule & mol)334 static Tensor<double> projector_external_dof(const Molecule& mol) { 335 336 // compute the translation vectors 337 Tensor<double> transx(3*mol.natom()); 338 Tensor<double> transy(3*mol.natom()); 339 Tensor<double> transz(3*mol.natom()); 340 for (int i=0; i<mol.natom(); ++i) { 341 transx[3*i ]=sqrt(mol.get_atom(i).get_mass_in_au()); 342 transy[3*i+1]=sqrt(mol.get_atom(i).get_mass_in_au()); 343 transz[3*i+2]=sqrt(mol.get_atom(i).get_mass_in_au()); 344 } 345 346 // compute the rotation vectors 347 348 // move the molecule to its center of mass and compute 349 // the moment of inertia tensor 350 Tensor<double> com=mol.center_of_mass(); 351 Molecule mol2=mol; 352 mol2.translate(-1.0*com); 353 Tensor<double> I=mol2.moment_of_inertia(); 354 I.scale(constants::atomic_mass_in_au); 355 356 // diagonalize the moment of inertia 357 Tensor<double> v,e; 358 syev(I, v, e); // v being the "X" tensor on the web site 359 v=transpose(v); 360 361 // Tensor<double> B(e.size()); 362 // for (long i=0; i<e.size(); ++i) B(i)=1.0/(2.0*e(i)); 363 // print("rotational constants in cm-1"); 364 // print(constants::au2invcm*B); 365 366 // rotation vectors 367 Tensor<double> rotx(3*mol.natom()); 368 Tensor<double> roty(3*mol.natom()); 369 Tensor<double> rotz(3*mol.natom()); 370 371 for (int iatom=0; iatom<mol.natom(); ++iatom) { 372 373 // coordinates wrt the center of mass ("R" on the web site) 374 Tensor<double> coord(3); 375 coord(0l)=mol.get_atom(iatom).x-com(0l); 376 coord(1l)=mol.get_atom(iatom).y-com(1l); 377 coord(2l)=mol.get_atom(iatom).z-com(2l); 378 379 // note the wrong formula on the Gaussian website: 380 // multiply with sqrt(mass), do not divide! 381 coord.scale(sqrt(mol.get_atom(iatom).get_mass_in_au())); 382 383 // p is the dot product of R and X on the web site 384 Tensor<double> p=inner(coord,v); 385 386 // Eq. (5) 387 rotx(3*iatom + 0)=p(1)*v(0,2)-p(2)*v(0,1); 388 rotx(3*iatom + 1)=p(1)*v(1,2)-p(2)*v(1,1); 389 rotx(3*iatom + 2)=p(1)*v(2,2)-p(2)*v(2,1); 390 391 roty(3*iatom + 0)=p(2)*v(0,0)-p(0l)*v(0,2); 392 roty(3*iatom + 1)=p(2)*v(1,0)-p(0l)*v(1,2); 393 roty(3*iatom + 2)=p(2)*v(2,0)-p(0l)*v(2,2); 394 395 rotz(3*iatom + 0)=p(0l)*v(0,1)-p(1)*v(0,0); 396 rotz(3*iatom + 1)=p(0l)*v(1,1)-p(1)*v(1,0); 397 rotz(3*iatom + 2)=p(0l)*v(2,1)-p(1)*v(2,0); 398 399 } 400 401 // move the translational and rotational vectors to a common tensor 402 Tensor<double> ext_dof(6,3*mol.natom()); 403 ext_dof(0l,_)=transx; 404 ext_dof(1l,_)=transy; 405 ext_dof(2l,_)=transz; 406 ext_dof(3l,_)=rotx; 407 ext_dof(4l,_)=roty; 408 ext_dof(5l,_)=rotz; 409 410 // normalize 411 for (int i=0; i<6; ++i) { 412 double norm=ext_dof(i,_).normf(); 413 if (norm>1.e-14) ext_dof(i,_).scale(1.0/norm); 414 else ext_dof(i,_)=0.0; 415 } 416 417 // compute overlap to orthonormalize the projectors 418 Tensor<double> ovlp=inner(ext_dof,ext_dof,1,1); 419 syev(ovlp,v,e); 420 ext_dof=inner(v,ext_dof,0,0); 421 422 // normalize or remove the dof if necessary (e.g. linear molecules) 423 for (int i=0; i<6; ++i) { 424 if (e(i)<1.e-14) { 425 ext_dof(i,_).scale(0.0); // take out this degree of freedom 426 } else { 427 ext_dof(i,_).scale(1.0/sqrt(e(i))); // normalize 428 } 429 } 430 431 // construct projector on the complement of the rotations 432 Tensor<double> projector(3*mol.natom(),3*mol.natom()); 433 for (int i=0; i<3*mol.natom(); ++i) projector(i,i)=1.0; 434 435 // compute the outer products of the projectors 436 // 1- \sum_i | t_i >< t_i | 437 projector-=inner(ext_dof,ext_dof,0,0); 438 439 // construct random tensor for orthogonalization 440 Tensor<double> D(3*mol.natom(),3*mol.natom()); 441 D.fillrandom(); 442 for (int i=0; i<6; ++i) D(i,_)=ext_dof(i,_); 443 444 // this works only for non-linear molecules -- projector seems simpler 445 // ovlp=inner(D,D,1,1); 446 // cholesky(ovlp); // destroys ovlp 447 // Tensor<double> L=transpose(ovlp); 448 // Tensor<double> Linv=inverse(L); 449 // D=inner(Linv,D,1,0); 450 // ovlp=inner(D,D,1,1); 451 // 452 // for (int i=0; i<6; ++i) D(i,_)=0.0; 453 // D=copy(D(Slice(6,8),_)); 454 455 // return transpose(D); 456 457 return projector; 458 459 } 460 461 /// remove translational degrees of freedom from the hessian remove_external_dof(Tensor<double> & hessian,const Molecule & mol)462 static void remove_external_dof(Tensor<double>& hessian, 463 const Molecule& mol) { 464 465 // compute the translation of the center of mass 466 Tensor<double> projector_ext=projector_external_dof(mol); 467 468 // this is P^T * H * P 469 hessian=inner(projector_ext,inner(hessian,projector_ext),0,0); 470 } 471 472 473 /// returns the vibrational frequencies 474 475 /// @param[in] hessian the hessian matrix (not mass-weighted) 476 /// @param[out] normalmodes the normal modes 477 /// @param[in] project_tr whether to project out translation and rotation 478 /// @param[in] print_hessian whether to print the hessian matrix 479 /// @return the frequencies in atomic units 480 static Tensor<double> compute_frequencies(const Molecule& molecule, 481 const Tensor<double>& hessian, Tensor<double>& normalmodes, 482 const bool project_tr=true, const bool print_hessian=false) { 483 484 // compute mass-weighing matrices 485 Tensor<double> M=molecule.massweights(); 486 Tensor<double> Minv(3*molecule.natom(),3*molecule.natom()); 487 for (int i=0; i<3*molecule.natom(); ++i) Minv(i,i)=1.0/M(i,i); 488 489 // mass-weight the hessian 490 Tensor<double> mwhessian=inner(M,inner(hessian,M)); 491 492 // remove translation and rotation 493 if (project_tr) MolecularOptimizer::remove_external_dof(mwhessian,molecule); 494 495 if (print_hessian) { 496 if (project_tr) { 497 print("mass-weighted hessian with translation and rotation projected out"); 498 } else { 499 print("mass-weighted unprojected hessian"); 500 } 501 Tensor<double> mmhessian=inner(Minv,inner(mwhessian,Minv)); 502 print(mwhessian); 503 print("mass-weighted unprojected hessian; mass-weighing undone"); 504 print(mmhessian); 505 } 506 507 Tensor<double> freq; 508 syev(mwhessian,normalmodes,freq); 509 for (long i=0; i<freq.size(); ++i) { 510 if (freq(i)>0.0) freq(i)=sqrt(freq(i)); // real frequencies 511 else freq(i)=-sqrt(-freq(i)); // imaginary frequencies 512 } 513 return freq; 514 } 515 516 compute_reduced_mass(const Molecule & molecule,const Tensor<double> & normalmodes)517 static Tensor<double> compute_reduced_mass(const Molecule& molecule, 518 const Tensor<double>& normalmodes) { 519 520 Tensor<double> M=molecule.massweights(); 521 Tensor<double> D=MolecularOptimizer::projector_external_dof(molecule); 522 Tensor<double> L=copy(normalmodes); 523 Tensor<double> DL=inner(D,L); 524 Tensor<double> MDL=inner(M,DL); 525 Tensor<double> mu(3*molecule.natom()); 526 527 for (int i=0; i<3*molecule.natom(); ++i) { 528 double mu1=0.0; 529 for (int j=0; j<3*molecule.natom(); ++j) mu1+=MDL(j,i)*MDL(j,i); 530 if (mu1>1.e-14) mu(i)=1.0/(mu1*constants::atomic_mass_in_au); 531 } 532 return mu; 533 } 534 535 }; 536 537 } 538 539 #endif //MADNESS_CHEM_MOLECULAR_OPTIMIZER_H__INCLUDED 540