1#if HAVE_CONFIG_H 2# include "config.fh" 3#endif 4 program scf 5C$Id: scf.F,v 1.18 2007/03/23 19:24:36 d3g293 Exp $ 6 implicit double precision (a-h, o-z) 7#include "cscf.h" 8#include "mafdecls.fh" 9#include "global.fh" 10c 11 integer*8 nints, maxint !cste 12c 13c CAUTION: integer precision requirements 14c nints, maxint, etc. are proportional to the number of basis functions 15c to the fourth power! 216**4 is greater than the largest number 16c that can be represented as a 32-bit signed interger, so 64-bit 17c arithmetic is needed to count integrals when calculating more than 18c 14 Be atoms with 15 basis functions each. Since integrals are counted 19c over all iterations, 18 iterations with 7 atoms can result in precision 20c problems. Note that the wave function could be calculated correctly 21c for much larger basis sets without 64-bit integers because the required 22c indexing is usually proportional to nbfn**2, which is good to 46,340 23c basis functions, except that the task counter runs as (nbfn/ichunk)**4, 24c so with ichunk = 10, 32-bit integers yield correct wavefunctions out to 25c 2145 basis functions (maxatom=143), or 4290 (maxatom=286) with ichunk = 20, ... 26c 27c This warning applies to the Global Arrays implementation as well! 28c functions of special concern are ga_igop and nga_read_inc. 29c 30#define USE_TRANSFORM 1 31 integer heap, stack 32 data tinit, tonel, ttwoel, tdiag, tdens, tprint /6*0.0d0/ 33 data eone, etwo, energy, deltad /4*0.0d0/ 34c 35c initalize the parallel message passing environment 36c 37#include "mp3.fh" 38 call ga_initialize() 39c 40c Allocate memory 41c 42 heap = 32000000 43 stack = 32000000 44 if (.not.ma_init(MT_DBL, stack, heap)) 45 + call ga_error("ma_init failed",-1) 46 call flush(6) 47 me = ga_nodeid() 48 nproc = ga_nnodes() 49c 50c initialize a bunch of stuff and initial density matrix 51c 52 rjunk = timer() 53c 54c get input from file be.inpt 55c 56 call input 57c 58c create and allocate global arrays 59c 60 call setarrays 61 if (ga_nodeid().eq.0) write(6,*) 'bytes of memory used by node 0:' 62 + ,ga_inquire_memory() 63 call ininrm 64c 65c create initial guess for density matrix by using single atom 66c densities 67c 68 call denges 69 tinit = timer() 70#if USE_TRANSFORM 71c 72c make initial orthogonal orbital set for solution method using 73c similarity transform 74c 75 call makeob 76#endif 77c 78c make info for sparsity test 79c 80 call makesz(schwmax) 81c 82c print preliminary data before any long compute segments start 83 if (ga_nodeid().eq.0) call flush(6) 84c 85c *** iterate *** 86c 87 do 10 iter = 1, mxiter 88c 89c make the one particle contribution to the fock matrix 90c and get the partial contribution to the energy 91c 92 call oneel(schwmax, eone) 93 tonel = tonel + timer() 94c 95c compute the two particle contributions to the fock matrix and 96c get the total energy. 97c 98 call twoel(schwmax, etwo) 99 ttwoel = ttwoel + timer() 100c 101c Diagonalize the fock matrix. The diagonalizers used in this 102c subroutine are actually sequential, not parallel. 103c 104 call diagon(tester,iter) 105 tdiag = tdiag + timer() 106c 107c make the new density matrix in g_work from orbitals in g_orbs, 108c compute the norm of the change in the density matrix and 109c then update the density matrix in g_dens with damping. 110c 111 call makden 112 deltad = dendif() 113 if (iter.eq.1) then 114 scale = 0.0d0 115 else if (iter .le. 5) then 116 if (nbfn .gt. 60) then 117 scale = 0.5d0 118 else 119 scale = 0.0d0 120 endif 121 else 122 scale = 0.0d0 123 endif 124 call damp(scale) 125 tdens = tdens + timer() 126c 127c add up energy and print out convergence information 128c 129 if (me.eq.0) then 130 energy = enrep + eone + etwo 131 call prnout(iter, energy, deltad, tester) 132 tprint = tprint + timer() 133 endif 134c 135c if converged then exit iteration loop 136c 137 if (deltad .lt. tol) goto 20 138 call ga_igop(9, icut4, 1, '+') !cste 139 if(icut4 .eq. 0) then !cste 140c something has gone wrong--print what you know and quit. 141 write(6,*) 'no two-electron integrals computed!' !cste 142 goto 20 !cste 143 endif !cste 144 10 continue 145 iter = iter - 1 !cste 146 if(me.eq.0) 147 $ write(6,*) ' SCF failed to converge in ', iter, ' iters' 148c...v....1....v....2....v....3....v....4....v....5....v....6....v....7.. 149c 150c finished ... print out eigenvalues and occupied orbitals 151c 152 20 continue 153 call ga_igop(6, icut1, 1, '+') 154 call ga_igop(7, icut2, 1, '+') 155 call ga_igop(8, icut3, 1, '+') 156 if (me.eq.0) then 157c 158c print out timing information 159c 160 call prnfin(energy) 161 write(6,1) tinit, tonel, ttwoel, tdiag, tdens, tprint, 162 $ nproc 163 1 format(/5x,' init ',4x,' onel ',4x,' twoel ',4x,' diag ',4x, 164 $ ' dens print ncpu'/ 165 $ 5x,'------',4x,'------',4x,'-------',4x,'------',4x, 166 $ '------ ------ ------'/ 167 $ 2f10.2,f11.2,3f10.2, i7/) 168 totsec = tinit+tonel+ttwoel+tdiag+tdens+tprint 169 write(6,*)'elapsed time in seconds ',totsec 170c 171c print out information on # integrals evaluated each iteration 172c 173 174 nints = icut1+icut2+icut3 175 frac = dble(icut3)/dble(nints) 176 177 write(6,2) icut1, icut2, icut3, nints, frac 178 2 format(/'No. of integrals screened or computed (all iters) ' 179 $ /'-------------------------------------'/ 180 $ /1x,' failed #ij test failed #kl test', 181 $ ' #compute #total', 182 $ ' fraction', 183 $ /1x,' --------------- ---------------', 184 $ ' --------------- ---------------', 185 $ ' --------', 186 $ /1x,4(1x,i15),f9.6) 187 maxint = nbfn !cste 188 maxint = maxint**4 * iter !cste 189 if(nints .ne. maxint) then !cste 190 write(6,*)'Inconsistent number of integrals, should be ', !cste 191 $ maxint !cste 192 write(6,*)'Note: largest 32-bit integer is 2,147,483,647' !cste 193 write(6,*)'Probably due to insufficient integer precision in GA.'!cste 194 endif !cste 195#ifdef MSG_COMMS_MPI 196 call ga_print_stats() 197#else 198 call stats 199#endif 200 endif 201c 202 call closearrays 203 call ga_terminate 204#ifdef MSG_COMMS_MPI 205 call mpi_finalize 206#else 207 call pend 208#endif 209c 210 end 211c 212 subroutine makesz(schwmax) 213 implicit double precision (a-h, o-z) 214#include "cscf.h" 215#include "mafdecls.fh" 216#include "global.fh" 217#include "mp3def.fh" 218 dimension work(ichunk,ichunk) 219 integer lo(2),hi(2),i,j,iloc,jloc,ld 220 logical dotask, next_chunk 221c 222c schwarz(ij) = (ij|ij) for sparsity test 223c 224 icut1 = 0 225 icut2 = 0 226 icut3 = 0 227c 228 call ga_zero(g_schwarz) 229 call ga_zero(g_counter) 230 schwmax = 0.0d0 231 dotask = next_chunk(lo,hi) 232 ld = ichunk 233 do while (dotask) 234 do i = lo(1), hi(1) 235 iloc = i - lo(1) + 1 236 do j = lo(2), hi(2) 237 jloc = j - lo(2) + 1 238 call g(gg,i,j,i,j) 239 work(iloc,jloc) = sqrt(gg) 240 schwmax = max(schwmax, work(iloc,jloc)) 241 end do 242 end do 243 call nga_put(g_schwarz,lo,hi,work,ld) 244 dotask = next_chunk(lo,hi) 245 end do 246 call ga_dgop(11,schwmax,1,'max') 247c 248 return 249 end 250c 251 subroutine ininrm 252 implicit double precision (a-h, o-z) 253#include "cscf.h" 254#include "mafdecls.fh" 255#include "global.fh" 256#include "mp3def.fh" 257 integer*8 maxint 258c 259c write a little welcome message 260c 261 maxint = nbfn 262 maxint = maxint**4 263 if (ga_nodeid().eq.0) then 264 write(6,1) natom, nocc, nbfn, maxint, tol, ichunk 2651 format(/' Example Direct Self Consistent Field Program '/ 266 $ ' -------------------------------------------- '// 267 $ ' no. of atoms .............. ',i5/ 268 $ ' no. of occupied orbitals .. ',i5/ 269 $ ' no. of basis functions .... ',i5/ 270 $ ' basis functions^4 ' ,i15/ 271 $ ' convergence threshold ..... ',1pd9.2/ 272 $ ' chunk size .................',i5) 273 write(6,*) !cste 274 call flush(6) !cste 275 endif !cste 276c 277c generate normalisation coefficients for the basis functions 278c and the index array iky 279c 280 do 10 i = 1, nbfn 281 iky(i) = i*(i-1)/2 282 10 continue 283c 284 do 20 i = 1, nbfn 285 rnorm(i) = (expnt(i)*2.0d0/pi)**0.75d0 286 20 continue 287c 288c initialize common for computing f0 289c 290 call setfm 291c 292 end 293 double precision function h(i,j) 294 implicit double precision (a-h, o-z) 295#include "cscf.h" 296#include "mafdecls.fh" 297#include "global.fh" 298#include "mp3def.fh" 299cvd$r novector 300cvd$r noconcur 301c 302c generate the one particle hamiltonian matrix element 303c over the normalized primitive 1s functions i and j 304c 305 f0val = 0.0d0 306 sum = 0.0d0 307 rab2 = (x(i)-x(j))**2 + (y(i)-y(j))**2 + (z(i)-z(j))**2 308 facij = expnt(i)*expnt(j)/(expnt(i)+expnt(j)) 309 expij = exprjh(-facij*rab2) 310 repij = (2.0d0*pi/(expnt(i)+expnt(j))) * expij 311c 312c first do the nuclear attraction integrals 313c 314 do 10 iat = 1, natom 315 xp = (x(i)*expnt(i) + x(j)*expnt(j))/(expnt(i)+expnt(j)) 316 yp = (y(i)*expnt(i) + y(j)*expnt(j))/(expnt(i)+expnt(j)) 317 zp = (z(i)*expnt(i) + z(j)*expnt(j))/(expnt(i)+expnt(j)) 318 rpc2 = (xp-ax(iat))**2 + (yp-ay(iat))**2 + (zp-az(iat))**2 319c 320 call f0(f0val, (expnt(i)+expnt(j))*rpc2) 321 sum = sum - repij * q(iat) * f0val 322 10 continue 323c 324c add on the kinetic energy term 325c 326 sum = sum + facij*(3.0d0-2.0d0*facij*rab2) * 327 $ (pi/(expnt(i)+expnt(j)))**1.5d0 * expij 328c 329c finally multiply by the normalization constants 330c 331 h = sum * rnorm(i) * rnorm(j) 332c 333 end 334 double precision function s(i,j) 335 implicit double precision (a-h, o-z) 336#include "cscf.h" 337#include "mafdecls.fh" 338#include "global.fh" 339#include "mp3def.fh" 340c 341c generate the overlap matrix element between the normalized 342c primitve gaussian 1s functions i and j 343c 344 rab2 = (x(i)-x(j))**2 + (y(i)-y(j))**2 + (z(i)-z(j))**2 345 facij = expnt(i)*expnt(j)/(expnt(i)+expnt(j)) 346 s = (pi/(expnt(i)+expnt(j)))**1.5d0 * exprjh(-facij*rab2) * 347 $ rnorm(i)*rnorm(j) 348c 349 end 350 subroutine makden 351 implicit double precision (a-h, o-z) 352#include "cscf.h" 353#include "mafdecls.fh" 354#include "global.fh" 355#include "mp3def.fh" 356c dimension work(maxnbfn,maxnbfn), torbs(maxnbfn,maxnbfn) 357 dimension work(ichunk,ichunk), orbsi(ichunk,maxnbfn) 358 dimension orbsj(ichunk,maxnbfn) 359 integer lo(2), hi(2), tlo(2), thi(2), i, j, iloc, jloc, ld 360 logical dotask, next_chunk 361c 362c generate density matrix from orbitals in g_orbs. the first 363c nocc orbitals are doubly occupied. 364c 365 call ga_zero(g_counter) 366 dotask = next_chunk(lo,hi) 367 ld = ichunk 368 do while (dotask) 369 tlo(1) = lo(1) 370 thi(1) = hi(1) 371 tlo(2) = 1 372 thi(2) = nocc 373 call nga_get(g_orbs,tlo,thi,orbsi,ld) 374 tlo(1) = lo(2) 375 thi(1) = hi(2) 376 call nga_get(g_orbs,tlo,thi,orbsj,ld) 377 do i = lo(1), hi(1) 378 iloc = i - lo(1) + 1 379 do j = lo(2), hi(2) 380 jloc = j - lo(2) + 1 381 p = 0.0d00 382 do k = 1, nocc 383 p = p + orbsi(iloc,k)*orbsj(jloc,k) 384 end do 385 work(iloc,jloc) = 2.0d00*p 386 end do 387 end do 388 call nga_put(g_work,lo,hi,work,ld) 389 dotask = next_chunk(lo,hi) 390 end do 391 return 392 end 393c 394 subroutine oneel(schwmax, eone) 395 implicit double precision (a-h, o-z) 396#include "cscf.h" 397#include "mafdecls.fh" 398#include "global.fh" 399#include "mp3def.fh" 400 integer lo(2), hi(2), i, j, iloc, jloc, ld 401 dimension work(ichunk,ichunk),tfock(ichunk,ichunk) 402 logical dotask, next_chunk 403c 404c fill in the one-electron part of the fock matrix and 405c compute the one-electron energy contribution 406c 407 408 me = ga_nodeid() 409 nproc = ga_nnodes() 410c 411 call ga_zero(g_counter) 412 dotask = next_chunk(lo,hi) 413 ld = ichunk 414 do while (dotask) 415 call nga_get(g_schwarz,lo,hi,work,ld) 416 do j = lo(2), hi(2) 417 jloc = j - lo(2) + 1 418 do i = lo(1), hi(1) 419 iloc = i - lo(1) + 1 420 tfock(iloc,jloc) = 0.0d00 421 if (work(iloc,jloc)*schwmax.gt.tol2e) 422 + tfock(iloc,jloc) = h(i,j) 423 end do 424 end do 425 call nga_put(g_fock,lo,hi,tfock,ld) 426 dotask = next_chunk(lo,hi) 427 end do 428 eone = 0.5d00*contract_matrices(g_fock,g_dens) 429c 430 end 431#if 0 432 integer function nxtask(nproc) 433 parameter (ichunk = 10) 434 save icount, nleft 435 data nleft, icount /0, 0/ 436c 437c wrapper round nxtval() to increase granularity 438c and thus reduce no. of requests to shared counter 439c 440 if(nproc.gt.0) then 441 if(nleft.eq.0) then 442#ifdef MSG_COMMS_MPI 443 icount = nxtval_ga(nproc) * ichunk 444#else 445 icount = nxtval(nproc) * ichunk 446#endif 447 nleft = ichunk 448 endif 449 nxtask = icount 450 icount = icount + 1 451 nleft = nleft -1 452 else 453 nleft = 0 454 nxtask = 0 455#ifdef MSG_COMMS_MPI 456 junk = nxtval_ga(nproc) 457#else 458 junk = nxtval(nproc) 459#endif 460 endif 461c 462c following does dumb static load balancing 463c 464c$$$ if(nproc.gt.0) then 465c$$$ if (nleft .eq. 0) then 466c$$$ icount = ga_nodeid() 467c$$$ nleft = 1 468c$$$ endif 469c$$$ nxtask = icount 470c$$$ icount = icount + ga_nnodes() 471c$$$ else 472c$$$ nleft = 0 473c$$$ nxtask = 0 474c$$$ endif 475 end 476#endif 477c 478 logical function next_chunk(lo,hi) 479#include "cscf.h" 480 integer one 481 parameter (one = 1) 482 integer imax, lo(2), hi(2), ilo, jlo 483 itask = nga_read_inc(g_counter,one,one) 484 imax = nbfn/ichunk 485 if (nbfn - ichunk*imax.gt.0) imax = imax + 1 486 if (itask.lt.imax*imax) then 487 ilo = mod(itask,imax) 488 jlo = (itask-ilo)/imax 489 lo(1) = ilo*ichunk + 1 490 lo(2) = jlo*ichunk + 1 491 hi(1) = min((ilo+1)*ichunk,nbfn) 492 hi(2) = min((jlo+1)*ichunk,nbfn) 493 next_chunk = .true. 494 else 495 next_chunk = .false. 496 endif 497 return 498 end 499c 500 logical function next_4chunk(lo,hi,ilo,jlo,klo,llo) 501#include "cscf.h" 502 integer one 503 parameter (one = 1) 504 integer*8 imax, itask, itmp !cste 505 integer lo(4), hi(4), ilo, jlo, klo, llo !cste 506c 507 itask = nga_read_inc(g_counter,one,one) 508 imax = nbfn/ichunk 509 if (nbfn - ichunk*imax.gt.0) imax = imax + 1 510 if (itask. lt. 0) then !cste 511 write(6,*) 'next_4chunk: itask negative:',itask, !cste 512 * ' imax:',imax,' nbfn:',nbfn,' ichunk:',ichunk!cste 513 write(6,*) 'probable GA integer precision problem if ' !cste 514 * ,'imax^4 > 2^31' !cste 515 call flush(6) !cste 516 stop 'next_4chunk' !cste 517 end if !cste 518 if (itask.lt.imax**4) then 519 ilo = mod(itask,imax) 520 itmp = (itask - ilo)/imax 521 jlo = mod(itmp,imax) 522 itmp = (itmp - jlo)/imax 523 klo = mod(itmp,imax) 524 llo = (itmp - klo)/imax 525 lo(1) = ilo*ichunk + 1 526 lo(2) = jlo*ichunk + 1 527 lo(3) = klo*ichunk + 1 528 lo(4) = llo*ichunk + 1 529 hi(1) = min((ilo+1)*ichunk,nbfn) 530 hi(2) = min((jlo+1)*ichunk,nbfn) 531 hi(3) = min((klo+1)*ichunk,nbfn) 532 hi(4) = min((llo+1)*ichunk,nbfn) 533 next_4chunk = .true. 534 else 535 next_4chunk = .false. 536 endif 537 return 538 end 539c 540 subroutine clean_chunk(chunk) 541#include "cscf.h" 542 double precision chunk(ichunk,ichunk) 543 integer i,j 544 do j = 1, ichunk 545 do i = 1, ichunk 546 chunk(i,j) = 0.0d00 547 end do 548 end do 549 return 550 end 551c 552 subroutine twoel(schwmax, etwo) 553 implicit double precision (a-h, o-z) 554#include "cscf.h" 555#include "mafdecls.fh" 556#include "global.fh" 557#include "mp3def.fh" 558 double precision f_ij(ichunk,ichunk),d_kl(ichunk,ichunk) 559 double precision f_ik(ichunk,ichunk),d_jl(ichunk,ichunk) 560 double precision s_ij(ichunk,ichunk),s_kl(ichunk,ichunk) 561 double precision schwmax, one 562cste integer nproc !cste 563 integer*8 ijkls, ijcnt,klcnt,ijklcnt !cste 564 integer lo(4),hi(4),lo_ik(2),hi_ik(2),lo_jl(2),hi_jl(2) !cste 565 integer i,j,k,l,iloc,jloc,kloc,lloc,ld,ich,it,jt,kt,lt !cste 566 logical dotask, next_4chunk 567c 568c add in the two-electron contribution to the fock matrix 569c 570cste nproc = ga_nnodes() 571 one = 1.0d00 572 ijcnt = icut1 !cste 573 klcnt = icut2 !cste 574 ijklcnt = icut3 !cste 575c 576 call ga_zero(g_counter) 577 ld = maxnbfn 578 ich = ichunk 579 dotask = next_4chunk(lo,hi,it,jt,kt,lt) 580 itask = 0 581cste ijkls = 0 !cste 582 do while (dotask) 583cste ijkl=(hi(1)-lo(1)+1)*(hi(2)-lo(2)+1)* !cste 584cste * (hi(3)-lo(3)+1)*(hi(4)-lo(4)+1) !cste 585cste ijkls = ijkls + ijkl !cste 586cste write(6,*)itask,lo,hi,ijkl,ijkls !cste 587 lo_ik(1) = lo(1) 588 lo_ik(2) = lo(3) 589 hi_ik(1) = hi(1) 590 hi_ik(2) = hi(3) 591 lo_jl(1) = lo(2) 592 lo_jl(2) = lo(4) 593 hi_jl(1) = hi(2) 594 hi_jl(2) = hi(4) 595 call nga_get(g_schwarz,lo,hi,s_ij,ich) 596 call nga_get(g_schwarz,lo(3),hi(3),s_kl,ich) 597 call nga_get(g_dens,lo(3),hi(3),d_kl,ich) 598 call nga_get(g_dens,lo_jl,hi_jl,d_jl,ich) 599 itask = itask + 1 600 call clean_chunk(f_ij) 601 call clean_chunk(f_ik) 602 do i = lo(1), hi(1) 603 iloc = i-lo(1) + 1 604 do j = lo(2), hi(2) 605 jloc = j-lo(2) + 1 606 if (s_ij(iloc,jloc)*schwmax .lt. tol2e) then 607 icut1 = icut1 + (hi(3)-lo(3)+1)*(hi(4)-lo(4)+1) !cste 608 else 609 do k = lo(3), hi(3) 610 kloc = k-lo(3) + 1 611 do l = lo(4), hi(4) 612 lloc = l-lo(4) + 1 613 if (s_ij(iloc,jloc)*s_kl(kloc,lloc).lt.tol2e) then 614 icut2 = icut2 + 1 615 else 616 call g(gg, i, j, k, l) 617 f_ij(iloc,jloc) = f_ij(iloc,jloc) 618 + + gg*d_kl(kloc,lloc) 619 f_ik(iloc,kloc) = f_ik(iloc,kloc) 620 + - 0.5d00*gg*d_jl(jloc,lloc) 621 icut3 = icut3 + 1 622 endif 623 end do 624 end do 625 endif 626 end do 627 end do 628 call nga_acc(g_fock,lo,hi,f_ij,ich,one) 629 call nga_acc(g_fock,lo_ik,hi_ik,f_ik,ich,one) 630 dotask = next_4chunk(lo,hi,it,jt,kt,lt) 631 end do 632 etwo = 0.5d00*contract_matrices(g_fock,g_dens) 633 ijcnt = icut1 - ijcnt 634 klcnt = icut2 - klcnt 635 ijklcnt = icut3 - ijklcnt 636cste write(6,*) 'node ', ga_nodeid(), ijcnt, klcnt, ijklcnt !cste 637cste * ,icut1,icut2,icut3 !cste 638cste call flush(6) !cste 639 icut4 = icut3 !cste 640 if (icut3 .gt. 0) return !cste 641c 642c no integrals may be calculated if there is no work for 643c this node (ichunk too big), or, something is wrong 644c 645 write(6,*) 'no two-electron integrals computed by node', !cste 646 * ga_nodeid() !cste 647 call flush(6) !cste 648 return 649cste stop 'twoel computed no integrals' !cste 650 end 651c 652 subroutine damp(fac) 653 implicit double precision (a-h, o-z) 654#include "cscf.h" 655#include "mafdecls.fh" 656#include "global.fh" 657#include "mp3def.fh" 658c 659c create damped density matrix as a linear combination of 660c old density matrix and density matrix formed from new orbitals 661c 662 ofac = 1.0d0 - fac 663 call ga_add(fac,g_dens,ofac,g_work,g_dens) 664 return 665 end 666c 667 subroutine prnout(iter, energy, deltad, tester) 668 implicit double precision (a-h, o-z) 669#include "cscf.h" 670#include "mafdecls.fh" 671#include "global.fh" 672#include "mp3def.fh" 673c 674c printout results of each iteration 675c 676 if (ga_nodeid().ne.0) return 677 write(6,1) iter, energy, deltad, tester 678 call flush(6) 6791 format(' iter=',i3,', energy=',f15.8,', deltad=',1pd9.2, 680 $ ', deltaf=',d9.2) 681 return 682 end 683c 684 double precision function dendif() 685 implicit double precision (a-h, o-z) 686#include "cscf.h" 687#include "mafdecls.fh" 688#include "global.fh" 689#include "mp3def.fh" 690 double precision xdiff 691 dimension dens_c(ichunk,ichunk),work_c(ichunk,ichunk) 692 integer lo(2), hi(2), i, j, ld 693 logical dotask, next_chunk 694c 695c compute largest change in density matrix elements 696c 697 denmax = 0.0d0 698 call ga_zero(g_counter) 699 dotask = next_chunk(lo,hi) 700 ld = ichunk 701 do while(dotask) 702 call nga_get(g_dens,lo,hi,dens_c,ld) 703 call nga_get(g_work,lo,hi,work_c,ld) 704 do j = 1, hi(2)-lo(2)+1 705 do i = 1, hi(1)-lo(1)+1 706 xdiff = abs(dens_c(i,j)-work_c(i,j)) 707 if (xdiff.gt.denmax) denmax = xdiff 708 end do 709 end do 710 dotask = next_chunk(lo,hi) 711 end do 712 call ga_dgop(1,denmax,1,'max') 713 dendif = denmax 714 return 715 end 716c 717 double precision function testfock() 718 implicit double precision (a-h, o-z) 719#include "cscf.h" 720#include "mafdecls.fh" 721#include "global.fh" 722#include "mp3def.fh" 723 double precision xmax, xtmp 724 dimension work(ichunk,ichunk) 725 integer lo(2), hi(2), i, j, iloc, jloc, ld 726 logical dotask, next_chunk 727c 728c compute largest change in density matrix elements 729c 730 xmax = 0.0d0 731 call ga_zero(g_counter) 732 dotask = next_chunk(lo,hi) 733 ld = ichunk 734 do while(dotask) 735 call nga_get(g_fock,lo,hi,work,ld) 736 do j = lo(2), hi(2) 737 jloc = j - lo(2) + 1 738 do i = lo(1), hi(1) 739 iloc = i - lo(1) + 1 740 if (i.ne.j) then 741 xtmp = abs(work(iloc,jloc)) 742 if (xtmp.gt.xmax) xmax = xtmp 743 endif 744 end do 745 end do 746 dotask = next_chunk(lo,hi) 747 end do 748 call ga_dgop(1,xmax,1,'max') 749 testfock = xmax 750 return 751 end 752c 753 subroutine shiftfock(shift) 754 implicit double precision (a-h, o-z) 755#include "cscf.h" 756#include "mafdecls.fh" 757#include "global.fh" 758#include "mp3def.fh" 759 double precision shift 760 dimension work(ichunk,ichunk) 761 integer lo(2), hi(2), i, j, iloc, jloc, ld, icnt 762 logical dotask, next_chunk 763c 764c compute largest change in density matrix elements 765c 766 call ga_zero(g_counter) 767 dotask = next_chunk(lo,hi) 768 ld = ichunk 769 do while(dotask) 770 call nga_get(g_fock,lo,hi,work,ld) 771 icnt = 0 772 do j = lo(2), hi(2) 773 jloc = j - lo(2) + 1 774 do i = lo(1), hi(1) 775 iloc = i - lo(1) + 1 776 if (i.eq.j.and.i.gt.nocc) then 777 work(iloc,jloc) = work(iloc,jloc) + shift 778 icnt = icnt + 1 779 endif 780 end do 781 end do 782 if (icnt.gt.0) call nga_put(g_fock,lo,hi,work,ld) 783 dotask = next_chunk(lo,hi) 784 end do 785 return 786 end 787c 788 subroutine prnfin(energy) 789 implicit double precision (a-h, o-z) 790#include "cscf.h" 791#include "mafdecls.fh" 792#include "global.fh" 793#include "mp3def.fh" 794 dimension orbs(maxnbfn, maxnbfn) 795 integer lo(2),hi(2),ld 796c 797c printout final results 798c 799 if (ga_nodeid().ne.0) return 800 write(6,1) energy 801 1 format(//' final energy = ',f18.11//' eigenvalues') 802 call output(eigv, 1, min(nbfn,nocc+5), 1, 1, nbfn, 1, 1) 803c 804 return 805 end 806 subroutine g(value,i,j,k,l) 807 implicit double precision (a-h, o-z) 808#include "cscf.h" 809#include "mafdecls.fh" 810#include "global.fh" 811#include "mp3def.fh" 812c 813c compute the two electon integral (ij|kl) over normalized 814c primitive 1s gaussians 815c 816 f0val = 0.0d0 817 rab2 = (x(i)-x(j))**2 + (y(i)-y(j))**2 + (z(i)-z(j))**2 818 rcd2 = (x(k)-x(l))**2 + (y(k)-y(l))**2 + (z(k)-z(l))**2 819 facij = expnt(i)*expnt(j)/(expnt(i)+expnt(j)) 820 fackl = expnt(k)*expnt(l)/(expnt(k)+expnt(l)) 821 exijkl = exprjh(- facij*rab2 - fackl*rcd2) 822 denom = (expnt(i)+expnt(j))*(expnt(k)+expnt(l)) * 823 $ sqrt(expnt(i)+expnt(j)+expnt(k)+expnt(l)) 824 fac = (expnt(i)+expnt(j))*(expnt(k)+expnt(l)) / 825 $ (expnt(i)+expnt(j)+expnt(k)+expnt(l)) 826c 827 xp = (x(i)*expnt(i) + x(j)*expnt(j))/(expnt(i)+expnt(j)) 828 yp = (y(i)*expnt(i) + y(j)*expnt(j))/(expnt(i)+expnt(j)) 829 zp = (z(i)*expnt(i) + z(j)*expnt(j))/(expnt(i)+expnt(j)) 830 xq = (x(k)*expnt(k) + x(l)*expnt(l))/(expnt(k)+expnt(l)) 831 yq = (y(k)*expnt(k) + y(l)*expnt(l))/(expnt(k)+expnt(l)) 832 zq = (z(k)*expnt(k) + z(l)*expnt(l))/(expnt(k)+expnt(l)) 833 rpq2 = (xp-xq)**2 + (yp-yq)**2 + (zp-zq)**2 834c 835 call f0(f0val, fac*rpq2) 836 value = (2.0d0 * pi**2.5d0 / denom) * exijkl * f0val * 837 $ rnorm(i)*rnorm(j)*rnorm(k)*rnorm(l) 838 return 839 end 840c 841 subroutine diagon(tester, iter) 842c subroutine diagon(fock, orbs, evals, work, tester, iter) 843 implicit double precision (a-h, o-z) 844#include "cscf.h" 845#include "mafdecls.fh" 846#include "global.fh" 847#include "mp3def.fh" 848 double precision r_zero, r_one, shift, tester 849c 850#if USE_TRANSFORM 851c 852c use similarity transform to solve standard eigenvalue problem 853c (overlap matrix has been transformed out of the problem) 854c 855 r_one = 1.0d00 856 r_zero = 0.0d00 857 call ga_dgemm('n','n',nbfn,nbfn,nbfn,r_one,g_fock,g_orbs, 858 + r_zero,g_tfock) 859 call ga_dgemm('t','n',nbfn,nbfn,nbfn,r_one,g_orbs,g_tfock, 860 + r_zero,g_fock) 861 tester = testfock() 862 shift = 0.0d00 863 if (tester.gt.0.3d0) then 864 shift = 0.3d0 865 else 866 if (nbfn .gt. 60) then 867 shift = 0.1d0 868 else 869 shift = 0.0d0 870 endif 871 endif 872 if (iter.ge.2.and.shift.ne.0.0d00) then 873 call shiftfock(shift) 874 endif 875 call ga_copy(g_orbs,g_tfock) 876 call ga_diag_std_seq(g_fock, g_work, eigv) 877c 878c Back transform eigenvectors 879c 880 call ga_dgemm('n','n',nbfn,nbfn,nbfn,r_one,g_tfock,g_work, 881 + r_zero,g_orbs) 882 if (iter.ge.2.and.shift.ne.0.0d00) then 883 do 50 i = nocc+1, nbfn 884 eigv(i) = eigv(i) - shift 885 50 continue 886 endif 887#else 888c 889c Keep remaking overlap matrix since ga_diag_seq does not 890c guarantee that g_ident is preserved. 891c 892 call makoverlap 893 call ga_diag_seq(g_fock, g_ident, g_orbs, eigv) 894 tester = 0.0d00 895#endif 896 return 897 end 898c 899 subroutine makeob 900 implicit double precision (a-h, o-z) 901#include "cscf.h" 902#include "mafdecls.fh" 903#include "global.fh" 904#include "mp3def.fh" 905 double precision work(ichunk,ichunk),orbs(ichunk,ichunk) 906 double precision eval(maxnbfn) 907 integer lo(2),hi(2),ld,me,i,j,iloc,jloc 908 logical dotask, next_chunk 909c 910c generate set of orthonormal vectors by creating a random 911c symmetric matrix and solving associated generalized eigenvalue 912c problem using the correct overlap matrix. 913c 914 me = ga_nodeid() 915 call ga_zero(g_counter) 916 dotask = next_chunk(lo,hi) 917 ld = ichunk 918 do while (dotask) 919 do j = lo(2), hi(2) 920 jloc = j - lo(2) + 1 921 do i = lo(1), hi(1) 922 iloc = i - lo(1) + 1 923 work(iloc,jloc) = s(i,j) 924 orbs(iloc,jloc) = drand(0) 925 end do 926 end do 927 call nga_put(g_ident,lo,hi,work,ld) 928 call nga_put(g_fock,lo,hi,orbs,ld) 929 dotask = next_chunk(lo,hi) 930 end do 931 call ga_symmetrize(g_fock) 932 call ga_diag_seq(g_fock, g_ident, g_orbs, eval) 933c 934 return 935 end 936c 937 subroutine denges 938 implicit double precision (a-h, o-z) 939#include "cscf.h" 940#include "mafdecls.fh" 941#include "global.fh" 942#include "mp3def.fh" 943c 944c Form guess density from superposition of atomic densities in the AO 945c basis set ... instead of doing the atomic SCF hardwire for this 946c small basis set for the Be atom. 947c 948 integer one, itask, lo(2), hi(2), ld 949 dimension atdens(15,15) 950 data atdens/ 951 $ 0.000002,0.000027,0.000129,0.000428,0.000950,0.001180, 952 $ 0.000457,-0.000270,-0.000271,0.000004,0.000004,0.000004, 953 $ 0.000004,0.000004,0.000004,0.000027,0.000102,0.000987, 954 $ 0.003269,0.007254,0.009007,0.003492,-0.002099,-0.002108, 955 $ 0.000035,0.000035,0.000035,0.000035,0.000035,0.000035, 956 $ 0.000129,0.000987,0.002381,0.015766,0.034988,0.043433, 957 $ 0.016835,-0.010038,-0.010082,0.000166,0.000166,0.000166, 958 $ 0.000166,0.000166,0.000166,0.000428,0.003269,0.015766, 959 $ 0.026100,0.115858,0.144064,0.055967,-0.035878,-0.035990, 960 $ 0.000584,0.000584,0.000584,0.000584,0.000584,0.000584, 961 $ 0.000950,0.007254,0.034988,0.115858,0.128586,0.320120, 962 $ 0.124539,-0.083334,-0.083536,0.001346,0.001346,0.001346, 963 $ 0.001346,0.001346,0.001346,0.001180,0.009007,0.043433, 964 $ 0.144064,0.320120,0.201952,0.159935,-0.162762,-0.162267, 965 $ 0.002471,0.002471,0.002471,0.002471,0.002471,0.002471, 966 $ 0.000457,0.003492,0.016835,0.055967,0.124539,0.159935, 967 $ 0.032378,-0.093780,-0.093202,0.001372,0.001372,0.001372, 968 $ 0.001372,0.001372,0.001372,-0.000270,-0.002099,-0.010038, 969 $ -0.035878,-0.083334,-0.162762,-0.093780,0.334488,0.660918, 970 $ -0.009090,-0.009090,-0.009090,-0.009090,-0.009090,-0.009090, 971 $ -0.000271,-0.002108,-0.010082,-0.035990,-0.083536,-0.162267, 972 $ -0.093202,0.660918,0.326482,-0.008982,-0.008982,-0.008981, 973 $ -0.008981,-0.008981,-0.008982,0.000004,0.000035,0.000166, 974 $ 0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008982, 975 $ 0.000062,0.000124,0.000124,0.000124,0.000124,0.000124, 976 $ 0.000004,0.000035,0.000166,0.000584,0.001346,0.002471, 977 $ 0.001372,-0.009090,-0.008982,0.000124,0.000062,0.000124, 978 $ 0.000124,0.000124,0.000124,0.000004,0.000035,0.000166, 979 $ 0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008981, 980 $ 0.000124,0.000124,0.000062,0.000124,0.000124,0.000124, 981 $ 0.000004,0.000035,0.000166,0.000584,0.001346,0.002471, 982 $ 0.001372,-0.009090,-0.008981,0.000124,0.000124,0.000124, 983 $ 0.000062,0.000124,0.000124,0.000004,0.000035,0.000166, 984 $ 0.000584,0.001346,0.002471,0.001372,-0.009090,-0.008981, 985 $ 0.000124,0.000124,0.000124,0.000124,0.000062,0.000124, 986 $ 0.000004,0.000035,0.000166,0.000584,0.001346,0.002471, 987 $ 0.001372,-0.009090,-0.008982,0.000124,0.000124,0.000124, 988 $ 0.000124,0.000124,0.000062/ 989c 990c Create initial guess for density matrix in global array 991c 992 call ga_zero(g_dens) 993 call ga_zero(g_counter) 994 one = 1 995 ld = 15 996c 997c Correct for a factor of two along the diagonal 998c 999 do i = 1, ld 1000 atdens(i,i) = 2.0d00*atdens(i,i) 1001 end do 1002 itask = nga_read_inc(g_counter,one,one) 1003 do while(itask.lt.natom) 1004 ioff = itask*15 1005 lo(1) = ioff+1 1006 lo(2) = ioff+1 1007 hi(1) = ioff+15 1008 hi(2) = ioff+15 1009 call nga_put(g_dens,lo,hi,atdens,ld) 1010 itask = nga_read_inc(g_counter,one,one) 1011 end do 1012 call ga_sync 1013 return 1014 end 1015c 1016 subroutine setarrays 1017 implicit double precision (a-h, o-z) 1018#include "cscf.h" 1019#include "mafdecls.fh" 1020#include "global.fh" 1021#include "mp3def.fh" 1022 integer one, two, dims(2) 1023 logical status 1024 one = 1 1025 two = 2 1026 g_counter = ga_create_handle() 1027 call ga_set_data(g_counter,one,one,MT_INT) 1028 status = ga_allocate(g_counter) 1029 call ga_zero(g_counter) 1030 1031 dims(1) = nbfn 1032 dims(2) = nbfn 1033 g_dens = ga_create_handle() 1034 call ga_set_data(g_dens, two, dims, MT_DBL) 1035 status = ga_allocate(g_dens) 1036 call ga_zero(g_dens) 1037 1038 g_schwarz = ga_create_handle() 1039 call ga_set_data(g_schwarz, two, dims, MT_DBL) 1040 status = ga_allocate(g_schwarz) 1041 call ga_zero(g_schwarz) 1042 1043 g_fock = ga_create_handle() 1044 call ga_set_data(g_fock, two, dims, MT_DBL) 1045 status = ga_allocate(g_fock) 1046 call ga_zero(g_fock) 1047 1048 1049 g_tfock = ga_create_handle() 1050 call ga_set_data(g_tfock, two, dims, MT_DBL) 1051 status = ga_allocate(g_tfock) 1052 call ga_zero(g_tfock) 1053 1054 g_work = ga_create_handle() 1055 call ga_set_data(g_work, two, dims, MT_DBL) 1056 status = ga_allocate(g_work) 1057 call ga_zero(g_work) 1058 1059 g_ident = ga_create_handle() 1060 call ga_set_data(g_ident, two, dims, MT_DBL) 1061 status = ga_allocate(g_ident) 1062 call ga_zero(g_ident) 1063 1064 g_orbs = ga_create_handle() 1065 call ga_set_data(g_orbs, two, dims, MT_DBL) 1066 status = ga_allocate(g_orbs) 1067 call ga_zero(g_orbs) 1068 1069 return 1070 end 1071 subroutine closearrays 1072 implicit double precision (a-h, o-z) 1073#include "cscf.h" 1074#include "mafdecls.fh" 1075#include "global.fh" 1076#include "mp3def.fh" 1077 logical status 1078c 1079 status = ga_destroy(g_counter) 1080 status = ga_destroy(g_dens) 1081 status = ga_destroy(g_schwarz) 1082 status = ga_destroy(g_fock) 1083 status = ga_destroy(g_tfock) 1084 status = ga_destroy(g_work) 1085 status = ga_destroy(g_ident) 1086 status = ga_destroy(g_orbs) 1087c 1088 return 1089 end 1090c 1091 subroutine makoverlap 1092 implicit double precision (a-h, o-z) 1093#include "cscf.h" 1094#include "mafdecls.fh" 1095#include "global.fh" 1096#include "mp3def.fh" 1097 integer me, lo(2), hi(2), ptr, ld(2) 1098 integer ld1, ld2 1099 me = ga_nodeid() 1100 call nga_distribution(g_ident, me, lo, hi) 1101 call nga_access(g_ident, lo, hi, ptr, ld) 1102 ld1 = hi(1) - lo(1) + 1 1103 ld2 = hi(2) - lo(2) + 1 1104 call setoverlap(dbl_mb(ptr),lo,hi,ld1,ld2) 1105 call nga_release(g_ident) 1106 return 1107 end 1108c 1109 subroutine setoverlap(a,lo,hi,ld1,ld2) 1110#include "cscf.h" 1111#include "mafdecls.fh" 1112#include "global.fh" 1113#include "mp3def.fh" 1114 integer lo(2), hi(2) 1115 integer ld1, ld2, ii, jj 1116 double precision a(ld1,ld2) 1117 do i = 1, ld1 1118 ii = i + lo(1) - 1 1119 do j = 1, ld2 1120 jj = j + lo(2) - 1 1121#if USE_TRANSFORM 1122 if (ii.eq.jj) then 1123 a(i,j) = 1.0d00 1124 else 1125 a(i,j) = 0.0d00 1126 endif 1127#else 1128 a(i,j) = s(ii,jj) 1129#endif 1130 end do 1131 end do 1132 return 1133 end 1134c 1135 subroutine print_ga_block(g_a) 1136 implicit double precision(a-h,o-z) 1137#include "cscf.h" 1138#include "mafdecls.fh" 1139#include "global.fh" 1140#include "mp3def.fh" 1141 integer lo(2), hi(2), ptr, ld1, ld2 1142c 1143 me = ga_nodeid() 1144 call nga_distribution(g_a, me, lo, hi) 1145 ld1 = hi(1) - lo(1) + 1 1146 ld2 = hi(2) - lo(2) + 1 1147 call nga_access(g_a, lo, hi, ptr, ld) 1148 call dump_chunk(dbl_mb(ptr),ld1,ld2) 1149 call nga_release(g_a) 1150c 1151 return 1152 end 1153c 1154 subroutine print_ga_block_ij(g_a,tlo) 1155 implicit double precision(a-h,o-z) 1156#include "cscf.h" 1157#include "mafdecls.fh" 1158#include "global.fh" 1159#include "mp3def.fh" 1160 integer lo(2), hi(2), ptr, ld1, ld2 1161c 1162 me = ga_nodeid() 1163 call nga_distribution(g_a, me, lo, hi) 1164 ld1 = hi(1) - lo(1) + 1 1165 ld2 = hi(2) - lo(2) + 1 1166 call nga_access(g_a, tlo, hi, ptr, ld) 1167 call dump_chunk(dbl_mb(ptr),ld1,ld2) 1168 call nga_release(g_a) 1169c 1170 return 1171 end 1172c 1173 subroutine dump_chunk(a,ld1,ld2) 1174 implicit double precision (a-h, o-z) 1175#include "cscf.h" 1176#include "mafdecls.fh" 1177#include "global.fh" 1178#include "mp3def.fh" 1179 integer ld1, ld2 1180 double precision a(ld1, ld2) 1181 do i = 1, min(10,ld1) 1182 write(6,100) (a(i,j), j = 1, min(10,ld2)) 1183 end do 1184 write(6,*) 1185 trace = 0.0d0 1186 do i=1,ld2 1187 trace = trace +a(i,i) 1188 end do 1189 write(6,*) 'trace=',trace 1190 100 format(10f10.4) 1191 return 1192 end 1193c 1194 double precision function contract_matrices(g_a,g_b) 1195 implicit double precision(a-h,o-z) 1196#include "cscf.h" 1197#include "mafdecls.fh" 1198#include "global.fh" 1199#include "mp3def.fh" 1200 integer lo(2), hi(2), ptr_a, ptr_b, ld, ld1, ld2 1201 double precision a(ichunk,ichunk),b(ichunk,ichunk) 1202 double precision value 1203 logical dotask, next_chunk 1204c 1205c evalute sum_ij a_ij*b_ij 1206c 1207 value = 0.0d00 1208 call ga_zero(g_counter) 1209 dotask = next_chunk(lo,hi) 1210 ld = ichunk 1211 do while (dotask) 1212 call nga_get(g_a,lo,hi,a,ld) 1213 call nga_get(g_b,lo,hi,b,ld) 1214 do j = 1, hi(2)-lo(2)+1 1215 do i = 1, hi(1)-lo(1)+1 1216 value = value + a(i,j)*b(i,j) 1217 end do 1218 end do 1219 dotask = next_chunk(lo,hi) 1220 end do 1221 call ga_dgop(3,value,1,'+') 1222 contract_matrices=value 1223c 1224 return 1225 end 1226