1 subroutine qshell_sort 2c 3c$Id$ 4c 5c 6c Sort quadrature shells, most work to least, based on radius 7c (assuming largest radius has most angular pieces). 8c 9 implicit none 10#include "cdft.fh" 11 integer temp1, temp2, temp3, temp4 12 integer i, hsize 13c 14c build heap 15c 16 do i = nqshells/2, 1, -1 17 call heapify(nqshells, i) 18 enddo 19c 20c main part of sort algorithm 21c 22 hsize = nqshells 23 do i = nqshells, 2, -1 24c 25c swap element i and 1 26c 27 temp1 = iqshell(1,1) 28 temp2 = iqshell(2,1) 29 temp3 = iqshell(3,1) 30 temp4 = iqshell(4,1) 31c 32 iqshell(1,1) = iqshell(1,i) 33 iqshell(2,1) = iqshell(2,i) 34 iqshell(3,1) = iqshell(3,i) 35 iqshell(4,1) = iqshell(4,i) 36c 37 iqshell(1,i) = temp1 38 iqshell(2,i) = temp2 39 iqshell(3,i) = temp3 40 iqshell(4,i) = temp4 41c 42c maintain heap property from element 1 down 43c 44 hsize= hsize - 1 45 call heapify(hsize, 1) 46c 47 enddo 48 return 49 end 50 subroutine heapify(n, elem) 51c 52c establish heap property for a tree branch rooted at elem 53c 54 implicit none 55c 56#include "cdft.fh" 57c 58 integer n, elem 59 integer left, right, smallest, i 60 integer temp1, temp2, temp3, temp4 61 integer ictr_left, irsh_left, ictr_i, irsh_i 62 double precision rpts_left, rpts_i 63 integer ictr_right, irsh_right, ictr_smallest, irsh_smallest 64 double precision rpts_right, rpts_smallest 65c 66 i = elem 67c 68c Main Loop 69c 70100 continue 71 left = 2*i 72 right = 2*i + 1 73 if (left. gt. n .and. right .gt. n) return !we traversed entire branch 74c 75c check heap property among element i and its children 76c 77 ictr_left = iatype(iqshell(3,left)) 78 irsh_left = iqshell(1,left) 79 rpts_left = rpts(irsh_left,ictr_left) 80 ictr_i = iatype(iqshell(3,i)) 81 irsh_i = iqshell(1,i) 82 rpts_i = rpts(irsh_i,ictr_i) 83 if (left .le. n .and. rpts_left .lt. rpts_i) then 84 smallest = left 85 else 86 smallest = i 87 endif 88 ictr_right = iatype(iqshell(3,right)) 89 irsh_right = iqshell(1,right) 90 rpts_right = rpts(irsh_right,ictr_right) 91 ictr_smallest = iatype(iqshell(3,smallest)) 92 irsh_smallest = iqshell(1,smallest) 93 rpts_smallest = rpts(irsh_smallest,ictr_smallest) 94 if (right .le. n .and. rpts_right .lt. rpts_smallest) 95 & smallest = right 96c 97 if (smallest .ne. i) then 98c 99c swap array elements if smallest is not i 100c 101 temp1 = iqshell(1,i) 102 temp2 = iqshell(2,i) 103 temp3 = iqshell(3,i) 104 temp4 = iqshell(4,i) 105c 106 iqshell(1,i) = iqshell(1,smallest) 107 iqshell(2,i) = iqshell(2,smallest) 108 iqshell(3,i) = iqshell(3,smallest) 109 iqshell(4,i) = iqshell(4,smallest) 110c 111 iqshell(1,smallest) = temp1 112 iqshell(2,smallest) = temp2 113 iqshell(3,smallest) = temp3 114 iqshell(4,smallest) = temp4 115c 116c traverse down the tree 117c 118 i = smallest 119 goto 100 120 endif 121 return 122 end 123 Subroutine mbf_ao_max(rtdb, rq0, zprim, coord) 124c 125C$Id$ 126c 127 implicit none 128#include "errquit.fh" 129c 130#include "rtdb.fh" 131#include "mafdecls.fh" 132#include "global.fh" 133#include "msgids.fh" 134#include "stdio.fh" 135#include "cdft.fh" 136#include "bas.fh" 137c 138 integer rtdb 139c 140c Cartesian Coordinates of Integration Center 141c 142 double precision coord(3,ncenters) 143c 144c Compute the quadrature points for a given 145c set of radial shells. 146c 147 integer irsh, iang, iqsh, l, ia_ictr 148 double precision r 149c 150c Distance Squared between Sampling Points and Centers 151c 152 double precision rq0(ncenters) 153c 154 double precision zprim(nbf_ao_mxprim) 155 integer ncontrset, n1, icset, ictr, itype, nprimo, ncontr, 156 & isphere, nbf_ang, iprimo 157 double precision zmin 158 double precision acc_AO_gauss 159 integer mbf, nq, mbfnq 160 integer m, me,nproc 161 integer avail 162 double precision x, y, z, r2 163 integer nxtask,nt1,nt2 164 external nxtask 165c 166c determine node id 167c 168 me = ga_nodeid() 169 nproc=ga_nnodes() 170c 171c 172c Loop over all the radial shells. 173c 174 acc_AO_gauss = dble(iAOacc) 175 max_mbf = 0 176 max_pr_mbf = 0 177 max_pr_nq = 0 178 max_pr_mbfnq = 0 179 nt1 = 0 180 nt2 = nxtask(nproc,1) 181 do 70 iqsh = 1, nqshells 182 if(nt1.eq.nt2) then 183c 184 irsh = iqshell(1,iqsh) 185 ictr = iqshell(3,iqsh) 186 iang = iqshell(4,iqsh) 187c 188 ia_ictr = iatype(ictr) 189 r = rpts(irsh,ia_ictr) 190c 191 nq = 0 192c 193 if (leb) then 194c 195 nq=nq+ntheta(iang) 196 else 197 nq=nq+ntheta(iang)*nphi(iang) 198c 199 endif 200 do 40 m = 1, ncenters 201 x = coord(1,ictr) - coord(1,m) 202 y = coord(2,ictr) - coord(2,m) 203 z = coord(3,ictr) - coord(3,m) 204 r2 = sqrt(x*x + y*y + z*z) 205 rq0(m)=(r2-r)*(r2-r) 206 40 continue 207c 208 if (.not.bas_numcont(AO_bas_han, ncontrset)) 209 & call errquit('Exiting in mbf_ao_max.',1, BASIS_ERR) 210c 211 n1 = 0 212c 213 do 60 icset = 1,ncontrset 214 if (.not.bas_cn2ce(AO_bas_han, icset, ictr)) 215 & call errquit('Exiting in mbf_ao_max.',2, BASIS_ERR) 216c 217c get info about current contraction set 218c 219 if (.not.bas_continfo(AO_bas_han, icset, 220 & itype, nprimo, ncontr, isphere)) 221 & call errquit('Exiting in mbf_ao_max.',3, BASIS_ERR) 222c 223c angular momentum 224c 225 l = 0 226 if (itype .lt. 0)then 227#if 0 228 call errquit('mbf_ao_max: sp-type orbital not coded', 5, 229 & BASIS_ERR) 230#else 231 l=1 232#endif 233 else 234 l = itype 235 endif 236c 237c cartesian/spherical harmonic 238c 239 nbf_ang = 0 240 if (isphere .eq. 0)then ! cartesian set 241 nbf_ang = (l+1)*(l+2)/2 242 elseif (isphere .eq. 1)then ! spherical harmonic 243 nbf_ang = 2*l+1 244 else 245 call errquit('mbf_ao_max: illegal isphere value', 6, 246 & BASIS_ERR) 247 endif 248c 249c get exponents and contraction coefficients for this contraction set 250c 251 if (.not.bas_get_exponent(AO_bas_han, icset, zprim)) 252 & call errquit('Exiting in mbf_ao_max.',7, BASIS_ERR) 253c 254c Determine the minimum Gaussian exponent. 255c 256 zmin = 1.D+06 257 do 50 iprimo = 1,nprimo 258 zmin = min(zprim(iprimo),zmin) 259 50 continue 260c 261c Only include those basis functions that are "non-zero" for at least one 262c point in the sampling set. 263c 264 if (zmin*rq0(ictr).gt.acc_AO_gauss)goto 60 265 if (l.eq.0)then 266c 267c =============> S Contractions <============= 268c 269 n1 = n1 + ncontr 270 elseif (l.eq.1)then 271c 272c =============> P Contractions <============= 273c 274 n1 = n1+ncontr*3 275 elseif (l.eq.2)then 276c 277c =============> D Contractions <============= 278c 279 n1 = n1 + ncontr*6 280 else 281c 282c =============> General Case <============= 283c 284 n1 = n1 + ncontr*nbf_ang 285 endif 286c 287 60 continue 288c 289 mbf = n1 290c 291c need to determine max_mbf and max(mbf,nq) pair 292c 293 if (mbf.gt.max_mbf)then 294 max_mbf = mbf 295 endif 296 mbfnq = mbf*nq 297 if (mbfnq.gt.max_pr_mbfnq)then 298 max_pr_mbfnq = mbfnq 299 max_pr_mbf = mbf 300 max_pr_nq = nq 301 endif 302 303c 304 nt1 = nt1 + 1 305 nt2 = nxtask(nproc,1) 306 else 307 nt1 = nt1 + 1 308 endif 309c 310c 311 70 continue 312 nt2 = nxtask(-nproc,1) 313 call ga_igop(dft_mxmbfnq, max_pr_mbfnq, 1, 'max') 314 call ga_igop(dft_mxmbf, max_mbf, 1, 'max') 315c 316c This can be further reduced by blocking the computed 317c angular grid. 318c 319c Assume we want to use no more than 1/3 of physical memory (stack + heap) for 320c the quadrature. So, lets put chi(nq,mbf) and delchi(nq,3,mbf) 321c in roughly 1/3 - 8Mb by chunking up nq. 322c 323c find - (minimum)amount local available memory on all nodes 324c 325 call ga_sync 326 avail = MA_inquire_avail(mt_dbl) 327 call ga_igop(msg_min_stack_avail, avail, 1, 'min') 328 329c write(6,*)' avail = ',avail 330 avail = avail/3 331c write(6,*)' avail/3 = ',avail 332 avail = avail - 1024*1024 333 if(avail.lt.0) 334 & call errquit('xc_setquad: out of memory',avail, MEM_ERR) 335c write(6,*)' amt to be used for xc = ',avail 336c 337 if ( (4*max_pr_mbfnq) .gt. avail )then 338 nq_chunk = avail/4/max_mbf 339c 340c redefine max_pr_mbfnq 341c 342 max_pr_mbfnq = max_mbf*nq_chunk 343c 344c reset store_wght to false (not working yet) 345c 346 store_wght=.false. 347 else 348c 349c everything fits no chunking necessary 350c 351 nq_chunk = 0 352c 353c redefine nq_task 354c 355Cedo if(nquad_task.eq.1) then 356 if (.not. rtdb_get(rtdb, 'dft:nquad_task', mt_int, 1, 357 & nquad_task))then 358 nquad_task = min(avail/2/(4*max_pr_mbfnq),6) 359 if(nquad_task.lt.1) nquad_task=1 360 nqmax=nqmax*nquad_task 361 if (.not. rtdb_put(rtdb, 'dft:nquad_task', mt_int, 1, 362 & nquad_task)) 363 & call errquit('mbf_ao_max: rtdb_put failed', 911, 364 & RTDB_ERR) 365 endif 366 367 endif 368c write(6,*)' nq_chunk = ',nq_chunk 369 if (rtdb_get(rtdb, 'dft:nq_chunk', mt_int, 1, nq_chunk))then 370 if (me.eq.0)write(LuOut,*)' nq_chunk input override= ',nq_chunk 371c 372c redefine max_pr_mbfnq 373c 374 max_pr_mbfnq = max_mbf*nq_chunk 375 endif 376c 377c check nquad_task (if .ne. 1 makes no sense with chunking turned on ... reset) 378c 379 if (nq_chunk.ne.0)then 380 if (nquad_task.gt.1)then 381 nquad_task = 1 382c 383 endif 384 endif 385 if (.not. rtdb_put(rtdb, 'dft:nquad_task', mt_int, 1, 386 & nquad_task)) 387 & call errquit('mbf_ao_max: rtdb_put failed', 911, 388 & RTDB_ERR) 389c 390c write(6,*)' iAOacc, acc_AO_gauss: ',iAOacc, acc_AO_gauss 391c write(6,*)' max_mbf, max_pr_mbfnq, max_pr_mbf, max_pr_nq: ', 392c & max_mbf, max_pr_mbfnq, max_pr_mbf, max_pr_nq 393c 394 return 395 end 396 double precision function dft_gaussian_range(n, alpha, eps) 397 implicit none 398c 399 integer n 400 double precision alpha, eps 401c 402c Return an approximation to the outer solution of 403c . r^n*exp(-ar^2) = eps 404c . r = (n*ln(-ln(eps)) - n*ln(a) - 4*ln(eps)) / 405c . 4*sqrt(-alpha*ln(eps)) 406c 407c Accuracy improves with smaller eps. 408c 409 double precision logeps 410c 411 logeps = log(eps) 412c 413 dft_gaussian_range = 414 $ (n*log(-logeps) - n*log(alpha) - 4.0d0*logeps) / 415 $ sqrt(-16.0d0*alpha*logeps) 416c 417 end 418 419 double precision function r_neglected(k, alpha, eps) 420 implicit none 421c 422 integer k 423 double precision alpha, eps 424c 425c For a function f(r) = r^k*exp(-alpha*r^2) determine 426c the radial distance r such that the fraction of the 427c function norm that is neglected if the 3D volume 428c integration is terminated at a distance r is less 429c than or equal to eps. 430c 431 double precision r, test, neglected, step 432c 433 step = 0.5d0 434 r = 1.0d0 435 10 test = neglected(k,alpha,r) 436 if (test .gt. eps) then 437 r = r + step 438 else 439 r = r - step 440 if (r .lt. 0.0d0) r = 0.0d0 441 step = step*0.5d0 442 r = r + step 443 endif 444 if (step .gt. 0.01d0) goto 10 445c 446 r_neglected = r 447c 448 end 449 double precision function neglected(k,alpha,r) 450 implicit none 451c 452 integer k 453 double precision alpha, r 454c 455c For a function f(r) = r^k*exp(-alpha*r^2) determine 456c the fraction of the function norm that is neglected 457c if the 3D volume integration is terminated at a 458c distance r. 459c 460c neglected = int(t^2*f(t),t=r..infinity)/int(t^2*f(t),t=0..infinity) 461c 462 double precision ik 463c 464 neglected = ik(k+2,alpha,r)/ik(k+2,alpha,0.0d0) 465c 466 end 467 double precision function ik(k,alpha,r) 468 implicit none 469c 470 integer k 471 double precision alpha, r 472c 473c I(k) = int(t^k exp(-alpha*t^2), t=0..infinity) 474c 475c I(k) = [(k-1)*I(k-2) + r^(k-1)*exp(-alpha*r^2)]/(2*alpha) 476c 477 integer i, ilo 478 double precision value 479#if defined(SGI) || defined(WIN32) ||defined(LINUX) 480 double precision derfc 481#else 482 double precision erfc 483#endif 484c 485 ilo = mod(k,2) 486c 487 if (ilo .eq. 0) then 488#if defined(SGI) || defined(WIN32) ||defined(LINUX) 489 value = 0.5d0*sqrt(4.0d0*atan(1.0d0)/alpha)* 490 $ derfc(sqrt(alpha)*r) 491#else 492 value = 0.5d0*sqrt(4.0d0*atan(1.0d0)/alpha)* 493 $ erfc(sqrt(alpha)*r) 494#endif 495 else 496 value = exp(-alpha*r*r)/(2.0d0*alpha) 497 endif 498c 499 do i = ilo+2,k,2 500 value = ((i-1)*value + r**(i-1)*exp(-alpha*r*r))/(2.0d0*alpha) 501 enddo 502c 503 ik = value 504c 505 end 506 507 Subroutine grid_repack(xyzw, qxyz, qwght, nq,rad,istep) 508c 509C$Id$ 510c 511 implicit none 512c 513 integer nq 514 double precision xyzw(4,nq), qxyz(3,nq), qwght(nq), 515 . rad 516c 517 integer n,istep 518c 519 istep=istep+1 520 nq=dble(xyzw(1,istep)) 521 rad=xyzw(2,istep) 522 do 30 n = 1, nq 523c 524 qxyz(1,n) = xyzw(1,n+istep) 525 qxyz(2,n) = xyzw(2,n+istep) 526 qxyz(3,n) = xyzw(3,n+istep) 527c 528 qwght(n) = xyzw(4,n+istep) 529c 530! write(0,'(A,i4,4F16.9)') 531! . ' RE ',n+istep, 532! , qxyz(1,n),qxyz(2,n),qxyz(3,n),qwght(n) 533 30 continue 534 istep=istep+nq 535c write(6,*)' repacked buffer ' 536 return 537 end 538