1      subroutine qshell_sort
2c
3c$Id$
4c
5c
6c     Sort quadrature shells, most work to least, based on radius
7c     (assuming largest radius has most angular pieces).
8c
9      implicit none
10#include "cdft.fh"
11      integer temp1, temp2, temp3, temp4
12      integer i, hsize
13c
14c     build heap
15c
16      do i = nqshells/2, 1, -1
17         call heapify(nqshells, i)
18      enddo
19c
20c     main part of sort algorithm
21c
22      hsize = nqshells
23      do i    = nqshells, 2, -1
24c
25c        swap element i and 1
26c
27         temp1 = iqshell(1,1)
28         temp2 = iqshell(2,1)
29         temp3 = iqshell(3,1)
30         temp4 = iqshell(4,1)
31c
32         iqshell(1,1) = iqshell(1,i)
33         iqshell(2,1) = iqshell(2,i)
34         iqshell(3,1) = iqshell(3,i)
35         iqshell(4,1) = iqshell(4,i)
36c
37         iqshell(1,i) = temp1
38         iqshell(2,i) = temp2
39         iqshell(3,i) = temp3
40         iqshell(4,i) = temp4
41c
42c        maintain heap property from element 1 down
43c
44         hsize= hsize - 1
45         call heapify(hsize, 1)
46c
47      enddo
48      return
49      end
50      subroutine heapify(n, elem)
51c
52c     establish heap property for a tree branch rooted at elem
53c
54      implicit none
55c
56#include "cdft.fh"
57c
58      integer n, elem
59      integer left, right, smallest, i
60      integer temp1, temp2, temp3, temp4
61      integer ictr_left, irsh_left, ictr_i, irsh_i
62      double precision rpts_left, rpts_i
63      integer ictr_right, irsh_right, ictr_smallest, irsh_smallest
64      double precision rpts_right, rpts_smallest
65c
66      i = elem
67c
68c     Main Loop
69c
70100   continue
71        left  = 2*i
72        right = 2*i + 1
73        if (left. gt. n .and. right .gt. n) return   !we traversed entire branch
74c
75c       check heap property among element i and its children
76c
77        ictr_left = iatype(iqshell(3,left))
78        irsh_left = iqshell(1,left)
79        rpts_left = rpts(irsh_left,ictr_left)
80        ictr_i = iatype(iqshell(3,i))
81        irsh_i = iqshell(1,i)
82        rpts_i = rpts(irsh_i,ictr_i)
83        if (left .le. n .and. rpts_left .lt. rpts_i) then
84           smallest = left
85        else
86           smallest = i
87        endif
88        ictr_right = iatype(iqshell(3,right))
89        irsh_right = iqshell(1,right)
90        rpts_right = rpts(irsh_right,ictr_right)
91        ictr_smallest = iatype(iqshell(3,smallest))
92        irsh_smallest = iqshell(1,smallest)
93        rpts_smallest = rpts(irsh_smallest,ictr_smallest)
94        if (right .le. n .and. rpts_right .lt. rpts_smallest)
95     &     smallest = right
96c
97        if (smallest .ne. i) then
98c
99c          swap array elements if smallest is not i
100c
101           temp1 = iqshell(1,i)
102           temp2 = iqshell(2,i)
103           temp3 = iqshell(3,i)
104           temp4 = iqshell(4,i)
105c
106           iqshell(1,i) = iqshell(1,smallest)
107           iqshell(2,i) = iqshell(2,smallest)
108           iqshell(3,i) = iqshell(3,smallest)
109           iqshell(4,i) = iqshell(4,smallest)
110c
111           iqshell(1,smallest) = temp1
112           iqshell(2,smallest) = temp2
113           iqshell(3,smallest) = temp3
114           iqshell(4,smallest) = temp4
115c
116c          traverse down the tree
117c
118           i = smallest
119      goto 100
120      endif
121      return
122      end
123      Subroutine mbf_ao_max(rtdb,  rq0, zprim, coord)
124c
125C$Id$
126c
127      implicit none
128#include "errquit.fh"
129c
130#include "rtdb.fh"
131#include "mafdecls.fh"
132#include "global.fh"
133#include "msgids.fh"
134#include "stdio.fh"
135#include "cdft.fh"
136#include "bas.fh"
137c
138      integer rtdb
139c
140c     Cartesian Coordinates of Integration Center
141c
142      double precision coord(3,ncenters)
143c
144c     Compute the quadrature points for a given
145c     set of radial shells.
146c
147      integer irsh, iang, iqsh,  l, ia_ictr
148      double precision r
149c
150c     Distance Squared between Sampling Points and Centers
151c
152      double precision rq0(ncenters)
153c
154      double precision zprim(nbf_ao_mxprim)
155      integer ncontrset, n1, icset, ictr, itype, nprimo, ncontr,
156     &        isphere, nbf_ang, iprimo
157      double precision zmin
158      double precision acc_AO_gauss
159      integer mbf, nq, mbfnq
160      integer m, me,nproc
161      integer avail
162      double precision x, y, z, r2
163      integer nxtask,nt1,nt2
164      external nxtask
165c
166c     determine node id
167c
168      me = ga_nodeid()
169      nproc=ga_nnodes()
170c
171c
172c     Loop over all the radial shells.
173c
174      acc_AO_gauss = dble(iAOacc)
175      max_mbf = 0
176      max_pr_mbf = 0
177      max_pr_nq = 0
178      max_pr_mbfnq = 0
179      nt1 = 0
180      nt2 = nxtask(nproc,1)
181      do 70 iqsh = 1, nqshells
182        if(nt1.eq.nt2) then
183c
184         irsh = iqshell(1,iqsh)
185         ictr = iqshell(3,iqsh)
186         iang = iqshell(4,iqsh)
187c
188         ia_ictr = iatype(ictr)
189         r = rpts(irsh,ia_ictr)
190c
191         nq = 0
192c
193         if (leb) then
194c
195           nq=nq+ntheta(iang)
196         else
197           nq=nq+ntheta(iang)*nphi(iang)
198c
199         endif
200         do 40 m = 1, ncenters
201               x = coord(1,ictr) - coord(1,m)
202               y = coord(2,ictr) - coord(2,m)
203               z = coord(3,ictr) - coord(3,m)
204               r2 = sqrt(x*x + y*y + z*z)
205               rq0(m)=(r2-r)*(r2-r)
206   40    continue
207c
208         if (.not.bas_numcont(AO_bas_han, ncontrset))
209     &      call errquit('Exiting in mbf_ao_max.',1, BASIS_ERR)
210c
211         n1 = 0
212c
213         do 60 icset = 1,ncontrset
214            if (.not.bas_cn2ce(AO_bas_han, icset, ictr))
215     &         call errquit('Exiting in mbf_ao_max.',2, BASIS_ERR)
216c
217c           get info about current contraction set
218c
219            if (.not.bas_continfo(AO_bas_han, icset,
220     &         itype, nprimo, ncontr, isphere))
221     &         call errquit('Exiting in mbf_ao_max.',3, BASIS_ERR)
222c
223c           angular momentum
224c
225            l = 0
226            if (itype .lt. 0)then
227#if 0
228               call errquit('mbf_ao_max: sp-type orbital not coded', 5,
229     &       BASIS_ERR)
230#else
231               l=1
232#endif
233            else
234               l = itype
235            endif
236c
237c           cartesian/spherical harmonic
238c
239            nbf_ang = 0
240            if (isphere .eq. 0)then !  cartesian set
241               nbf_ang = (l+1)*(l+2)/2
242            elseif (isphere .eq. 1)then !  spherical harmonic
243               nbf_ang = 2*l+1
244            else
245               call errquit('mbf_ao_max: illegal isphere value', 6,
246     &       BASIS_ERR)
247            endif
248c
249c           get exponents and contraction coefficients for this contraction set
250c
251            if (.not.bas_get_exponent(AO_bas_han, icset, zprim))
252     &         call errquit('Exiting in mbf_ao_max.',7, BASIS_ERR)
253c
254c           Determine the minimum Gaussian exponent.
255c
256            zmin = 1.D+06
257            do 50 iprimo = 1,nprimo
258               zmin = min(zprim(iprimo),zmin)
259   50       continue
260c
261c           Only include those basis functions that are "non-zero" for at least one
262c           point in the sampling set.
263c
264            if (zmin*rq0(ictr).gt.acc_AO_gauss)goto 60
265            if (l.eq.0)then
266c
267c              =============>  S Contractions  <=============
268c
269               n1 = n1 + ncontr
270            elseif (l.eq.1)then
271c
272c              =============>  P Contractions  <=============
273c
274               n1 = n1+ncontr*3
275            elseif (l.eq.2)then
276c
277c              =============>  D Contractions  <=============
278c
279               n1 = n1 + ncontr*6
280            else
281c
282c              =============>  General Case  <=============
283c
284               n1 = n1 + ncontr*nbf_ang
285            endif
286c
287  60     continue
288c
289         mbf = n1
290c
291c        need to determine max_mbf and max(mbf,nq) pair
292c
293         if (mbf.gt.max_mbf)then
294            max_mbf = mbf
295         endif
296         mbfnq = mbf*nq
297         if (mbfnq.gt.max_pr_mbfnq)then
298            max_pr_mbfnq = mbfnq
299            max_pr_mbf = mbf
300            max_pr_nq = nq
301         endif
302
303c
304            nt1 = nt1 + 1
305            nt2 = nxtask(nproc,1)
306         else
307            nt1 = nt1 + 1
308         endif
309c
310c
311   70 continue
312      nt2 = nxtask(-nproc,1)
313      call ga_igop(dft_mxmbfnq, max_pr_mbfnq, 1, 'max')
314      call ga_igop(dft_mxmbf, max_mbf, 1, 'max')
315c
316c     This can be further reduced by blocking the computed
317c     angular grid.
318c
319c     Assume we want to use no more than 1/3 of physical memory (stack + heap) for
320c     the quadrature.  So, lets put chi(nq,mbf) and delchi(nq,3,mbf)
321c     in roughly 1/3 - 8Mb by chunking up nq.
322c
323c     find - (minimum)amount local available memory on all nodes
324c
325      call ga_sync
326      avail = MA_inquire_avail(mt_dbl)
327      call ga_igop(msg_min_stack_avail, avail, 1, 'min')
328
329c      write(6,*)' avail = ',avail
330      avail = avail/3
331c      write(6,*)' avail/3 = ',avail
332      avail = avail - 1024*1024
333      if(avail.lt.0)
334     &         call errquit('xc_setquad: out of memory',avail, MEM_ERR)
335c      write(6,*)' amt to be used for xc = ',avail
336c
337      if ( (4*max_pr_mbfnq) .gt. avail )then
338         nq_chunk = avail/4/max_mbf
339c
340c        redefine max_pr_mbfnq
341c
342         max_pr_mbfnq = max_mbf*nq_chunk
343c
344c        reset store_wght to false (not working yet)
345c
346         store_wght=.false.
347      else
348c
349c        everything fits no chunking necessary
350c
351         nq_chunk = 0
352c
353c        redefine nq_task
354c
355Cedo         if(nquad_task.eq.1) then
356          if (.not. rtdb_get(rtdb, 'dft:nquad_task', mt_int, 1,
357     &        nquad_task))then
358          nquad_task = min(avail/2/(4*max_pr_mbfnq),6)
359          if(nquad_task.lt.1) nquad_task=1
360           nqmax=nqmax*nquad_task
361           if (.not. rtdb_put(rtdb, 'dft:nquad_task', mt_int, 1,
362     &          nquad_task))
363     &          call errquit('mbf_ao_max: rtdb_put failed', 911,
364     &       RTDB_ERR)
365         endif
366
367      endif
368c      write(6,*)' nq_chunk = ',nq_chunk
369      if (rtdb_get(rtdb, 'dft:nq_chunk', mt_int, 1, nq_chunk))then
370         if (me.eq.0)write(LuOut,*)' nq_chunk input override= ',nq_chunk
371c
372c        redefine max_pr_mbfnq
373c
374         max_pr_mbfnq = max_mbf*nq_chunk
375      endif
376c
377c     check nquad_task (if .ne. 1 makes no sense with chunking turned on ... reset)
378c
379      if (nq_chunk.ne.0)then
380         if (nquad_task.gt.1)then
381            nquad_task = 1
382c
383         endif
384      endif
385      if (.not. rtdb_put(rtdb, 'dft:nquad_task', mt_int, 1,
386     &     nquad_task))
387     &     call errquit('mbf_ao_max: rtdb_put failed', 911,
388     &       RTDB_ERR)
389c
390c      write(6,*)' iAOacc, acc_AO_gauss: ',iAOacc, acc_AO_gauss
391c      write(6,*)' max_mbf, max_pr_mbfnq, max_pr_mbf, max_pr_nq: ',
392c     &            max_mbf, max_pr_mbfnq, max_pr_mbf, max_pr_nq
393c
394      return
395      end
396      double precision function dft_gaussian_range(n, alpha, eps)
397      implicit none
398c
399      integer n
400      double precision alpha, eps
401c
402c     Return an approximation to the outer solution of
403c     .     r^n*exp(-ar^2) = eps
404c     .     r = (n*ln(-ln(eps)) - n*ln(a) - 4*ln(eps)) /
405c     .         4*sqrt(-alpha*ln(eps))
406c
407c     Accuracy improves with smaller eps.
408c
409      double precision logeps
410c
411      logeps = log(eps)
412c
413      dft_gaussian_range =
414     $     (n*log(-logeps) - n*log(alpha) - 4.0d0*logeps) /
415     $     sqrt(-16.0d0*alpha*logeps)
416c
417      end
418
419      double precision function r_neglected(k, alpha, eps)
420      implicit none
421c
422      integer k
423      double precision alpha, eps
424c
425c     For a function f(r) = r^k*exp(-alpha*r^2) determine
426c     the radial distance r such that the fraction of the
427c     function norm that is neglected if the 3D volume
428c     integration is terminated at a distance r is less
429c     than or equal to eps.
430c
431      double precision r, test, neglected, step
432c
433      step = 0.5d0
434      r = 1.0d0
435 10   test = neglected(k,alpha,r)
436      if (test .gt. eps) then
437         r = r + step
438      else
439         r = r - step
440         if (r .lt. 0.0d0) r = 0.0d0
441         step = step*0.5d0
442         r = r + step
443      endif
444      if (step .gt. 0.01d0) goto 10
445c
446      r_neglected = r
447c
448      end
449      double precision function neglected(k,alpha,r)
450      implicit none
451c
452      integer k
453      double precision alpha, r
454c
455c     For a function f(r) = r^k*exp(-alpha*r^2) determine
456c     the fraction of the function norm that is neglected
457c     if the 3D volume integration is terminated at a
458c     distance r.
459c
460c     neglected = int(t^2*f(t),t=r..infinity)/int(t^2*f(t),t=0..infinity)
461c
462      double precision ik
463c
464      neglected = ik(k+2,alpha,r)/ik(k+2,alpha,0.0d0)
465c
466      end
467      double precision function ik(k,alpha,r)
468      implicit none
469c
470      integer k
471      double precision alpha, r
472c
473c     I(k) = int(t^k exp(-alpha*t^2), t=0..infinity)
474c
475c     I(k) = [(k-1)*I(k-2) + r^(k-1)*exp(-alpha*r^2)]/(2*alpha)
476c
477      integer i, ilo
478      double precision value
479#if defined(SGI) || defined(WIN32) ||defined(LINUX)
480      double precision derfc
481#else
482      double precision erfc
483#endif
484c
485      ilo = mod(k,2)
486c
487      if (ilo .eq. 0) then
488#if defined(SGI) || defined(WIN32) ||defined(LINUX)
489         value = 0.5d0*sqrt(4.0d0*atan(1.0d0)/alpha)*
490     $        derfc(sqrt(alpha)*r)
491#else
492         value = 0.5d0*sqrt(4.0d0*atan(1.0d0)/alpha)*
493     $        erfc(sqrt(alpha)*r)
494#endif
495      else
496         value = exp(-alpha*r*r)/(2.0d0*alpha)
497      endif
498c
499      do i = ilo+2,k,2
500         value = ((i-1)*value + r**(i-1)*exp(-alpha*r*r))/(2.0d0*alpha)
501      enddo
502c
503      ik = value
504c
505      end
506
507      Subroutine grid_repack(xyzw, qxyz, qwght, nq,rad,istep)
508c
509C$Id$
510c
511      implicit none
512c
513      integer nq
514      double precision xyzw(4,nq), qxyz(3,nq), qwght(nq),
515     .     rad
516c
517      integer n,istep
518c
519      istep=istep+1
520      nq=dble(xyzw(1,istep))
521      rad=xyzw(2,istep)
522      do 30 n = 1, nq
523c
524        qxyz(1,n) = xyzw(1,n+istep)
525        qxyz(2,n) = xyzw(2,n+istep)
526        qxyz(3,n) = xyzw(3,n+istep)
527c
528        qwght(n) = xyzw(4,n+istep)
529c
530!            write(0,'(A,i4,4F16.9)')
531!     .           ' RE ',n+istep,
532!     ,       qxyz(1,n),qxyz(2,n),qxyz(3,n),qwght(n)
533   30 continue
534      istep=istep+nq
535c      write(6,*)' repacked buffer '
536      return
537      end
538