1      logical function dft_mem3c(rtdb,
2     I     natoms,npol,oprint_parm,oprint_3c2e,
3     O     n3c_int,n3c_dbl,
4     O     l_3ceri,k_3ceri, l_3cwhat,k_3cwhat)
5* $Id$
6      implicit none
7#include "errquit.fh"
8#include "global.fh"
9#include "mafdecls.fh"
10#include "stdio.fh"
11#include "msgids.fh"
12#include "cdft.fh"
13#include "basP.fh"
14#include "bas.fh"
15#include "rtdb.fh"
16#include "util.fh"
17      integer rtdb   ! [in]
18      integer natoms ! [in]
19      integer npol ! [in]
20      integer n3c_int ! [out]
21      integer n3c_dbl ! [out]
22      logical oprint_parm,oprint_3c2e ![in]
23      integer l_3ceri,k_3ceri ! [out]
24      integer l_3cwhat,k_3cwhat ! [out]
25      integer max_component,max_elem_ang_scr
26      integer nscr
27      integer avail,availm
28      double precision availm_r
29      integer dft_n3cint
30      external dft_n3cint
31      double precision dft_n3cdbl
32      integer dft_nao2_max
33      external dft_n3cdbl,dft_nao2_max
34      double precision n3c_dbl_r,n3c_dbl_max
35      integer nao2_max
36      integer nshpair_sum
37      integer me,nproc,icount
38      integer maxg,scrmx
39      integer ishc,itype,nprimo,nshbfc,isphere
40      integer deficit,atom_c,sh_lo_c,sh_hi_c
41      logical spherical_ao
42      integer n3c_int_in,n3c_dbl_in
43c
44      me=ga_nodeid()
45      nproc=ga_nnodes()
46      dft_mem3c=.true.
47c
48c     Determine how big a buffer can be allocated to 3-center
49c     2e- integrals.
50c
51c     - amount needed for all incore:
52c
53      n3c_dbl_r = dft_n3cdbl()
54      n3c_int = dft_n3cint()
55c     either read no. of batches or make it at least 1000
56      if(.not.rtdb_get(rtdb,'dft:n3c_int',mt_int,1,n3c_int_in))
57     N     n3c_int_in=1000
58      n3c_int=max(n3c_int,n3c_int_in)
59c     either read no. of batches or make it at least 1000
60      if(.not.rtdb_get(rtdb,'dft:n3c_dbl',mt_int,1,n3c_dbl_in))
61     N     n3c_dbl_in=1000
62      n3c_dbl_r=max(n3c_dbl_r,1d0*n3c_dbl_in)
63      n3c_dbl_max=n3c_dbl_r
64c     need to find max among all procs since n3cdbl might be different
65      call ga_dgop(19641964, n3c_dbl_max, 1, 'max')
66c
67c     find - (minimum)amount local available memory on all nodes
68c
69      avail = MA_inquire_avail(mt_dbl)
70      call ga_igop(msg_min_stack_avail, avail, 1, 'min')
71c
72c     estimate and subtract off amount needed for DIIS
73c
74      availm = avail - ((nfock+4)*nbf_ao*nbf_ao)/nproc
75c
76c        estimate and subtract off amount needed for XC numerical integration
77c     in xc_quadv0
78c
79      availm = availm - (natoms*(natoms+1)/2 + 13*natoms +
80     &     3*nqmax*(7*ipol +  npol + natoms +
81     &     nbf_ao_mxnbf_ce + 4) +
82     &     nbf_ao_mxcont + nbf_ao_mxprim +
83     &     2*nbf_ao_mxnbf_ce*nbf_ao_mxnbf_ce)
84c
85c     estimate and subtract off amount needed for XC numerical integration
86c     in xc_quadv0_a
87c
88      max_component = (nbf_ao_mxang+1)*(nbf_ao_mxang+2)/2
89      if (nbf_ao_mxang .eq. 0)then
90         max_elem_ang_scr = max_component * 3
91      elseif (nbf_ao_mxang .le. 3)then
92         max_elem_ang_scr = max_component * 9
93      else                      ! general case
94         max_elem_ang_scr = max_component * 28
95      endif
96c
97      nscr = 3*nqmax*nbf_ao_mxcont +
98     &     max(3*nqmax*nbf_ao_mxprim,nqmax*max_elem_ang_scr) + 1
99c
100c     The big chunk is the memory needed for new_eval_gbsets
101c     which is roughly 4*nqmax*nbf_ao.  This is reduced by
102c     screening (and chunking up the angular grid) and is
103c     computed at the end of xc_setquad to be 4*max_pr_mbfnq.
104c
105      availm = availm - (nqmax*(natoms + 3*nbf_ao_mxnbf_ce + 1) +
106     &     4*max_pr_mbfnq +
107     &     nbf_ao + nscr)
108c
109c        Subtract off a few extra bits
110c
111      availm = availm - 100000
112c
113      if(availm.lt.0)then
114         availm = 0
115      endif
116      availm_r = dble(availm)
117      availm_r = min(availm_r,n3c_dbl_max)
118      deficit=0
119      if (availm_r.lt.n3c_dbl_max)then
120         deficit=nint(n3c_dbl_max)-availm_r
121c
122c     get amount of local MA in Mbytes need to get incore done
123c
124         deficit=max((deficit*8+deficit/2)/1024/1024,1)
125c
126c     force direct if  memory is not sufficient
127c
128         availm_r = 0
129      endif
130c
131c     check if availm_r is big enough for max batch
132c
133      call int_mem_2e3c(maxg, scrmx)
134
135      n3c_dbl = min(availm_r,n3c_dbl_r)
136      if (me.eq.0 .and. oprint_parm)
137     &     write(LuOut,3228)avail, availm,
138     N     n3c_dbl,nint(n3c_dbl_max),n3c_int
139
140      if(deficit.ne.0.and.me.eq.0) write(luout,3230) deficit
141c
142c     Loops are parallelized over the products of atoms
143c     (check for zero ... must be at least 1).
144c
145
146      if (direct.or.(deficit.ne.0))then
147         dft_mem3c = .false.
148         n3c_dbl = 1
149         n3c_int = 1
150      endif
151      if (.not.MA_Push_Get
152     &     (MT_Dbl,n3c_dbl,'3c ERI space',l_3cERI,k_3cERI))
153     &     call errquit('dft_scf: push_get failed', 12, MA_ERR)
154c
155      if (.not.MA_Push_Get
156     &     (MT_int,n3c_int,'3c what space',l_3cwhat,k_3cwhat))
157     &     call errquit('dft_scf:push_get failed', 13, MA_ERR)
158c
159      if (dft_mem3c)then
160         if (me.eq.0 .and. oprint_3c2e)
161     &        write(LuOut,3229)n3c_dbl*1.d-6
162      endif
163c     if(me.eq.0) call MA_summarize_allocated_blocks()
164         return
165 3228 format(10x,'Minimum dble words available (all nodes) is: ',i15,
166     &     /,10x,'         This is reduced (for later use) to: ',i15,
167     &     /,10x,'            proc 0 Suggested buffer size is: ',i15,
168     &     /,10x,'               Max Suggested buffer size is: ',i15,
169     &     /,10x,'                    no. integral batches is: ',i15)
170
171 3230 format(/7x,
172     &     'Extra Local Memory (stack+heap) needed for incore:',i6,
173     &     ' Mbytes ')
174 3229 format(/,6x,f10.3,' MW buffer allocated for incore 3-center '
175     &     /,10x,'2e- integral storage on stack. ')
176         end
177c     hack for spherical since there is always a cartesian internal bit
178      integer function dft_nao2_max(basis,natoms)
179      implicit none
180#include "bas.fh"
181#include "errquit.fh"
182      integer basis
183      integer natoms
184c
185      integer atom_c,atom_d
186      integer ishc,itype,nprimo,nshbfc,isphere
187      integer sh_lo_c, sh_hi_c
188      integer nshpair_sum
189c
190      dft_nao2_max=0
191      do atom_c=1,natoms
192         if (.not. bas_ce2cnr( basis, atom_c, sh_lo_c, sh_hi_c))
193     &        call errquit('Exiting in dft_fitcd',110, BASIS_ERR)
194         nshpair_sum=0
195         do ishc=sh_lo_c,sh_hi_c
196            if(.not.bas_continfo(basis,ishc,
197     &           itype,nprimo,nshbfc,isphere))
198     &           call errquit('Exiting in fitcd.',44, CALC_ERR)
199
200            nshpair_sum = nshpair_sum +
201     P           ((itype+1)*(itype+2))/2*nshbfc
202         enddo
203         dft_nao2_max=max(dft_nao2_max,nshpair_sum)
204      enddo
205      dft_nao2_max=dft_nao2_max**2
206      return
207      end
208