1 logical function dft_mem3c(rtdb, 2 I natoms,npol,oprint_parm,oprint_3c2e, 3 O n3c_int,n3c_dbl, 4 O l_3ceri,k_3ceri, l_3cwhat,k_3cwhat) 5* $Id$ 6 implicit none 7#include "errquit.fh" 8#include "global.fh" 9#include "mafdecls.fh" 10#include "stdio.fh" 11#include "msgids.fh" 12#include "cdft.fh" 13#include "basP.fh" 14#include "bas.fh" 15#include "rtdb.fh" 16#include "util.fh" 17 integer rtdb ! [in] 18 integer natoms ! [in] 19 integer npol ! [in] 20 integer n3c_int ! [out] 21 integer n3c_dbl ! [out] 22 logical oprint_parm,oprint_3c2e ![in] 23 integer l_3ceri,k_3ceri ! [out] 24 integer l_3cwhat,k_3cwhat ! [out] 25 integer max_component,max_elem_ang_scr 26 integer nscr 27 integer avail,availm 28 double precision availm_r 29 integer dft_n3cint 30 external dft_n3cint 31 double precision dft_n3cdbl 32 integer dft_nao2_max 33 external dft_n3cdbl,dft_nao2_max 34 double precision n3c_dbl_r,n3c_dbl_max 35 integer nao2_max 36 integer nshpair_sum 37 integer me,nproc,icount 38 integer maxg,scrmx 39 integer ishc,itype,nprimo,nshbfc,isphere 40 integer deficit,atom_c,sh_lo_c,sh_hi_c 41 logical spherical_ao 42 integer n3c_int_in,n3c_dbl_in 43c 44 me=ga_nodeid() 45 nproc=ga_nnodes() 46 dft_mem3c=.true. 47c 48c Determine how big a buffer can be allocated to 3-center 49c 2e- integrals. 50c 51c - amount needed for all incore: 52c 53 n3c_dbl_r = dft_n3cdbl() 54 n3c_int = dft_n3cint() 55c either read no. of batches or make it at least 1000 56 if(.not.rtdb_get(rtdb,'dft:n3c_int',mt_int,1,n3c_int_in)) 57 N n3c_int_in=1000 58 n3c_int=max(n3c_int,n3c_int_in) 59c either read no. of batches or make it at least 1000 60 if(.not.rtdb_get(rtdb,'dft:n3c_dbl',mt_int,1,n3c_dbl_in)) 61 N n3c_dbl_in=1000 62 n3c_dbl_r=max(n3c_dbl_r,1d0*n3c_dbl_in) 63 n3c_dbl_max=n3c_dbl_r 64c need to find max among all procs since n3cdbl might be different 65 call ga_dgop(19641964, n3c_dbl_max, 1, 'max') 66c 67c find - (minimum)amount local available memory on all nodes 68c 69 avail = MA_inquire_avail(mt_dbl) 70 call ga_igop(msg_min_stack_avail, avail, 1, 'min') 71c 72c estimate and subtract off amount needed for DIIS 73c 74 availm = avail - ((nfock+4)*nbf_ao*nbf_ao)/nproc 75c 76c estimate and subtract off amount needed for XC numerical integration 77c in xc_quadv0 78c 79 availm = availm - (natoms*(natoms+1)/2 + 13*natoms + 80 & 3*nqmax*(7*ipol + npol + natoms + 81 & nbf_ao_mxnbf_ce + 4) + 82 & nbf_ao_mxcont + nbf_ao_mxprim + 83 & 2*nbf_ao_mxnbf_ce*nbf_ao_mxnbf_ce) 84c 85c estimate and subtract off amount needed for XC numerical integration 86c in xc_quadv0_a 87c 88 max_component = (nbf_ao_mxang+1)*(nbf_ao_mxang+2)/2 89 if (nbf_ao_mxang .eq. 0)then 90 max_elem_ang_scr = max_component * 3 91 elseif (nbf_ao_mxang .le. 3)then 92 max_elem_ang_scr = max_component * 9 93 else ! general case 94 max_elem_ang_scr = max_component * 28 95 endif 96c 97 nscr = 3*nqmax*nbf_ao_mxcont + 98 & max(3*nqmax*nbf_ao_mxprim,nqmax*max_elem_ang_scr) + 1 99c 100c The big chunk is the memory needed for new_eval_gbsets 101c which is roughly 4*nqmax*nbf_ao. This is reduced by 102c screening (and chunking up the angular grid) and is 103c computed at the end of xc_setquad to be 4*max_pr_mbfnq. 104c 105 availm = availm - (nqmax*(natoms + 3*nbf_ao_mxnbf_ce + 1) + 106 & 4*max_pr_mbfnq + 107 & nbf_ao + nscr) 108c 109c Subtract off a few extra bits 110c 111 availm = availm - 100000 112c 113 if(availm.lt.0)then 114 availm = 0 115 endif 116 availm_r = dble(availm) 117 availm_r = min(availm_r,n3c_dbl_max) 118 deficit=0 119 if (availm_r.lt.n3c_dbl_max)then 120 deficit=nint(n3c_dbl_max)-availm_r 121c 122c get amount of local MA in Mbytes need to get incore done 123c 124 deficit=max((deficit*8+deficit/2)/1024/1024,1) 125c 126c force direct if memory is not sufficient 127c 128 availm_r = 0 129 endif 130c 131c check if availm_r is big enough for max batch 132c 133 call int_mem_2e3c(maxg, scrmx) 134 135 n3c_dbl = min(availm_r,n3c_dbl_r) 136 if (me.eq.0 .and. oprint_parm) 137 & write(LuOut,3228)avail, availm, 138 N n3c_dbl,nint(n3c_dbl_max),n3c_int 139 140 if(deficit.ne.0.and.me.eq.0) write(luout,3230) deficit 141c 142c Loops are parallelized over the products of atoms 143c (check for zero ... must be at least 1). 144c 145 146 if (direct.or.(deficit.ne.0))then 147 dft_mem3c = .false. 148 n3c_dbl = 1 149 n3c_int = 1 150 endif 151 if (.not.MA_Push_Get 152 & (MT_Dbl,n3c_dbl,'3c ERI space',l_3cERI,k_3cERI)) 153 & call errquit('dft_scf: push_get failed', 12, MA_ERR) 154c 155 if (.not.MA_Push_Get 156 & (MT_int,n3c_int,'3c what space',l_3cwhat,k_3cwhat)) 157 & call errquit('dft_scf:push_get failed', 13, MA_ERR) 158c 159 if (dft_mem3c)then 160 if (me.eq.0 .and. oprint_3c2e) 161 & write(LuOut,3229)n3c_dbl*1.d-6 162 endif 163c if(me.eq.0) call MA_summarize_allocated_blocks() 164 return 165 3228 format(10x,'Minimum dble words available (all nodes) is: ',i15, 166 & /,10x,' This is reduced (for later use) to: ',i15, 167 & /,10x,' proc 0 Suggested buffer size is: ',i15, 168 & /,10x,' Max Suggested buffer size is: ',i15, 169 & /,10x,' no. integral batches is: ',i15) 170 171 3230 format(/7x, 172 & 'Extra Local Memory (stack+heap) needed for incore:',i6, 173 & ' Mbytes ') 174 3229 format(/,6x,f10.3,' MW buffer allocated for incore 3-center ' 175 & /,10x,'2e- integral storage on stack. ') 176 end 177c hack for spherical since there is always a cartesian internal bit 178 integer function dft_nao2_max(basis,natoms) 179 implicit none 180#include "bas.fh" 181#include "errquit.fh" 182 integer basis 183 integer natoms 184c 185 integer atom_c,atom_d 186 integer ishc,itype,nprimo,nshbfc,isphere 187 integer sh_lo_c, sh_hi_c 188 integer nshpair_sum 189c 190 dft_nao2_max=0 191 do atom_c=1,natoms 192 if (.not. bas_ce2cnr( basis, atom_c, sh_lo_c, sh_hi_c)) 193 & call errquit('Exiting in dft_fitcd',110, BASIS_ERR) 194 nshpair_sum=0 195 do ishc=sh_lo_c,sh_hi_c 196 if(.not.bas_continfo(basis,ishc, 197 & itype,nprimo,nshbfc,isphere)) 198 & call errquit('Exiting in fitcd.',44, CALC_ERR) 199 200 nshpair_sum = nshpair_sum + 201 P ((itype+1)*(itype+2))/2*nshbfc 202 enddo 203 dft_nao2_max=max(dft_nao2_max,nshpair_sum) 204 enddo 205 dft_nao2_max=dft_nao2_max**2 206 return 207 end 208