1 subroutine ccsd_trpdrv(t1, 2 & f1n,f1t,f2n,f2t,f3n,f3t,f4n,f4t,eorb, 3 & g_objo,g_objv,g_coul,g_exch, 4 & ncor,nocc,nvir,iprt,emp4,emp5, 5 & oseg_lo,oseg_hi, 6 $ kchunk, Tij, Tkj, Tia, Tka, Xia, Xka, Jia, Jka, Kia, Kka, 7 $ Jij, Jkj, Kij, Kkj, Dja, Djka, Djia) 8C $Id$ 9 implicit none 10c 11#include "global.fh" 12#include "ccsd_len.fh" 13#include "ccsdps.fh" 14c 15 double precision t1(*), 16 & f1n(*),f1t(*),f2n(*), 17 & f2t(*),f3n(*),f3t(*),f4n(*),f4t(*),eorb(*), 18 & emp4,emp5 19 double precision Tij(*), Tkj(*), Tia(*), Tka(*), Xia(*), Xka(*), 20 $ Jia(*), Jka(*), Kia(*), Kka(*), 21 $ Jij(*), Jkj(*), Kij(*), Kkj(*), Dja(*), Djka(*), Djia(*) 22 23 integer g_objo,g_objv,ncor,nocc,nvir,iprt,g_coul, 24 & g_exch,oseg_lo,oseg_hi 25c 26 double precision eaijk 27 integer a,b,c,i,j,k,akold,av,inode,len, 28 & next,nxtask 29 external nxtask 30C 31 Integer Nodes, IAm 32c 33 integer klo, khi, start, end 34 integer kchunk 35c 36c 37 double precision zip 38 data zip/0.0d00/ 39c 40c apr call ga_print(g_coul) 41c apr call ga_print(g_exch) 42 Nodes = GA_NNodes() 43 IAm = GA_NodeID() 44C 45 call ga_sync() 46 if (occsdps) then 47 call pstat_on(ps_trpdrv) 48 else 49 call qenter('trpdrv',0) 50 endif 51 inode=-1 52 next=nxtask(nodes, 1) 53c 54c 55 do klo = 1, nocc, kchunk 56 akold=0 57 khi = min(nocc, klo+kchunk-1) 58 do a=oseg_lo,oseg_hi 59 av=a-ncor-nocc 60 do j=1,nocc 61 inode=inode+1 62 if (inode.eq.next)then 63c 64c Get Dja = Dci,ja for given j, a, all ci 65c 66 start = 1 + (j-1)*lnov 67 len = lnov 68 end = start + len - 1 69 call ga_get(g_objv,start,end,av,av,Dja,len) 70c 71c Get Tkj = T(b,c,k,j) for given j, klo<=k<=khi, all bc 72c 73 start = (klo-1)*lnvv + 1 74 len = (khi-klo+1)*lnvv 75 end = start + len - 1 76 call ga_get(g_objo,start,end,j,j,Tkj,len) 77c 78c Get Jkj = J(c,l,k,j) for given j, klo<=k<=khi, all cl 79c 80 start = lnovv + (klo-1)*lnov + 1 81 len = (khi-klo+1)*lnov 82 end = start + len - 1 83 call ga_get(g_objo,start,end,j,j,Jkj,len) 84c 85c Get Kkj = K(c,l,k,j) for given j, klo<=k<=khi, all cl 86c 87 start = lnovv + lnoov + (klo-1)*lnov + 1 88 len = (khi-klo+1)*lnov 89 end = start + len - 1 90 call ga_get(g_objo,start,end,j,j,Kkj,len) 91c 92 if (akold .ne. a) then 93 akold = a 94c 95c Get Jka = J(b,c,k,a) for given a, klo<=k<=khi, all bc 96c 97 start = (a-oseg_lo)*nocc + klo 98 len = (khi-klo+1) 99 end = start + len - 1 100 call ga_get(g_coul,1,lnvv,start,end,Jka,lnvv) 101c 102c Get Kka = K(b,c,k,a) for given a, klo<=k<=khi, all bc 103c 104 start = (a-oseg_lo)*nocc + klo 105 len = (khi-klo+1) 106 end = start + len - 1 107 call ga_get(g_exch,1,lnvv,start,end,Kka,lnvv) 108c 109c Get Tka = Tbl,ka for given a, klo<=k<=khi, all bl 110c 111 start = 1 + lnoov + (klo-1)*lnov 112 len = (khi-klo+1)*lnov 113 end = start + len - 1 114 call ga_get(g_objv,start,end,av,av,Tka,len) 115c 116c Get Xka = Tal,kb for given a, klo<=k<=khi, all bl 117c 118 start = 1 + lnoov + lnoov + (klo-1)*lnov 119 len = (khi-klo+1)*lnov 120 end = start + len - 1 121 call ga_get(g_objv,start,end,av,av,Xka,len) 122 endif 123c 124c Get Djka = Dcj,ka for given j, a, klo<=k<=khi, all c 125c 126 call ccsd_getdjka(djka, 127 G g_objv,nvir,lnov, 128 K j,av,klo,khi) 129c 130c Get Tia = Tbl,ia for given a, all i,bl 131c 132 start = 1 + lnoov 133 len = lnov*nocc 134 end = start + len - 1 135 call ga_get(g_objv,start,end,av,av,Tia,len) 136c 137c Get Xia = Tal,ib for given a, all i,bl 138c 139 start = 1 + lnoov + lnoov 140 len = lnov*nocc 141 end = start + len - 1 142 call ga_get(g_objv,start,end,av,av,Xia,len) 143c 144c Get Jij = J(c,l,i,j) for given j, all i,cl 145c 146 start = lnovv + 1 147 len = lnov*nocc 148 end = start + len - 1 149 call ga_get(g_objo,start,end,j,j,Jij,len) 150c 151c Get Kij = K(c,l,i,j) for given j, all,i cl 152c 153 start = lnovv + lnoov + 1 154 len = lnov*nocc 155 end = start + len - 1 156 call ga_get(g_objo,start,end,j,j,Kij,len) 157c 158c Get Dia = Dcj,ia for given j, i, a, all c 159c 160 call ccsd_getdjka(djia, 161 G g_objv,nvir,lnov, 162 K j,av,1,nocc) 163c 164 do i=1,nocc 165c 166c Get Tij = T(b,c,i,j) for given j, i, all bc 167c 168 start = (i-1)*lnvv + 1 169 len = lnvv 170 end = start + len - 1 171 call ga_get(g_objo,start,end,j,j,Tij,len) 172c 173c Get Jia = J(b,c,i,a) for given a, i, all bc 174c 175 start = (a-oseg_lo)*nocc + i 176 len = 1 177 end = start + len - 1 178 call ga_get(g_coul,1,lnvv,start,end,Jia,lnvv) 179c 180c Get Kia = K(b,c,i,a) for given a, i, all bc 181c 182 start = (a-oseg_lo)*nocc + i 183 len = 1 184 end = start + len - 1 185 call ga_get(g_exch,1,lnvv,start,end,Kia,lnvv) 186c 187 do k=klo,min(khi,i) 188 call dcopy(lnvv,zip,0,f1n,1) 189 call dcopy(lnvv,zip,0,f1t,1) 190 call dcopy(lnvv,zip,0,f2n,1) 191 call dcopy(lnvv,zip,0,f2t,1) 192 call dcopy(lnvv,zip,0,f3n,1) 193 call dcopy(lnvv,zip,0,f3t,1) 194 call dcopy(lnvv,zip,0,f4n,1) 195 call dcopy(lnvv,zip,0,f4t,1) 196c 197c sum(d) (Jia, Kia)bd * Tkj,cd -> Fbc 198c 199 call ccsd_dovvv(Jia, Kia, 200 $ Tkj(1+(k-klo)*lnvv),f1n,f2n,f3n,f4n, 201 $ nocc,nvir) 202c 203c sum(d) (Jka, Kka)bd * Tij,cd -> Fbc 204c 205 call ccsd_dovvv(Jka(1+(k-klo)*lnvv), 206 $ Kka(1+(k-klo)*lnvv), 207 $ Tij,f1t,f2t,f3t,f4t,nocc,nvir) 208c 209c sum(l) (Jij, Kij)cl * Tkl,ab -> Fbc 210c 211 call ccsd_doooo(Jkj(1+(k-klo)*lnov), 212 $ Kkj(1+(k-klo)*lnov), 213 $ Tia(1+(i-1)*lnov),Xia(1+(i-1)*lnov), 214 $ f1n,f2n, 215 $ f3n,f4n,nocc,nvir) 216c 217c sum(l) (Jkj, Kkj)cl * Tli,ba -> Fbc 218c 219 call ccsd_doooo(Jij(1+(i-1)*lnov), 220 K Kij(1+(i-1)*lnov), 221 $ Tka(1+(k-klo)*lnov),Xka(1+(k-klo)*lnov), 222 $ f1t,f2t, 223 $ f3t,f4t,nocc,nvir) 224c 225 if (iprt.gt.50)then 226 call prtfmat(f1n,f1t,f2n,f2t,f3n,f3t,f4n, 227 $ f4t, nvir) 228 end if 229c 230 eaijk=eorb(ncor+i)+eorb(ncor+j)+eorb(ncor+k) 231 $ -eorb(a) 232c 233 call ccsd_tengy(f1n,f1t,f2n,f2t,f3n,f3t,f4n, 234 $ f4t, 235 & Dja(1+(i-1)*nvir),Djia(1+(i-1)*nvir), 236 $ t1((k-1)*nvir+1),eorb,eaijk,emp4,emp5, 237 $ ncor,nocc,nvir) 238c 239 if (i.ne.k)then 240 call ccsd_tengy(f1t,f1n,f2t,f2n, 241 $ f3t,f3n,f4t,f4n, 242 $ Dja(1+(k-1)*nvir),Djka(1+(k-klo)*nvir), 243 $ t1((i-1)*nvir+1),eorb,eaijk,emp4,emp5, 244 $ ncor,nocc,nvir) 245c 246 end if 247 end do 248 end do 249 if (iprt.gt.50)then 250 write(6,1234)iam,a,j,emp4,emp5 251 1234 format(' iam aijk',3i5,2e15.5) 252 end if 253 next=nxtask(nodes, 1) 254 end if 255 end do 256 if(ga_nodeid().eq.0) then 257 write(6,4321) ' ccsd(t): done ', 258 A a-(ncor+nocc)+((klo-1)/kchunk)*nvir, 259 O ' out of ',(nocc/kchunk)*nvir, 260 O ' progress: ', 261 O ((a-(ncor+nocc)+((klo-1)/kchunk)*nvir)*100d0)/ 262 D ((nocc/kchunk)*nvir), 263 P '%' 264 4321 format(a,i8,a,i8,a,f6.1,a1) 265 endif 266 end do 267 end do 268c 269c 270 next=nxtask(-nodes, 1) 271 call ga_sync() 272 if (occsdps) then 273 call pstat_off(ps_trpdrv) 274 else 275 call qexit('trpdrv',0) 276 endif 277c 278 end 279 280 subroutine ccsd_doooo(eintc,eintx,t2c,t2x,f1,f2,f3,f4,nocc,nvir) 281 implicit none 282 double precision eintc(*),eintx(*),t2c(*),t2x(*), 283 & f1(*),f2(*),f3(*),f4(*) 284 integer nocc,nvir 285 double precision one 286 data one/1.0d00/ 287#include "ccsdps.fh" 288 if (occsdps) then 289 call pstat_on(ps_doooo) 290 else 291 call qenter('doooo',0) 292 endif 293c 294c f1n(b,c)=f1n(b,c)-t2(b,l,a,i)*vooo(c,k,l,j) 295 call dgemm('n','n',nvir,nvir,nocc,-one,t2c,nvir, 296 & eintx,nocc,one,f1,nvir) 297c f2n(b,c)=f2n(b,c)-t2(a,l,b,i)*vooo(c,k,l,j) 298 call dgemm('n','n',nvir,nvir,nocc,-one,t2x,nvir, 299 & eintx,nocc,one,f2,nvir) 300c f3n(b,c)=f3n(b,c)-t2(b,l,a,i)*vooo(c,j,l,k) 301 call dgemm('n','n',nvir,nvir,nocc,-one,t2c,nvir, 302 & eintc,nocc,one,f3,nvir) 303c f4n(b,c)=f4n(b,c)-t2(a,l,b,i)*vooo(c,j,l,k) 304 call dgemm('n','n',nvir,nvir,nocc,-one,t2x,nvir, 305 & eintc,nocc,one,f4,nvir) 306 if (occsdps) then 307 call pstat_off(ps_doooo) 308 else 309 call qexit('doooo',0) 310 endif 311 return 312 end 313 314 315 316 317 318 319 subroutine ccsd_dovvv(fintc,fintx,t2,f1,f2,f3,f4,nocc,nvir) 320 implicit none 321 double precision fintc(*),fintx(*),t2(*),f1(*),f2(*),f3(*),f4(*) 322 integer nocc,nvir 323 double precision one,zero 324 data one/1.0d00/ 325 data zero/0.0d00/ 326#include "ccsdps.fh" 327 if (occsdps) then 328 call pstat_on(ps_dovvv) 329 else 330 call qenter('dovvv',0) 331 endif 332c 333c -- f1n(b,c)=f1n(b,c)+vvvo(b,d,a,i)*t2(d,j,c,k) 334 call dgemm('n','t',nvir,nvir,nvir,one,fintc,nvir, 335 & t2,nvir,one,f1,nvir) 336c -- f2n(b,c)=f2n(b,c)+vvvo(a,d,b,i)*t2(d,j,c,k) 337 call dgemm('n','t',nvir,nvir,nvir,one,fintx,nvir, 338 & t2,nvir,one,f2,nvir) 339c -- f3n(b,c)=f3n(b,c)+vvvo(b,d,a,i)*t2(d,k,c,j) 340 call dgemm('n','n',nvir,nvir,nvir,one,fintc,nvir, 341 & t2,nvir,one,f3,nvir) 342c -- f4n(b,c)=f4n(b,c)+vvvo(a,d,b,i)*t2(d,k,c,j) 343 call dgemm('n','n',nvir,nvir,nvir,one,fintx,nvir, 344 & t2,nvir,one,f4,nvir) 345 if (occsdps) then 346 call pstat_off(ps_dovvv) 347 else 348 call qexit('dovvv',0) 349 endif 350 return 351 end 352 353 subroutine ccsd_getdjka(djka, 354 G g_objv,nvir,lnov, 355 K j,av,klo,khi) 356 implicit none 357#include "mafdecls.fh" 358#include "global.fh" 359 double precision djka(*) 360 integer g_objv, nvir,lnov 361 integer av,klo,khi,j 362c 363 integer k,l_kk,k_kk,lenk,start,end,iptr,i,startc 364 logical failed 365c 366 start = 1 + (j-1)*nvir + (klo-1)*lnov 367 end = (j-1)*nvir + (khi)*lnov 368 lenk=end-start+1 369 lenk=min(lenk,(ma_inquire_avail(MT_DBL)*66)/100) 370crestrict to 5M to avoid OOMs 371 lenk=min(lenk,5000000) 372 1 continue 373 failed=.not.ma_push_get(MT_Dbl,lenk, 374 Q 'tdjka',l_kk,k_kk) 375 if(failed) then 376 lenk=lenk/2 377 goto 1 378 endif 379c round it to the nearest multiple of lnov 380 lenk=(lenk/lnov)*lnov 381 iptr=1 382 failed=.false. 383 2 continue 384 call ccsd_cpyloop(g_objv,start,end,lenk, 385 V av,nvir,lnov, iptr, 386 A dbl_mb(k_kk),djka) 387 if(start-1.ne.end)then 388 if(failed) 389 A call errquit('getdjka:error ', 0, 0) 390 lenk=end-start+1 391 goto 2 392 endif 393 if (.not.ma_pop_stack(l_kk)) 394 & call errquit('ccsdgetdjka: cannot pop stack',11,0) 395 396 return 397 end 398 subroutine ccsd_cpyloop(g_objv,start,end,lenk, 399 V av,nvir,lnov, iptr, 400 A kk,djka) 401 implicit none 402 integer g_objv,start,end,lenk, 403 V av,nvir,lnov 404 double precision kk(*),djka(*) 405c 406 integer startc,iptr,i,k 407c 408 do i=1,(end-start+1)/lenk 409 call ga_get(g_objv,start,start+lenk-1,av,av, 410 $ kk,lenk) 411 startc=1 412 do k = 1,lenk/lnov 413 call dcopy(nvir,kk(startc),1,Djka(iptr),1) 414 iptr=iptr+nvir 415 startc=startc+lnov 416 enddo 417 start=start+lenk 418 enddo 419 return 420 end 421 422 423 424 425