1 subroutine ccsd_rdtrpo(t1,buf1,buf2,g_objo,g_objv, 2 $ nocc,nvir,iprt) 3! $Id$ 4 implicit none 5#include "errquit.fh" 6! 7#include "ccsd_len.fh" 8 integer g_objo,g_objv,nocc,nvir,iprt 9 double precision t1(nvir,nocc),buf1(lnobj),buf2(lnobj) 10! 11 integer i,j,k,a,av,bv,ad1,ad2,lbfo 12 Integer Nodes, IAm 13 integer g_tmp, start, end 14! 15#include "msgids.fh" 16#include "tcgmsg.fh" 17#include "global.fh" 18#include "ccsdps.fh" 19! 20 Nodes = GA_NNodes() 21 IAm = GA_NodeID() 22 23 if (occsdps) then 24 call pstat_on(ps_rdtrpo) 25 else 26 call qenter('rdtrpo',0) 27 endif 28 29 call ga_sync 30! 31! - read in objects 32! - [io|ov], [oo|iv], t2(io:vv) / [ao|vo], t2(oo:av), t2(oo,va) 33! 34 lbfo=lnovv+lnoov+lnoov 35! 36! do av ... uses 2*oov+ooo memory. Do not use locality 37! so that we can distribute more uniformly across nodes. 38! Locality is not used in the triples driver. 39! 40 do av=iam+1,nvir,nodes 41! 42! get dint and eint 43 call ga_get(g_objv,1,2*lnoov+lnooo,av,av,buf1, 44 & 2*lnoov+lnooo) 45! 46! process eints for occupied indexed buffer 47 do i=1,nocc 48! eintc 49 ad1=0 50 do j=1,nocc 51 do k=1,nocc 52 ad1=ad1+1 53 buf2(ad1)=buf1(lnoov+(j-1)*lnoo+(i-1)*nocc+k) 54 end do 55 end do 56 ad2=lnovv+(av-1)*lnoo 57 call ga_put(g_objo,ad2+1,ad2+lnoo,i,i,buf2,lnoo) 58! eintx 59 ad1=lnoov+(i-1)*lnoo+1 60 ad2=lnovv+lnoov+(av-1)*lnoo 61 call ga_put(g_objo,ad2+1,ad2+lnoo,i,i,buf1(ad1),lnoo) 62 end do 63! 64! process dints for virtual indexed buffer 65 ad1=0 66 do i=1,nocc 67 do j=1,nocc 68 do bv=1,nvir 69 ad1=ad1+1 70 ad2=(bv-1)*lnoo+(i-1)*nocc+j 71 buf2(ad1)=buf1(ad2) 72 end do 73 end do 74 end do 75! 76! process t2 amplitudes for virtual indexed buffer 77! t2c 78 do i=1,nocc 79 do j=1,nocc 80 do bv=1,nvir 81 ad1=ad1+1 82 ad2=lnoov+lnooo+(bv-1)*lnoo+(i-1)*nocc+j 83 buf2(ad1)=buf1(ad2) 84 end do 85 end do 86 end do 87! t2x 88 do i=1,nocc 89 do j=1,nocc 90 do bv=1,nvir 91 ad1=ad1+1 92 ad2=lnoov+lnooo+(bv-1)*lnoo+(j-1)*nocc+i 93 buf2(ad1)=buf1(ad2) 94 end do 95 end do 96 end do 97 if (iprt.gt.50)then 98 print *,'t2c new ',iam,av 99 write(6,4859)(buf2(j),j=lnoov+1,2*lnoov) 100 print *,'t2x new ',iam,av 101 write(6,4859)(buf2(j),j=2*lnoov+1,3*lnoov) 102 print *,'dint new ',iam,av 103 write(6,4859)(buf2(j),j=1,lnoov) 104 4859 format(1x,5e14.4) 105 end if 106 call ga_put(g_objv,1,ad1,av,av,buf2,ad1) 107! 108! process t2 amplitudes for occupied indexed buffer 109! t2j 110 do i=1,nocc 111 ad1=0 112 do j=1,nocc 113 do bv=1,nvir 114 ad1=ad1+1 115 buf2(ad1)= 116 $ buf1(lnoov+lnooo+(bv-1)*lnoo+(i-1)*nocc+j) 117 end do 118 end do 119 ad2=(av-1)*lnov 120 call ga_put(g_objo,ad2+1,ad2+lnov,i,i,buf2,lnov) 121 end do 122! 123 end do 124! 125 call ga_sync() ! ga_sync ensures all GA ops complete 126! 127! sort t1 then broadcast 128! 129 if (iam.eq.0)then 130 call ga_get(g_objv,lnoov+lnooo+1,lnoov+lnooo+lnov, 131 $ nvir+1,nvir+1, buf1,lnov) 132 ad1=0 133 do a=1,nvir 134 do i=1,nocc 135 ad1=ad1+1 136 t1(a,i)=buf1(ad1) 137 end do 138 end do 139 end if 140 call ga_brdcst(msg_cc_t1b,t1,mdtob(lnov),0) 141! 142! Next phase transposes bits withing g_objo. To avoid OVV local 143! memory, duplicate g_objo and do some smaller operations 144! to accomplish the sort in O(V) memory. 145! 146 call ga_sync 147! 148 if (.not. ga_duplicate(g_objo, g_tmp, 'rtdpro')) call errquit 149 $ ('ccsd_rdtpro: failed to allocate tmp GA',0, GA_ERR) 150 call ga_copy(g_objo, g_tmp) 151! 152 do i=1+iam,nocc,nodes 153! t2j 154!* start = 1 155!* end = lnovv 156!* call ga_get(g_tmp,start,end,i,i,buf1,lnovv) 157!* ad1 = 0 158 do j=1,nocc 159 do av=1,nvir 160!* do bv=1,nvir 161!* ad1=ad1+1 162!* buf2(ad1)=buf1((av-1)*lnov+(j-1)*nvir+bv) 163!* end do 164 start = (av-1)*lnov+(j-1)*nvir+1 165 end = start + nvir - 1 166 call ga_get(g_tmp,start,end,i,i,buf1,nvir) 167! 168 start = (av-1)*nvir+(j-1)*nvir*nvir+1 169 end = start + nvir - 1 170 call ga_put(g_objo,start,end,i,i,buf1,nvir) 171 end do 172 end do 173!* call ga_put(g_objo,start,end,i,i,buf2,lnovv) 174! 175! eintc 176 start = lnovv + 1 177 end = start + 2*lnoov - 1 178 call ga_get(g_tmp,start,end,i,i,buf1,2*lnoov) 179 ad1=0 180 do j=1,nocc 181 do av=1,nvir 182 do k=1,nocc 183 ad1=ad1+1 184 ad2=(av-1)*lnoo+(j-1)*nocc+k 185 buf2(ad1)=buf1(ad2) 186! buf1(ad1)=gtint(i,a,j,k) 187 end do 188 end do 189 end do 190! - eintx 191 do j=1,nocc 192 do av=1,nvir 193 do k=1,nocc 194 ad1=ad1+1 195 ad2=lnoov+(av-1)*lnoo+(j-1)*nocc+k 196 buf2(ad1)=buf1(ad2) 197! buf1(ad1)=gtint(i,k,j,a) 198 end do 199 end do 200 end do 201! 202 call ga_put(g_objo,start,end,i,i,buf2,2*lnoov) 203! 204 end do 205! 206 call ga_sync() 207! 208 if (.not. ga_destroy(g_tmp)) call errquit 209 $ ('ccsd_trdpro: failed to destroy GA',0, GA_ERR) 210! 211 if (occsdps) then 212 call pstat_off(ps_rdtrpo) 213 else 214 call qexit('rdtrpo',0) 215 endif 216! 217 end 218