1      subroutine ccsd_rdtrpo(t1,buf1,buf2,g_objo,g_objv,
2     $     nocc,nvir,iprt)
3!     $Id$
4      implicit none
5#include "errquit.fh"
6!
7#include "ccsd_len.fh"
8      integer g_objo,g_objv,nocc,nvir,iprt
9      double precision t1(nvir,nocc),buf1(lnobj),buf2(lnobj)
10!
11      integer i,j,k,a,av,bv,ad1,ad2,lbfo
12      Integer Nodes, IAm
13      integer g_tmp, start, end
14!
15#include "msgids.fh"
16#include "tcgmsg.fh"
17#include "global.fh"
18#include "ccsdps.fh"
19!
20      Nodes = GA_NNodes()
21      IAm = GA_NodeID()
22
23      if (occsdps) then
24         call pstat_on(ps_rdtrpo)
25      else
26         call qenter('rdtrpo',0)
27      endif
28
29      call ga_sync
30!
31!     - read in objects
32!     - [io|ov], [oo|iv], t2(io:vv) / [ao|vo], t2(oo:av), t2(oo,va)
33!
34      lbfo=lnovv+lnoov+lnoov
35!
36!     do av ... uses 2*oov+ooo memory.  Do not use locality
37!     so that we can distribute more uniformly across nodes.
38!     Locality is not used in the triples driver.
39!
40      do av=iam+1,nvir,nodes
41!
42!     get dint and eint
43         call ga_get(g_objv,1,2*lnoov+lnooo,av,av,buf1,
44     &        2*lnoov+lnooo)
45!
46!     process eints for occupied indexed buffer
47         do i=1,nocc
48!     eintc
49            ad1=0
50            do j=1,nocc
51               do k=1,nocc
52                  ad1=ad1+1
53                  buf2(ad1)=buf1(lnoov+(j-1)*lnoo+(i-1)*nocc+k)
54               end do
55            end do
56            ad2=lnovv+(av-1)*lnoo
57            call ga_put(g_objo,ad2+1,ad2+lnoo,i,i,buf2,lnoo)
58!     eintx
59            ad1=lnoov+(i-1)*lnoo+1
60            ad2=lnovv+lnoov+(av-1)*lnoo
61            call ga_put(g_objo,ad2+1,ad2+lnoo,i,i,buf1(ad1),lnoo)
62         end do
63!
64!     process dints for virtual indexed buffer
65         ad1=0
66         do i=1,nocc
67            do j=1,nocc
68               do bv=1,nvir
69                  ad1=ad1+1
70                  ad2=(bv-1)*lnoo+(i-1)*nocc+j
71                  buf2(ad1)=buf1(ad2)
72               end do
73            end do
74         end do
75!
76!     process t2 amplitudes for virtual indexed buffer
77!     t2c
78         do i=1,nocc
79            do j=1,nocc
80               do bv=1,nvir
81                  ad1=ad1+1
82                  ad2=lnoov+lnooo+(bv-1)*lnoo+(i-1)*nocc+j
83                  buf2(ad1)=buf1(ad2)
84               end do
85            end do
86         end do
87!     t2x
88         do i=1,nocc
89            do j=1,nocc
90               do bv=1,nvir
91                  ad1=ad1+1
92                  ad2=lnoov+lnooo+(bv-1)*lnoo+(j-1)*nocc+i
93                  buf2(ad1)=buf1(ad2)
94               end do
95            end do
96         end do
97         if (iprt.gt.50)then
98            print *,'t2c new ',iam,av
99            write(6,4859)(buf2(j),j=lnoov+1,2*lnoov)
100            print *,'t2x new ',iam,av
101            write(6,4859)(buf2(j),j=2*lnoov+1,3*lnoov)
102            print *,'dint new ',iam,av
103            write(6,4859)(buf2(j),j=1,lnoov)
104 4859       format(1x,5e14.4)
105         end if
106         call ga_put(g_objv,1,ad1,av,av,buf2,ad1)
107!
108!     process t2 amplitudes for occupied indexed buffer
109!     t2j
110         do i=1,nocc
111            ad1=0
112            do j=1,nocc
113               do bv=1,nvir
114                  ad1=ad1+1
115                  buf2(ad1)=
116     $                 buf1(lnoov+lnooo+(bv-1)*lnoo+(i-1)*nocc+j)
117               end do
118            end do
119            ad2=(av-1)*lnov
120            call ga_put(g_objo,ad2+1,ad2+lnov,i,i,buf2,lnov)
121         end do
122!
123      end do
124!
125      call ga_sync()            ! ga_sync ensures all GA ops complete
126!
127!     sort t1 then broadcast
128!
129      if (iam.eq.0)then
130         call ga_get(g_objv,lnoov+lnooo+1,lnoov+lnooo+lnov,
131     $        nvir+1,nvir+1, buf1,lnov)
132         ad1=0
133         do a=1,nvir
134            do i=1,nocc
135               ad1=ad1+1
136               t1(a,i)=buf1(ad1)
137            end do
138         end do
139      end if
140      call ga_brdcst(msg_cc_t1b,t1,mdtob(lnov),0)
141!
142!     Next phase transposes bits withing g_objo.  To avoid OVV local
143!     memory, duplicate g_objo and do some smaller operations
144!     to accomplish the sort in O(V) memory.
145!
146      call ga_sync
147!
148      if (.not. ga_duplicate(g_objo, g_tmp, 'rtdpro')) call errquit
149     $     ('ccsd_rdtpro: failed to allocate tmp GA',0, GA_ERR)
150      call ga_copy(g_objo, g_tmp)
151!
152      do i=1+iam,nocc,nodes
153!     t2j
154!*         start = 1
155!*         end   = lnovv
156!*         call ga_get(g_tmp,start,end,i,i,buf1,lnovv)
157!*         ad1 = 0
158         do j=1,nocc
159            do av=1,nvir
160!*               do bv=1,nvir
161!*                  ad1=ad1+1
162!*                  buf2(ad1)=buf1((av-1)*lnov+(j-1)*nvir+bv)
163!*               end do
164               start = (av-1)*lnov+(j-1)*nvir+1
165               end   = start + nvir - 1
166               call ga_get(g_tmp,start,end,i,i,buf1,nvir)
167!
168               start = (av-1)*nvir+(j-1)*nvir*nvir+1
169               end   = start + nvir - 1
170               call ga_put(g_objo,start,end,i,i,buf1,nvir)
171            end do
172         end do
173!*         call ga_put(g_objo,start,end,i,i,buf2,lnovv)
174!
175!     eintc
176         start = lnovv + 1
177         end   = start + 2*lnoov - 1
178         call ga_get(g_tmp,start,end,i,i,buf1,2*lnoov)
179         ad1=0
180         do j=1,nocc
181            do av=1,nvir
182               do k=1,nocc
183                  ad1=ad1+1
184                  ad2=(av-1)*lnoo+(j-1)*nocc+k
185                  buf2(ad1)=buf1(ad2)
186!     buf1(ad1)=gtint(i,a,j,k)
187               end do
188            end do
189         end do
190!     - eintx
191         do j=1,nocc
192            do av=1,nvir
193               do k=1,nocc
194                  ad1=ad1+1
195                  ad2=lnoov+(av-1)*lnoo+(j-1)*nocc+k
196                  buf2(ad1)=buf1(ad2)
197!     buf1(ad1)=gtint(i,k,j,a)
198               end do
199            end do
200         end do
201!
202         call ga_put(g_objo,start,end,i,i,buf2,2*lnoov)
203!
204      end do
205!
206      call ga_sync()
207!
208      if (.not. ga_destroy(g_tmp)) call errquit
209     $     ('ccsd_trdpro: failed to destroy GA',0, GA_ERR)
210!
211      if (occsdps) then
212         call pstat_off(ps_rdtrpo)
213      else
214         call qexit('rdtrpo',0)
215      endif
216!
217      end
218