1      subroutine ccsd_trpdrv(t1,
2     &     f1n,f1t,f2n,f2t,f3n,f3t,f4n,f4t,eorb,
3     &     g_objo,g_objv,g_coul,g_exch,
4     &     ncor,nocc,nvir,iprt,emp4,emp5,
5     &     oseg_lo,oseg_hi,
6     $     kchunk, Tij, Tkj, Tia, Tka, Xia, Xka, Jia, Jka, Kia, Kka,
7     $     Jij, Jkj, Kij, Kkj, Dja, Djka, Djia)
8C     $Id$
9      implicit none
10c
11#include "global.fh"
12#include "ccsd_len.fh"
13#include "ccsdps.fh"
14c
15      double precision t1(*),
16     &     f1n(*),f1t(*),f2n(*),
17     &     f2t(*),f3n(*),f3t(*),f4n(*),f4t(*),eorb(*),
18     &     emp4,emp5
19      double precision Tij(*), Tkj(*), Tia(*), Tka(*), Xia(*), Xka(*),
20     $     Jia(*), Jka(*), Kia(*), Kka(*),
21     $     Jij(*), Jkj(*), Kij(*), Kkj(*), Dja(*), Djka(*), Djia(*)
22
23      integer g_objo,g_objv,ncor,nocc,nvir,iprt,g_coul,
24     &     g_exch,oseg_lo,oseg_hi
25c
26      double precision eaijk
27      integer a,b,c,i,j,k,akold,av,inode,len,
28     &     next,nxtask
29      external nxtask
30C
31      Integer Nodes, IAm
32c
33      integer klo, khi, start, end
34      integer kchunk
35c
36c
37      double precision zip
38      data zip/0.0d00/
39c
40c     apr      call ga_print(g_coul)
41c     apr      call ga_print(g_exch)
42      Nodes = GA_NNodes()
43      IAm = GA_NodeID()
44C
45      call ga_sync()
46      if (occsdps) then
47         call pstat_on(ps_trpdrv)
48      else
49         call qenter('trpdrv',0)
50      endif
51      inode=-1
52      next=nxtask(nodes, 1)
53c
54c
55      do klo = 1, nocc, kchunk
56         akold=0
57         khi = min(nocc, klo+kchunk-1)
58         do a=oseg_lo,oseg_hi
59            av=a-ncor-nocc
60            do j=1,nocc
61               inode=inode+1
62               if (inode.eq.next)then
63c
64c     Get Dja = Dci,ja for given j, a, all ci
65c
66                  start = 1 + (j-1)*lnov
67                  len   = lnov
68                  end   = start + len - 1
69                  call ga_get(g_objv,start,end,av,av,Dja,len)
70c
71c     Get Tkj = T(b,c,k,j) for given j, klo<=k<=khi, all bc
72c
73                  start = (klo-1)*lnvv + 1
74                  len   = (khi-klo+1)*lnvv
75                  end   = start + len - 1
76                  call ga_get(g_objo,start,end,j,j,Tkj,len)
77c
78c     Get Jkj = J(c,l,k,j) for given j, klo<=k<=khi, all cl
79c
80                  start = lnovv + (klo-1)*lnov + 1
81                  len   = (khi-klo+1)*lnov
82                  end   = start + len - 1
83                  call ga_get(g_objo,start,end,j,j,Jkj,len)
84c
85c     Get Kkj = K(c,l,k,j) for given j, klo<=k<=khi, all cl
86c
87                  start = lnovv + lnoov + (klo-1)*lnov + 1
88                  len   = (khi-klo+1)*lnov
89                  end   = start + len - 1
90                  call ga_get(g_objo,start,end,j,j,Kkj,len)
91c
92                  if (akold .ne. a) then
93                     akold = a
94c
95c     Get Jka = J(b,c,k,a) for given a, klo<=k<=khi, all bc
96c
97                     start = (a-oseg_lo)*nocc + klo
98                     len   = (khi-klo+1)
99                     end   = start + len - 1
100                     call ga_get(g_coul,1,lnvv,start,end,Jka,lnvv)
101c
102c     Get Kka = K(b,c,k,a) for given a, klo<=k<=khi, all bc
103c
104                     start = (a-oseg_lo)*nocc + klo
105                     len   = (khi-klo+1)
106                     end   = start + len - 1
107                     call ga_get(g_exch,1,lnvv,start,end,Kka,lnvv)
108c
109c     Get Tka = Tbl,ka for given a, klo<=k<=khi, all bl
110c
111                     start = 1 + lnoov + (klo-1)*lnov
112                     len   = (khi-klo+1)*lnov
113                     end   = start + len - 1
114                     call ga_get(g_objv,start,end,av,av,Tka,len)
115c
116c     Get Xka = Tal,kb for given a, klo<=k<=khi, all bl
117c
118                     start = 1 + lnoov + lnoov + (klo-1)*lnov
119                     len   = (khi-klo+1)*lnov
120                     end   = start + len - 1
121                     call ga_get(g_objv,start,end,av,av,Xka,len)
122                  endif
123c
124c     Get Djka = Dcj,ka for given j, a, klo<=k<=khi, all c
125c
126                  call ccsd_getdjka(djka,
127     G                 g_objv,nvir,lnov,
128     K                 j,av,klo,khi)
129c
130c     Get Tia = Tbl,ia for given a,  all i,bl
131c
132                     start = 1 + lnoov
133                     len   = lnov*nocc
134                     end   = start + len - 1
135                     call ga_get(g_objv,start,end,av,av,Tia,len)
136c
137c     Get Xia = Tal,ib for given a, all i,bl
138c
139                     start = 1 + lnoov + lnoov
140                     len   = lnov*nocc
141                     end   = start + len - 1
142                     call ga_get(g_objv,start,end,av,av,Xia,len)
143c
144c     Get Jij = J(c,l,i,j) for given j, all i,cl
145c
146                     start = lnovv +  1
147                     len   = lnov*nocc
148                     end   = start + len - 1
149                     call ga_get(g_objo,start,end,j,j,Jij,len)
150c
151c     Get Kij = K(c,l,i,j) for given j, all,i cl
152c
153                     start = lnovv + lnoov +  1
154                     len   = lnov*nocc
155                     end   = start + len - 1
156                     call ga_get(g_objo,start,end,j,j,Kij,len)
157c
158c     Get Dia = Dcj,ia for given j, i, a, all c
159c
160                     call ccsd_getdjka(djia,
161     G                    g_objv,nvir,lnov,
162     K                    j,av,1,nocc)
163c
164                  do i=1,nocc
165c
166c     Get Tij = T(b,c,i,j) for given j, i, all bc
167c
168                     start = (i-1)*lnvv + 1
169                     len   = lnvv
170                     end   = start + len - 1
171                     call ga_get(g_objo,start,end,j,j,Tij,len)
172c
173c     Get Jia = J(b,c,i,a) for given a, i, all bc
174c
175                     start = (a-oseg_lo)*nocc + i
176                     len   = 1
177                     end   = start + len - 1
178                     call ga_get(g_coul,1,lnvv,start,end,Jia,lnvv)
179c
180c     Get Kia = K(b,c,i,a) for given a, i, all bc
181c
182                     start = (a-oseg_lo)*nocc + i
183                     len   = 1
184                     end   = start + len - 1
185                     call ga_get(g_exch,1,lnvv,start,end,Kia,lnvv)
186c
187                     do k=klo,min(khi,i)
188                        call dcopy(lnvv,zip,0,f1n,1)
189                        call dcopy(lnvv,zip,0,f1t,1)
190                        call dcopy(lnvv,zip,0,f2n,1)
191                        call dcopy(lnvv,zip,0,f2t,1)
192                        call dcopy(lnvv,zip,0,f3n,1)
193                        call dcopy(lnvv,zip,0,f3t,1)
194                        call dcopy(lnvv,zip,0,f4n,1)
195                        call dcopy(lnvv,zip,0,f4t,1)
196c
197c     sum(d) (Jia, Kia)bd * Tkj,cd -> Fbc
198c
199                        call ccsd_dovvv(Jia, Kia,
200     $                       Tkj(1+(k-klo)*lnvv),f1n,f2n,f3n,f4n,
201     $                       nocc,nvir)
202c
203c     sum(d) (Jka, Kka)bd * Tij,cd -> Fbc
204c
205                        call ccsd_dovvv(Jka(1+(k-klo)*lnvv),
206     $                       Kka(1+(k-klo)*lnvv),
207     $                       Tij,f1t,f2t,f3t,f4t,nocc,nvir)
208c
209c     sum(l) (Jij, Kij)cl  * Tkl,ab -> Fbc
210c
211                        call ccsd_doooo(Jkj(1+(k-klo)*lnov),
212     $                       Kkj(1+(k-klo)*lnov),
213     $                       Tia(1+(i-1)*lnov),Xia(1+(i-1)*lnov),
214     $                       f1n,f2n,
215     $                       f3n,f4n,nocc,nvir)
216c
217c     sum(l) (Jkj, Kkj)cl  * Tli,ba -> Fbc
218c
219                        call ccsd_doooo(Jij(1+(i-1)*lnov),
220     K                       Kij(1+(i-1)*lnov),
221     $                       Tka(1+(k-klo)*lnov),Xka(1+(k-klo)*lnov),
222     $                       f1t,f2t,
223     $                       f3t,f4t,nocc,nvir)
224c
225                        if (iprt.gt.50)then
226                           call prtfmat(f1n,f1t,f2n,f2t,f3n,f3t,f4n,
227     $                          f4t, nvir)
228                        end if
229c
230                        eaijk=eorb(ncor+i)+eorb(ncor+j)+eorb(ncor+k)
231     $                       -eorb(a)
232c
233                        call ccsd_tengy(f1n,f1t,f2n,f2t,f3n,f3t,f4n,
234     $                       f4t,
235     &                       Dja(1+(i-1)*nvir),Djia(1+(i-1)*nvir),
236     $                       t1((k-1)*nvir+1),eorb,eaijk,emp4,emp5,
237     $                       ncor,nocc,nvir)
238c
239                        if (i.ne.k)then
240                           call ccsd_tengy(f1t,f1n,f2t,f2n,
241     $                          f3t,f3n,f4t,f4n,
242     $                          Dja(1+(k-1)*nvir),Djka(1+(k-klo)*nvir),
243     $                          t1((i-1)*nvir+1),eorb,eaijk,emp4,emp5,
244     $                          ncor,nocc,nvir)
245c
246                        end if
247                     end do
248                  end do
249                  if (iprt.gt.50)then
250                     write(6,1234)iam,a,j,emp4,emp5
251 1234                format(' iam aijk',3i5,2e15.5)
252                  end if
253                  next=nxtask(nodes, 1)
254               end if
255            end do
256            if(ga_nodeid().eq.0) then
257               write(6,4321) ' ccsd(t): done ',
258     A              a-(ncor+nocc)+((klo-1)/kchunk)*nvir,
259     O              ' out of ',(nocc/kchunk)*nvir,
260     O              ' progress: ',
261     O              ((a-(ncor+nocc)+((klo-1)/kchunk)*nvir)*100d0)/
262     D              ((nocc/kchunk)*nvir),
263     P              '%'
264 4321          format(a,i8,a,i8,a,f6.1,a1)
265                  endif
266         end do
267      end do
268c
269c
270      next=nxtask(-nodes, 1)
271      call ga_sync()
272      if (occsdps) then
273         call pstat_off(ps_trpdrv)
274      else
275         call qexit('trpdrv',0)
276      endif
277c
278      end
279
280      subroutine ccsd_doooo(eintc,eintx,t2c,t2x,f1,f2,f3,f4,nocc,nvir)
281      implicit none
282      double precision eintc(*),eintx(*),t2c(*),t2x(*),
283     &                 f1(*),f2(*),f3(*),f4(*)
284      integer nocc,nvir
285      double precision one
286      data one/1.0d00/
287#include "ccsdps.fh"
288      if (occsdps) then
289         call pstat_on(ps_doooo)
290      else
291         call qenter('doooo',0)
292      endif
293c
294c      f1n(b,c)=f1n(b,c)-t2(b,l,a,i)*vooo(c,k,l,j)
295      call dgemm('n','n',nvir,nvir,nocc,-one,t2c,nvir,
296     &           eintx,nocc,one,f1,nvir)
297c      f2n(b,c)=f2n(b,c)-t2(a,l,b,i)*vooo(c,k,l,j)
298      call dgemm('n','n',nvir,nvir,nocc,-one,t2x,nvir,
299     &           eintx,nocc,one,f2,nvir)
300c      f3n(b,c)=f3n(b,c)-t2(b,l,a,i)*vooo(c,j,l,k)
301      call dgemm('n','n',nvir,nvir,nocc,-one,t2c,nvir,
302     &           eintc,nocc,one,f3,nvir)
303c      f4n(b,c)=f4n(b,c)-t2(a,l,b,i)*vooo(c,j,l,k)
304      call dgemm('n','n',nvir,nvir,nocc,-one,t2x,nvir,
305     &           eintc,nocc,one,f4,nvir)
306      if (occsdps) then
307         call pstat_off(ps_doooo)
308      else
309         call qexit('doooo',0)
310      endif
311      return
312      end
313
314
315
316
317
318
319      subroutine ccsd_dovvv(fintc,fintx,t2,f1,f2,f3,f4,nocc,nvir)
320      implicit none
321      double precision fintc(*),fintx(*),t2(*),f1(*),f2(*),f3(*),f4(*)
322      integer nocc,nvir
323      double precision one,zero
324      data one/1.0d00/
325      data zero/0.0d00/
326#include "ccsdps.fh"
327      if (occsdps) then
328         call pstat_on(ps_dovvv)
329      else
330         call qenter('dovvv',0)
331      endif
332c
333c --  f1n(b,c)=f1n(b,c)+vvvo(b,d,a,i)*t2(d,j,c,k)
334      call dgemm('n','t',nvir,nvir,nvir,one,fintc,nvir,
335     &           t2,nvir,one,f1,nvir)
336c --  f2n(b,c)=f2n(b,c)+vvvo(a,d,b,i)*t2(d,j,c,k)
337      call dgemm('n','t',nvir,nvir,nvir,one,fintx,nvir,
338     &           t2,nvir,one,f2,nvir)
339c --  f3n(b,c)=f3n(b,c)+vvvo(b,d,a,i)*t2(d,k,c,j)
340      call dgemm('n','n',nvir,nvir,nvir,one,fintc,nvir,
341     &           t2,nvir,one,f3,nvir)
342c --  f4n(b,c)=f4n(b,c)+vvvo(a,d,b,i)*t2(d,k,c,j)
343      call dgemm('n','n',nvir,nvir,nvir,one,fintx,nvir,
344     &           t2,nvir,one,f4,nvir)
345      if (occsdps) then
346         call pstat_off(ps_dovvv)
347      else
348         call qexit('dovvv',0)
349      endif
350      return
351      end
352
353      subroutine ccsd_getdjka(djka,
354     G     g_objv,nvir,lnov,
355     K     j,av,klo,khi)
356      implicit none
357#include "mafdecls.fh"
358#include "global.fh"
359      double precision djka(*)
360      integer g_objv, nvir,lnov
361      integer av,klo,khi,j
362c
363      integer k,l_kk,k_kk,lenk,start,end,iptr,i,startc
364      logical failed
365c
366      start = 1 + (j-1)*nvir + (klo-1)*lnov
367      end =  (j-1)*nvir + (khi)*lnov
368      lenk=end-start+1
369      lenk=min(lenk,(ma_inquire_avail(MT_DBL)*66)/100)
370crestrict to 5M to avoid OOMs
371      lenk=min(lenk,5000000)
372 1    continue
373      failed=.not.ma_push_get(MT_Dbl,lenk,
374     Q     'tdjka',l_kk,k_kk)
375      if(failed) then
376         lenk=lenk/2
377         goto 1
378      endif
379c     round it to the nearest multiple of lnov
380      lenk=(lenk/lnov)*lnov
381      iptr=1
382      failed=.false.
383 2    continue
384      call ccsd_cpyloop(g_objv,start,end,lenk,
385     V     av,nvir,lnov, iptr,
386     A     dbl_mb(k_kk),djka)
387      if(start-1.ne.end)then
388         if(failed)
389     A     call errquit('getdjka:error ', 0, 0)
390         lenk=end-start+1
391         goto 2
392      endif
393      if (.not.ma_pop_stack(l_kk))
394     &     call errquit('ccsdgetdjka: cannot pop stack',11,0)
395
396      return
397      end
398      subroutine ccsd_cpyloop(g_objv,start,end,lenk,
399     V     av,nvir,lnov, iptr,
400     A     kk,djka)
401      implicit none
402      integer g_objv,start,end,lenk,
403     V     av,nvir,lnov
404      double precision kk(*),djka(*)
405c
406      integer startc,iptr,i,k
407c
408      do i=1,(end-start+1)/lenk
409         call ga_get(g_objv,start,start+lenk-1,av,av,
410     $        kk,lenk)
411         startc=1
412         do k = 1,lenk/lnov
413            call dcopy(nvir,kk(startc),1,Djka(iptr),1)
414            iptr=iptr+nvir
415            startc=startc+lnov
416         enddo
417         start=start+lenk
418      enddo
419      return
420      end
421
422
423
424
425