1ckbn ccsd_t.F modified to tce_mrcc_ccsdpt_uncoup_pt3.F
2
3       SUBROUTINE tce_mrcc_ccsdpt_subg(d_t1,k_t1_offset,
4     +                           d_t2,k_t2_offset,
5     +                           d_v2,k_v2_offset,
6     +                           d_f1,
7     +                           k_f1_offset,
8c     +                          energy1,energy2,energy3,
9     +                           energy3,
10     1                           iref)
11c     1                           iref,nref)
12
13      IMPLICIT NONE
14#include "mafdecls.fh"
15#include "tcgmsg.fh"
16#include "global.fh"
17#include "bas.fh"
18#include "geom.fh"
19#include "rtdb.fh"
20#include "sym.fh"
21#include "util.fh"
22#include "msgids.fh"
23#include "stdio.fh"
24#include "sf.fh"
25#include "inp.fh"
26#include "errquit.fh"
27#include "tce.fh"
28#include "tce_main.fh"
29#include "tce_hetio.fh"
30#include "tce_diis.fh"
31#include "tce_prop.fh"
32#include "tce_restart.fh"
33#include "tce_mrcc.fh"
34#include "offl.fh"
35c      integer nref
36      integer iref
37      integer d_t1
38      integer k_t1_offset
39      integer d_t2
40      integer k_t2_offset
41      integer d_v2
42      integer k_v2_offset
43      integer t_h1b, t_h1
44      integer t_h2b, t_h2
45      integer t_h3b, t_h3
46      integer t_p4b, t_p4
47      integer t_p5b, t_p5
48      integer t_p6b, t_p6
49      integer k_singles,l_singles
50      integer k_doubles,l_doubles
51ckbn-2
52c      integer k_q3fnt2,l_q3fnt2
53      integer size,i,j
54      integer nxtask
55      integer next
56      integer nprocs
57      integer count
58      integer offset_p4,offset_p5,offset_p6
59      integer offset_h1,offset_h2,offset_h3
60      integer range_p4,range_p5,range_p6
61      integer range_h1,range_h2,range_h3
62      double precision energy(3)
63c      double precision energy1,energy2
64ckbn-2
65      double precision energy3
66      double precision factor,denom
67      double precision denom_p4,denom_p5,denom_p6
68      double precision denom_h1,denom_h2,denom_h3
69      external nxtask
70ckbn-3
71      logical nodezero
72      integer d_f1,k_f1_offset
73      double precision dnorm1,dnorm2,dmaxt1,dmaxt2
74      double precision dmint1,dmint2
75      integer ioff
76      integer orbspin(6),orbindex(6),off(6),blck(6)
77      integer itype,idim1, idim2(2),idim3,ndim
78      INTEGER NXTASKsub
79      EXTERNAL NXTASKsub
80
81ckbn-2
82      nodezero=(ga_nodeid().eq.0)
83
84      if(lusesub) then
85      nprocs = GA_pgroup_NNODES(mypgid)
86      count = 0
87      next = NXTASKsub(nprocs,1,mypgid)
88      else
89      nprocs = GA_NNODES()
90      count = 0
91      next = nxtask(nprocs,1)
92      endif
93c
94      energy(1)=0.0d0
95      energy(2)=0.0d0
96      energy(3)=0.0d0
97c      energy1  =0.0d0
98c      energy2  =0.0d0
99      energy3  =0.0d0
100c
101c MIC
102c
103#ifdef USE_OFFLOAD
104      triplesx_alloced=.false.
105      v2sub_alloced=.false.
106      t1sub_alloced=.false.
107      t2sub_alloced=.false.
108#endif
109c     estimate triplesx size
110      range_p4=0
111      do t_p4b = noab+1,noab+nvab
112         range_p4 = max(range_p4,int_mb(k_range+t_p4b-1))
113      enddo
114      range_h1=0
115      do t_h1b = 1,noab
116         range_h1 = max(range_h1,int_mb(k_range+t_h1b-1))
117      enddo
118      size=(range_p4**3)*(range_h1**3)
119      if (.not.MA_PUSH_GET(mt_dbl,size,'(T) singles',l_singles,
120     1     k_singles)) call errquit('ccsd_t: MA error sgl',
121     2     size,MA_ERR)
122      if (.not.MA_PUSH_GET(mt_dbl,size,'(T) doubles',l_doubles,
123     1     k_doubles)) call errquit('ccsd_t: MA error dbl',
124     2     size,MA_ERR)
125#ifdef USE_OFFLOAD
126      triplesx_copyback=.true.
127c      call util_align64(size)
128      triplesx_mxlgth=size
129      call offl_alloc(dbl_mb(k_doubles),size)
130      triplesx_alloced=.true.
131#endif
132c     stagger start of loop
133      call util_mpinap(100)
134c
135c
136      do t_p4b = noab+1,noab+nvab
137       range_p4 = int_mb(k_range+t_p4b-1)
138       offset_p4 = k_evl_sorted+int_mb(k_offset+t_p4b-1)-1
139       do t_p5b = t_p4b,noab+nvab
140        range_p5 = int_mb(k_range+t_p5b-1)
141        offset_p5 = k_evl_sorted+int_mb(k_offset+t_p5b-1)-1
142        do t_p6b = t_p5b,noab+nvab
143         range_p6 = int_mb(k_range+t_p6b-1)
144         offset_p6 = k_evl_sorted+int_mb(k_offset+t_p6b-1)-1
145         do t_h1b = 1,noab
146          range_h1 = int_mb(k_range+t_h1b-1)
147          offset_h1 = k_evl_sorted+int_mb(k_offset+t_h1b-1)-1
148          do t_h2b = t_h1b,noab
149           range_h2 = int_mb(k_range+t_h2b-1)
150           offset_h2 = k_evl_sorted+int_mb(k_offset+t_h2b-1)-1
151           do t_h3b = t_h2b,noab
152            range_h3 = int_mb(k_range+t_h3b-1)
153            offset_h3 = k_evl_sorted+int_mb(k_offset+t_h3b-1)-1
154            if (next.eq.count) then
155            if (int_mb(k_spin+t_p4b-1)
156     1         +int_mb(k_spin+t_p5b-1)
157     2         +int_mb(k_spin+t_p6b-1)
158     3      .eq.int_mb(k_spin+t_h1b-1)
159     4         +int_mb(k_spin+t_h2b-1)
160     5         +int_mb(k_spin+t_h3b-1)) then
161            if ((.not.restricted).or.
162     1         (int_mb(k_spin+t_p4b-1)
163     1         +int_mb(k_spin+t_p5b-1)
164     2         +int_mb(k_spin+t_p6b-1)
165     3         +int_mb(k_spin+t_h1b-1)
166     4         +int_mb(k_spin+t_h2b-1)
167     5         +int_mb(k_spin+t_h3b-1).le.8)) then
168            if (ieor(int_mb(k_sym+t_p4b-1),
169     1          ieor(int_mb(k_sym+t_p5b-1),
170     2          ieor(int_mb(k_sym+t_p6b-1),
171     3          ieor(int_mb(k_sym+t_h1b-1),
172     4          ieor(int_mb(k_sym+t_h2b-1),
173     5               int_mb(k_sym+t_h3b-1)))))).eq.0) then
174
175
176            size = range_p4 * range_p5 * range_p6
177     3           * range_h1 * range_h2 * range_h3
178c MIC
179c zeroing ---
180cc        call dcopy(size, 0.0d0, 0, dbl_mb(k_singles), 1)
181cc        call offl_zerofill(dbl_mb(k_singles),size)
182cc        call dcopy(size, 0.0d0, 0, dbl_mb(k_doubles), 1)
183cc        call offl_zerofill(dbl_mb(k_doubles),size)
184        call dcopy(triplesx_mxlgth, 0.0d0, 0, dbl_mb(k_singles), 1)
185
186        call dcopy(triplesx_mxlgth, 0.0d0, 0, dbl_mb(k_doubles), 1)
187#ifdef USE_OFFLOAD
188        call offl_zerofill(dbl_mb(k_doubles),triplesx_mxlgth)
189#endif
190c -----------
191c
192ccx            if (.not.MA_PUSH_GET(mt_dbl,size,'(T) singles',l_singles,
193ccx     1        k_singles)) call errquit('ccsd_t: MA error',1,MA_ERR)
194ccx            if (.not.MA_PUSH_GET(mt_dbl,size,'(T) doubles',l_doubles,
195ccx     1        k_doubles)) call errquit('ccsd_t: MA error',2,MA_ERR)
196ccx            do i = 0, size-1
197ccx              dbl_mb(k_singles+i) = 0.0d0
198ccx              dbl_mb(k_doubles+i) = 0.0d0
199ccx            enddo
200cx           call tce_mrcc_q3vnt2(dbl_mb(k_doubles),d_t2,d_v2,k_t2_offset,
201cx     1        k_v2_offset,t_h1b,t_h2b,t_h3b,t_p4b,t_p5b,t_p6b,2)
202            call ccsd_t_doubles_l(dbl_mb(k_doubles),
203     1        d_t2,d_v2,k_t2_offset,
204     1        k_v2_offset,t_h1b,t_h2b,t_h3b,t_p4b,t_p5b,t_p6b,2)
205           call tce_mrcc_q3vnt1_subg(dbl_mb(k_singles),d_t1,d_v2,
206     +        k_t1_offset,
207     1        k_v2_offset,t_h1b,t_h2b,t_h3b,t_p4b,t_p5b,t_p6b,2)
208           call tce_mrcc_q3fnt2_subg(dbl_mb(k_singles),d_f1,d_t2,
209     +           k_f1_offset,
210     +           k_t2_offset,t_h1b,t_h2b,t_h3b,t_p4b,t_p5b,t_p6b,2)
211
212            if (restricted) then
213              factor = 2.0d0
214            else
215              factor = 1.0d0
216            endif
217            if ((t_p4b.eq.t_p5b).and.(t_p5b.eq.t_p6b)) then
218              factor = factor / 6.0d0
219            else if ((t_p4b.eq.t_p5b).or.(t_p5b.eq.t_p6b)) then
220              factor = factor / 2.0d0
221            endif
222            if ((t_h1b.eq.t_h2b).and.(t_h2b.eq.t_h3b)) then
223              factor = factor / 6.0d0
224            else if ((t_h1b.eq.t_h2b).or.(t_h2b.eq.t_h3b)) then
225              factor = factor / 2.0d0
226            endif
227c
228c factor = [ 1/36, 1/18, 1/12, 1/6, 1/4, 1/3, 1/2, 1, 2]
229c
230            i = 0
231            do t_p4 = 1, range_p4
232             denom_p4 = dbl_mb(offset_p4+t_p4)
233             do t_p5 = 1, range_p5
234              denom_p5 = dbl_mb(offset_p5+t_p5)
235              do t_p6 = 1, range_p6
236               denom_p6 = dbl_mb(offset_p6+t_p6)
237               do t_h1 = 1, range_h1
238                denom_h1 = dbl_mb(offset_h1+t_h1)
239                do t_h2 = 1, range_h2
240                 denom_h2 = dbl_mb(offset_h2+t_h2)
241                 do t_h3 = 1, range_h3
242                  denom_h3 = dbl_mb(offset_h3+t_h3)
243
244cxx
245                  orbspin(1) = int_mb(k_spin+t_p4b-1)-1
246                  orbspin(2) = int_mb(k_spin+t_p5b-1)-1
247                  orbspin(3) = int_mb(k_spin+t_p6b-1)-1
248                  orbspin(4) = int_mb(k_spin+t_h1b-1)-1
249                  orbspin(5) = int_mb(k_spin+t_h2b-1)-1
250                  orbspin(6) = int_mb(k_spin+t_h3b-1)-1
251
252                  orbindex(1) = (1-orbspin(1)+int_mb(k_mo_indexm(iref)+
253     1           int_mb(k_offset+t_p4b-1)+t_p4-1))/2
254                  orbindex(2) = (1-orbspin(2)+int_mb(k_mo_indexm(iref)+
255     1           int_mb(k_offset+t_p5b-1)+t_p5-1))/2
256                  orbindex(3) = (1-orbspin(3)+int_mb(k_mo_indexm(iref)+
257     1           int_mb(k_offset+t_p6b-1)+t_p6-1))/2
258                  orbindex(4) = (1-orbspin(4)+int_mb(k_mo_indexm(iref)+
259     1           int_mb(k_offset+t_h1b-1)+t_h1-1))/2
260                  orbindex(5) = (1-orbspin(5)+int_mb(k_mo_indexm(iref)+
261     1           int_mb(k_offset+t_h2b-1)+t_h2-1))/2
262                  orbindex(6) = (1-orbspin(6)+int_mb(k_mo_indexm(iref)+
263     1           int_mb(k_offset+t_h3b-1)+t_h3-1))/2
264
265        orbindex(1) = moindexes(orbindex(1),orbspin(1)+1,iref)
266        orbindex(2) = moindexes(orbindex(2),orbspin(2)+1,iref)
267        orbindex(3) = moindexes(orbindex(3),orbspin(3)+1,iref)
268        orbindex(4) = moindexes(orbindex(4),orbspin(4)+1,iref)
269        orbindex(5) = moindexes(orbindex(5),orbspin(5)+1,iref)
270        orbindex(6) = moindexes(orbindex(6),orbspin(6)+1,iref)
271
272
273      if(isactive(orbindex(1),orbspin(1)+1).and.
274     1 isactive(orbindex(2),orbspin(2)+1) .and.
275     2 isactive(orbindex(3),orbspin(3)+1) .and.
276     3 isactive(orbindex(4),orbspin(4)+1) .and.
277     4 isactive(orbindex(5),orbspin(5)+1) .and.
278     5 isactive(orbindex(6),orbspin(6)+1)) then
279        dbl_mb(k_doubles+i) = 0.0d0
280       endif
281cxx
282
283                  if(.not.lusesamefock_nonit) then
284                  denom = 1.0d0 / ((denom_h1 + denom_h2 + denom_h3)
285     +                           - (denom_p4 + denom_p5 + denom_p6))
286
287
288                  else
289                  orbspin(1) = int_mb(k_spin+t_p4b-1)-1
290                  orbspin(2) = int_mb(k_spin+t_p5b-1)-1
291                  orbspin(3) = int_mb(k_spin+t_p6b-1)-1
292                  orbspin(4) = int_mb(k_spin+t_h1b-1)-1
293                  orbspin(5) = int_mb(k_spin+t_h2b-1)-1
294                  orbspin(6) = int_mb(k_spin+t_h3b-1)-1
295
296                  orbindex(1) = (1-orbspin(1)+int_mb(k_mo_indexm(iref)+
297     1           int_mb(k_offset+t_p4b-1)+t_p4-1))/2
298                  orbindex(2) = (1-orbspin(2)+int_mb(k_mo_indexm(iref)+
299     1           int_mb(k_offset+t_p5b-1)+t_p5-1))/2
300                  orbindex(3) = (1-orbspin(3)+int_mb(k_mo_indexm(iref)+
301     1           int_mb(k_offset+t_p6b-1)+t_p6-1))/2
302                  orbindex(4) = (1-orbspin(4)+int_mb(k_mo_indexm(iref)+
303     1           int_mb(k_offset+t_h1b-1)+t_h1-1))/2
304                  orbindex(5) = (1-orbspin(5)+int_mb(k_mo_indexm(iref)+
305     1           int_mb(k_offset+t_h2b-1)+t_h2-1))/2
306                  orbindex(6) = (1-orbspin(6)+int_mb(k_mo_indexm(iref)+
307     1           int_mb(k_offset+t_h3b-1)+t_h3-1))/2
308
309                 do j=1,6
310                  blck(j) = orbinblck(orbindex(j),orbspin(j)+1,1)
311                  off(j) = offsetinblck(orbindex(j),orbspin(j)+1,1)
312                 enddo
313
314                denom = 1.0d0/(
315     +                  -dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1)
316     +                         +blck(1)-1)+off(1))
317     +                  -dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1)
318     +                         +blck(2)-1)+off(2))
319     +                  -dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1)
320     +                         +blck(3)-1)+off(3))
321     +                  +dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1)
322     +                         +blck(4)-1)+off(4))
323     +                  +dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1)
324     +                         +blck(5)-1)+off(5))
325     +                  +dbl_mb(k_evl_sortedm(1)+int_mb(k_offsetm(1)
326     +                         +blck(6)-1)+off(6)))
327                endif ! lusesamefock_nonit
328
329                if((abs(denom)).gt.1.0d2) then
330c                 if(nodezero) write(LuOut,*)
331                 write(LuOut,*)
332     +           'Warning:Denominator is very low. 1/D=',abs(denom)
333c                 if(nodezero) call util_flush(LuOut)
334                endif
335
336
337c                  energy1 = energy1 + factor*denom
338c     1                    * dbl_mb(k_doubles+i)*dbl_mb(k_doubles+i)
339c                  energy2 = energy2 + factor*denom*dbl_mb(k_doubles+i)
340c     1                    * (dbl_mb(k_doubles+i)+dbl_mb(k_singles+i))
341                  energy3 = energy3 + factor*denom*dbl_mb(k_doubles+i)
342     +                    * (dbl_mb(k_doubles+i)+dbl_mb(k_singles+i))
343
344
345                  i = i + 1
346                 enddo
347                enddo
348               enddo
349              enddo
350             enddo
351            enddo
352c            if (.not.MA_POP_STACK(l_q3fnt2))
353c     1        call errquit('ccsd_t',2,MA_ERR)
354ccx            if (.not.MA_POP_STACK(l_doubles))
355ccx     1        call errquit('ccsd_t',3,MA_ERR)
356ccx            if (.not.MA_POP_STACK(l_singles))
357ccx     1        call errquit('ccsd_t',4,MA_ERR)
358            endif
359            endif
360            endif
361c            ENDIF
362            if(lusesub) then
363            next = NXTASKsub(nprocs,1,mypgid)
364            else
365            next = nxtask(nprocs,1)
366            endif
367            endif
368            count = count + 1
369           enddo
370          enddo
371         enddo
372        enddo
373       enddo
374      enddo
375c
376c
377#ifdef USE_OFFLOAD
378      call offl_free(dbl_mb(k_doubles),size)
379      triplesx_alloced=.false.
380#if 0
381      call offl_free(dbl_mb(k_singles),size)
382#endif
383#endif
384
385      if (.not.MA_POP_STACK(l_doubles))
386     1     call errquit('ccsd_t doubles',3,MA_ERR)
387      if (.not.MA_POP_STACK(l_singles))
388     1     call errquit('ccsd_t singles',4,MA_ERR)
389c
390c
391      if(lusesub) then
392      next = NXTASKsub(-nprocs,1,mypgid)
393      call ga_pgroup_sync(mypgid)
394      else
395      next = nxtask(-nprocs,1)
396      call ga_sync()
397      endif
398c      energy(1) = energy1
399c      energy(2) = energy2
400      energy(3) = energy3
401      if(lusesub) then
402c     call ga_pgroup_dgop(p_handle, type, buf, lenbuf, op)
403      call ga_pgroup_dgop(mypgid, mt_dbl,energy,3,'+')
404      else
405      call ga_dgop(mt_dbl,energy,3,'+')
406      endif
407c      energy1 = energy(1)
408c      energy2 = energy(2)
409      energy3 = energy(3)
410
411c      if(lusesub)then
412c      if(ga_pgroup_nodeid(mypgid).eq.0) then
413cc      write(*,'(A,F17.10,A,F17.10,A,F17.10)')"energy1 ",energy1,
414cc     +   " energy2 ",energy2," energy3 ",energy3
415c      write(*,'(A,F17.10,A,F17.10,A,F17.10)')"energy3 ",energy3
416c      endif
417c      else
418c      if(nodezero) then
419cc      write(*,'(A,F17.10,A,F17.10,A,F17.10)')"energy1 ",energy1,
420cc     +   " energy2 ",energy2," energy3 ",energy3
421c      write(*,'(A,F17.10,A,F17.10,A,F17.10)')"energy3 ",energy3
422c      endif
423c      endif
424
425      return
426      end
427c $Id$
428