1      subroutine uccsdt_cterm(urange,vrange,qO_handles,qV_handles)
2c
3c$Id$
4c
5      implicit none
6#include "mafdecls.fh"
7#include "bas.fh"
8#include "global.fh"
9#include "cuccsdtP.fh"
10c
11      integer urange(2,0:7), vrange(2,0:7)
12      integer qO_handles(0:7,8), qV_handles(0:7,8)
13c
14c     i,j = particle-transformed occupied orbitals
15c     m,n = hole-transformed occupied orbitals
16c     a,b,e,f = holes
17c     u,v = symmetry-adapted occupied orbitals
18c
19c     u - the SOs arising from the unique AO shells in the
20c     .   range ushuqlo, ushuqhi
21c     All other indices span the complete range.
22c
23c     IN CCTRANS:
24c     #  Integral        Spins           Storage  Used for
25c     -- --------------- --------------- -------- --------------
26c     1. (ui|vj)-(uj|vi) i=alpha j=alpha I(jv,iu) Z1,Z6
27c     2. (ui|vj)         i=alpha j=beta           Z2,Z7
28c     3. (ui|vj)         i=beta  j=alpha          Z5,Z8
29c     4. (ui|vj)-(uj|vi) i=beta  j=beta           Z3,Z4
30c
31c     IN CTERM:
32c     Lists 1,2,3,4 are stored as the amplitudes but as if u (SO)
33c     were a virtual orbital in a reduced space.  And each symmetry
34c     block of jb needs to be in a distinct GA (or copied there when
35c     used).
36c
37c     (note v->b must be done before fn loop)
38c
39c     1. (iu|jb) i=a=alpha j=b=alpha   antisymmetrized
40c     2. (iu|jb) i=a=alpha j=b=beta
41c     3. (iu|jb) i=a=beta  j=b=alpha
42c     4. (iu|jb) i=a=beta  j=b=beta    antisymmetrized
43c
44c     IN CCTRANS:
45c     9. (ui|vn)-(uv|in) i=alpha n=alpha I(nv,iu) aaC1,abC4
46c     10.(ui|vn)         i=alpha n=beta           abC1,bbC2
47c     11.(ui|vn)         i=beta  n=alpha          abC5,aaC2
48c     12.(ui|vn)-(uv|in) i=beta  n=beta           abC1,abC2
49c
50c     13.(uv|in)         i=alpha n=alpha I(nv,iu) abC3
51c     14.(uv|in)         i=beta  n=beta           abC5
52c
53c     IN CTERM:
54c     Lists 9,10,11,12,13,14 also stored as the amplitudes.
55c     The antisymmetrization must take place before the
56c     final index transformation of lists 13 and 14.
57c
58c     9. I(iu,nf) = <if||un> = (iu|nf) - (in|uf) i=n=alpha n=f=alpha
59c     10.I(iu|nf) = <if|un>  = (iu|nf) i=u=alpha n=f=beta
60c     11.I(iu|nf) = <if|un>  = (iu|nf) i=u=beta  n=f=alpha
61c     12.I(iu,nf) = <if||un> = (iu|nf) - (in|uf) i=beta  n=beta
62c     13.(in|uv) i=n=alpha
63c     14.I(iu,nf) = <fi|un> = (in|uf) i=n=beta f=u=alpha
64c
65c     IN CCTRANS:
66c     The occ-SO pairs are stored as follows
67c
68c     fill oso_off with -99999999
69c     ind = 0
70c     do symiu
71c     .  do u in natural order
72c     .     -> symu and symi
73c     .     oso_off(u,symiu) = ind
74c     .     do i of symi
75c     .        pair(1+ind) = T(i,u)
76c     .        ind = ind + 1
77c
78c     Can also address pairs as
79c     pair(1 + i-o_sym(1,symi,spini) + oso_off(u,symiu)) = T(i,u)
80c
81c     The lists will be used in large matrix multiplications
82c     as follows:
83c
84c     Z(iu,nw) = <ij|uv>*T(jv,nw)
85c     do sym(iu) -> sym(jv) -> sym(nw)
86c     .   Read T, allocate Z
87c     .   Z(iu,nw) <- I(jv,iu)*T(jv,nw)
88c     end do
89c
90c     For best performance we need the symmetry sub-blocks distributed
91c     across the whole machine as separate dense arrays.  Thus, each
92c     list is stored with a separate GA for each pair symmetry.
93c
94
95c
96c     STORED LIKE T(nv,mu) -> (um||vn) -> <uv||mn>
97c     Have lists 5-8 <uv|mn> distributed over mn ... transform
98c     uv to ef, and accumlate <ef|mn> to disk.
99c     <ef||mn> -> R(me,nf)
100c
101
102c
103c        generate T(iu,nf) aa,aa; ab,ab; ab,ba; ba,ab; ba,ba; bb,bb;
104c        T stored as T(ia,jb,irrep) = T(i,a,symj,j,b,symb,symjb)
105c        getT2 = uccsdt_ampfile_read_t2(file,spini,spina,spinj,spinb,
106c                symjb,blo,bhi,g_t2,ocreate,distribution=block or column)
107c
108
109c
110c     Pure aa C terms (e=f=m=n=alpha)
111c
112
113C
114C     GENERATION OF C-TERMS OF ALPHA-ALPHA SPIN-BLOCK.
115C     Z-INTERMEDIATES ARE USED TO GENERATE NECESSARY Q-INTERMEDIATES FOR E-TERMS
116C
117      do symnf = 0, nir-1
118         symiu = symnf
119         symme = symnf
120         symjv = symiu
121c
122c     we need a subroutine to get the correct t-block and to do the transformation
123c     collect z-terms with tjbnfmix and tjbnfpure
124c     deallocate t-terms
125c     generate q-terms
126c     now generate tiumepure and tiumemix and combine with Z-terms into r-term
127c     deallocate t-terms
128c
129         dimnf = ov_len(symnf,1,1)
130         dimiu_a = iu_len(symiu,1)
131         dimiu_b = iu_len(symiu,2)
132         dimjv_a = iso_len(symjv,1)
133         dimjv_b = iso_len(symjv,2)
134c        allocate TJBNFmix
135c        get T(jb,nf,symnf) or T(jv,nf,symnf) j=b=beta n=f=alpha
136         call get_T(g_t2_jbnf_mix,symnf,2,2,1,1,urange)
137c
138c     Z2(iu,nf) = <ij|ub>*T(jb,nf) i=u=alpha j=b=beta (integral list 2)
139c     Z2(iu,nf) = I(jv,iu)^t * T(jv,nf)
140c     Q2 from Z2
141c     <if||un> += Z2 i=u=f=n=alpha (integral list 9 modified) = I(nf,iu)
142c
143         if (.not. ga_create(MT_DBL,dimnf,dimiu_a,'z2',0,0,g_z2)
144     &      call errquit('cterm: alloc z2 failed',0,0)
145         call ga_dgemm('t','n',dimnf,dimiu_a,dimjv_b,1.0d0,
146     &                 g_t2_jbnf_mix,list(symiu,2),0.0d0,g_z2)
147         call make_qO(qO_handles(0,2),g_z2,symnf,1,1)
148         call make_qV(qV_handles(0,2),g_z2,symnf,1,1)
149         call oso_moindx2_add(g_z2,list(symnf,9),symnf,1,1,1,1)
150c
151c     Z3(iu,nf) = <ij||ub> * T(jb,nf)  i=u=j=b=beta  (integral list 4)
152c     R(me,nf)  = T(iu,me)*[<fi|nu> + 0.5*Z3(iu,nf)]     (integral list 11)
153c
154         if (.not. ga_create(MT_DBL,dimnf,dimiu_b,'z3',0,0,g_z3)
155     &       call errquit('cterm: alloc z3 failed',0,0)
156         call ga_dgemm('t','n',dimnf,dimiu_b,dimjv_b,1.0d0,
157     &      g_t2_jbnf_mix,list(symiu,4),0.0d0,g_z3)
158         if (.not. ga_destroy(g_t2_jbnf_mix)) call
159     &       errquit('cterm: dealloc g_t2_jbnf_mix failed',0)
160         call ga_scale(g_z3,0.5d0)
161         call oso_moindx2_add(g_z3,list(symnf,11),symnf,1,1,2,2)
162c        MxM T*Z3 -> R(me,nf) all me, nf in block
163c        get T(me,iu,symiu/symme)
164         call get_T(g_t2_iume_mix,symiu,1,1,2,2,urange)
165         call ga_dgemm('n','t',dimme,dimnf,dimiu_b,1.0d0,
166     &      g_t2_iume_mix,g_z3,1.0d0,g_raa)
167         if (.not. ga_destroy(g_t2_iume_mix)) call
168     &       errquit('cterm: dealloc g_t2_iume_mix failed',0)
169         if (.not. ga_destroy(g_z3)) call
170     &       errquit('cterm: dealloc g_z3 failed',0)
171
172c     Here b could be v and T(jv,nf)
173c     Z1(iu,nf) = <ij||ub>*T(jb,nf) i=j=u=b=n=f=alpha (integral list 1)
174c     Q1
175c     R(me,nf) = T(iu,me)*(<if||un>+Z2(iu,nf)+0.5*Z1(iu,nf))
176c     <if||un> += Z2(iu,nf) (modified Z2)
177c
178         call get_T(g_t2_jbnf_pure,symnf,1,1,1,1,urange)
179         if (.not. ga_create(MT_DBL,dimnf,dimiu_a,'z1',0,0,g_z1,
180     &      call errquit('cterm: alloc z1 failed',0,0)
181         call ga_dgemm('t','n',dimnf,dimiu_a,dimjb_a,1.0d0,
182     &      g_t2_jbnf_pure,list(symiu,1),0.0d0,g_z1)
183         call make_qO(qO_handles(0,1),g_z1,symnf,1,1)
184         call make_qV(qV_handles(0,1),g_z1,symnf,1,1)
185         call ga_add(0.5d0,g_z1,1.0d0,g_z2,g_z1)
186         call ga_dgemm('n','n',dimnf,dimnf,dimiu_a,1.0d0,
187     &      g_t2_iume_pure,g_z1,1.0d0,g_raa)
188         if (.not. ga_destroy(g_t2_jbnf_pure)) call
189     &       errquit('cterm: dealloc g_t2_jbnf_pure failed',0)
190         if (.not. ga_destroy(g_z2)) call
191     &       errquit('cterm: dealloc g_z2 failed',0)
192         if (.not. ga_destroy(g_z1)) call
193     &       errquit('cterm: dealloc g_z1 failed',0)
194      end do
195C
196C     GENERATION OF C-TERMS OF BETA-BETA SPIN-BLOCK PLUS
197C     GENERATION OF C1- and C2-TERMS OF MIXED SPIN-BLOCK
198C     Z-INTERMEDIATES ARE USED TO GENERATE NECESSARY Q-INTERMEDIATES FOR E-TERMS
199C
200      do symnf = 0, nir-1
201         symiu = symnf
202         symme = symnf
203         symjv = symiu
204         dimnf = ov_len(symnf,2,2)
205         dimiu_a = iu_len(symiu,1)
206         dimiu_b = iu_len(symiu,2)
207         dimjv_a = iso_len(symjv,1)
208         dimjv_b = iso_len(symjv,2)
209c        allocate TIUMEpure
210c        get T(iu,me) i=u=m=e=beta
211         call get_T(g_t2_iume_pure,symme,2,2,2,2,urange)
212c        allocate TIUMEmix
213c        get T(iu,me) i=u=alpha, m=e=beta
214         call get_T(g_t2_iume_mix,symme,1,1,2,2,urange)
215c
216c     Here b could be v and T(jv,nf)
217c     Z5(iu,nf) = <ij|ub>*T(jb,nf) i=u=beta j=b=alpha (integral list 3)
218         if (.not. ga_create(MT_DBL,dimnf,dimiu_b,'z5',0,0,
219     &       g_z5) call errquit('cterm: alloc z5 failed',0,0)
220         call ga_dgemm('t','n',dimnf,dimiu_a,dimjv_b,1.0d0,
221     &                 g_t2_jbnf_mix,list(symiu,3),0.0d0,g_z5)
222         call make_qO(qO_handles(0,5),g_z5,symnf,2,2)
223         call make_qV(qV_handles(0,5),g_z5,symnf,2,2)
224c     <if||un> += Z5(iu,nf) (modified list 12)
225         call oso_moindx2_add(g_z5,list(symnf,12),symnf,2,2,2,2)
226         if (.not. ga_destroy(list(symiu,12))) call
227     &       errquit('cterm: dealloc list 12 failed',0)
228c     Z6(iu,nf) = <ij||ub> * T(jb,nf)  i=u=j=b=alpha  (integral list 1)
229         if (.not. ga_create(MT_DBL,dimnf,dimiu_a,'z6',0,0,g_z6)
230     &       call errquit('cterm: alloc z6 failed',0,0)
231         call ga_dgemm('t','n',dimnf,dimiu_a,dimjv_a,1.0d0,
232     &      g_t2_jbnf_mix,list(symiu,1),0.0d0,g_z6)
233         if (.not. ga_destroy(g_t2_jbnf_mix)) call
234     &       errquit('cterm: dealloc g_t2_jbnf_mix failed',0)
235         if (.not. ga_create(MT_DBL,dimnf,dimiu_a,'z7',0,0,g_z7)
236     &       call errquit('cterm: alloc z7 failed',0,0)
237c     copy z6 to z7 for use in mixed spin
238         call ga_copy(g_z6,g_z7)
239         call ga_scale(g_z6,0.5d0)
240         call oso_moindx2_add(g_z6,list(symnf,10),symnf,2,2,1,1)
241c        MxM T*Z3 -> R(me,nf) all me, nf in block
242c        get T(me,iu,symiu/symme)
243         call get_T(g_t2_iume_mix,symiu,1,1,2,2,urange)
244         call ga_dgemm('n','t',dimme,dimnf,dimiu_a,1.0d0,
245     &      g_t2_iume_mix,g_z6,1.0d0,g_rbb)
246         if (.not. ga_destroy(g_z6)) call
247     &       errquit('cterm: dealloc g_z6 failed',0)
248         if (.not. ga_destroy(g_t2_iume_mix)) call
249     &       errquit('cterm: dealloc g_t2_iume_mix failed',0)
250c     Z4(iu,nf) = <ij||ub>*T(jb,nf) i=j=u=b=n=f=beta (integral list 4)
251         call get_T(g_t2_jbnf_pure,symnf,1,1,1,1,urange)
252         if (.not. ga_create(MT_DBL,dimnf,dimiu_a,'z4',0,0,g_z4,
253     &       call errquit('cterm: alloc z4 failed',0,0)
254         call ga_dgemm('t','n',dimnf,dimiu_a,dimjb_a,1.0d0,
255     &      g_t2_jbnf_pure,list(symiu,4),0.0d0,g_z4)
256         call make_qO(qO_handles(0,4),g_z4,symnf,2,2)
257         call make_qV(qV_handles(0,4),g_z4,symnf,2,2)
258         call ga_add(0.5d0,g_z4,1.0d0,g_z5,g_z5)
259         call ga_dgemm('n','n',dimnf,dimnf,dimiu_a,1.0d0,
260     &      g_t2_iume_pure,g_z5,1.0d0,g_rbb)
261c     need the additional contribution of 1/2 Z4 for C2 term in mixed spin
262         call ga_add(0.5d0,g_z4,1.0d0,g_z5,g_z5)
263         if (.not. ga_destroy(g_t2_jbnf_pure)) call
264     &       errquit('cterm: dealloc g_t2_jbnf_pure failed',0)
265         if (.not. ga_destroy(g_z4)) call
266     &       errquit('cterm: dealloc g_z4 failed',0)
267         if (.not. ga_destroy(list(symiu,1))) call
268     &       errquit('cterm: dealloc list 1 failed',0)
269         if (.not. ga_destroy(list(symiu,4))) call
270     &       errquit('cterm: dealloc list 4 failed',0)
271C
272C   WE WILL DO SOME MIXED TERMS AS WE ARE REUSING SOME BETA Z-BLOCKS
273C
274c
275c g_z5 (which is z4+z5+<if||an>) and g_z7 (which contains z6)
276c are still active. We will use those in C2 and C1 mixed spin terms
277c
278c
279c   C2 += t(me,ia)([if|an> + Z4 + Z5]
280c   term in [] already stored in g_z5, just matmul with t
281c
282         call get_T(g_t2_meia_mix,symnf,1,1,2,2,urange)
283         call ga_dgemm('n','n',dimnf,dimnf,dimiu_b,1.0d0,
284     &      g_t2_iume_pure,g_z5,1.0d0,g_rab)
285         if (.not. ga_destroy(g_t2_jbnf_pure)) call
286     &       errquit('cterm: dealloc g_t2_jbnf_pure failed',0)
287         if (.not. ga_destroy(g_z7)) call
288     &       errquit('cterm: dealloc g_z1 failed',0)
289c
290c   Make Z7 with T is pure beta
291c   Add to g_z6 and matmul with t
292c   C1 += -t(ie,ma)*[<if|an> + Z6 + Z7] with T is pure alpha
293c
294         call get_T(g_t2_jbnf_pure,symnf,2,2,2,2,urange)
295         call ga_dgemm('t','n',dimnf,dimiu_a,dimjv_b,1.0d0,
296     &                 g_t2_jbnf_pure,list(symiu,2),1.0d0,g_z7)
297         call oso_moindx2_add(g_z7,list(symnf,10),symnf,1,1,1,1)
298         call get_T(g_t2_iema_pure,symnf,1,1,1,1,urange)
299         call ga_dgemm('n','n',dimnf,dimnf,dimiu_a,1.0d0,
300     &      g_t2_iume_pure,g_z7,1.0d0,g_rab)
301         if (.not. ga_destroy(g_t2_iema_pure)) call
302     &       errquit('cterm: dealloc g_t2_jbnf_pure failed',0)
303         if (.not. ga_destroy(g_t2_jbnf_pure)) call
304     &       errquit('cterm: dealloc g_t2_jbnf_pure failed',0)
305         if (.not. ga_destroy(g_z7)) call
306     &       errquit('cterm: dealloc g_z1 failed',0)
307         if (.not. ga_destroy(list(symiu,2))) call
308     &       errquit('cterm: dealloc list 2 failed',0)
309         if (.not. ga_destroy(list(symiu,10))) call
310     &       errquit('cterm: dealloc list 10 failed',0)
311      end do
312c
313c
314c     Mixed-spin terms (e=m=alpha n=f=beta)
315c
316C
317C     GENERATION OF C-TERMS OF REMAINING MIXED ALPHA-BETA SPIN BLOCK
318C
319      do symnf = 0, nir-1
320         symiu = symnf
321         symme = symnf
322         symjv = symiu
323         dimnf = ov_len(symnf,2,2)
324         dimnf = ov_len(symnf,2,2)
325         dimiu_a = iu_len(symiu,1)
326         dimiu_b = iu_len(symiu,2)
327         dimjv_a = iso_len(symjv,1)
328         dimjv_b = iso_len(symjv,2)
329c
330c        do blocks nf n=f=beta
331c
332c        allocate R(me,nf)
333c
334c     C4 : I DON'T THINK WE NEED THIS TERM, REARRANGEMENT REMOVED THIS ONE
335c     Another piece of C5 using list 9
336c     R(me,nf) += t(iu,nf)*[<ie||um> + Z2(iu,me)]
337c            I think we have the term inside [] in Z2, only multiply with T
338c     .      e=m=i=u=alpha n=f=beta  (use modified list 9)
339c
340c        allocate TIUNFmix
341c        get T(iu,nf) i=u=alpha, n=f=beta
342         call get_T(g_t2_iunf_mix,symnf,1,1,2,2,urange)
343         MxM I(iu,me)*T(iu,nf) -> R  (list 9)
344         call ga_dgemm('t','n',dimiu,dimnf,dimjb,1.0d0,
345     &      list(symiu,9),g_t2_iunf_mix,1.0d0,g_rab)
346         free TIUNFmix
347c
348c     C5
349c     R(me,nf) += t(iu,nf)*<ei|mu>
350c     .      e=m=alpha  n=f=i=u=beta (list 11)
351c
352c        allocate TIUNFpure
353c        get T(iu,nf) i=u=n=f=beta
354         call get_T(g_t2_iunf_pure,symnf,1,1,1,1,urange)
355c        MxM I(iu,me)*T(iu,nf) -> R (list 11)
356         call ga_dgemm('t','n',dimiu,dimnf,dimjb,1.0d0,
357     &      list(symiu,11),g_t2_iunf_pure,1.0d0,g_rab)
358
359c        accumulate R to disk
360c        free R
361         end do
362c        free lists 9 and 11
363         if (.not. ga_destroy(list(symiu,9))) call errquit()
364         if (.not. ga_destroy(list(symiu,11))) call errquit()
365      end do
366c
367c
368      do symfn = 0, nir-1
369         symme = symfn
370         do symf = 0, nir-1
371            symn = ieor(symfn,symf)
372            do blocks of f
373               allocate & zero R(me,nf)
374               do symi = 0, nir-1
375                  symif = ieor(symi,symf)
376                  allocate T(mu,if) m=u=alpha i=f=beta
377                  get T(mu,if) (read #sym times due to sym(fn) loop)
378c
379c     C5
380c     R(mf,ne) += - t(iu,mf)*<ei|un>
381c                            I(en,iu)
382c     .      e=m=u=alpha n=f=i=beta  (list 14)
383c
384c     Complexity here is that the OV pairs are mixed spin
385c     whereas in most other uses each OV pair is pure spin
386c     so we end up having to do an in-core transpose.
387c
388                  do symm = 0, nir-1
389                     syme = ieor(symme,symm)
390                     symu = syme
391                     allocate R(mf,ne) ... dense 4-D array
392                     allocate T(iu,mf) ... dense 4-D array
393                     transpose T(mu,if) into T(iu,mf)
394                     call ga_transpose(t_array)
395                     MxM T(iu,mf)*I(iu,ne) -> R(mf,ne)
396                     call ga_dgemm('t','n',dimiu,dimnf,dimjb,1.0d0,
397     &                    list(symiu,14),g_t2_jbnf_pure,1.0d0,g_z4)
398                     transpose accumulate R(mf,ne) into R(me,nf)
399                  end do
400                  free T(mu,if)
401               end do
402               accumulate R(me,nf) to disk
403               free R(me,nf)
404            end do
405         end do
406      end do
407c
408      destroy list(symnf,14)
409c
410c     Make Z8
411c
412c     Z8(iu,mf) = <ij|bu> * t(mb,jf) m=ib=alpha f=j=a=beta
413c     .         = I(iu,jb) * T(jb,mf) (transposed list 3)
414c
415      allocate Z8
416      do symjf = 0, nir-1
417         symmb = symjf
418         do symf = 0, nir-1
419            symj = ieor(symjf,symf)
420            do blocks of f
421               allocate T(mb,jf)
422               get T(mb,jf)
423                  do symb = 0, nir-1
424                  symm = ieor(symmb,symb)
425
426
427
428               end do
429            end do
430         end do
431c        do sum_i C_hole(ua) Z8(iu,if) and store in q_8_O(a,f) -> qO_handles(8)
432c        do sum_u C_hole(ua) Z8(iu,ma) and store in q_8_V(i,m) -> qV_handles(8)
433         call make_qO(qO_handles(0,8),g_z8,symjf,0,1)
434         call make_qV(qV_handles(0,8),g_z8,symjf,0,1)
435         destroy list(symnf,3)
436      end do
437c
438c   C3 += -t(ie,na)*[(if|ma) - Z8]
439c
440
441         destroy list(symnf,13)
442      end
443
444      subroutine get_T(g_t2,symjb,spini,spina,spinj,spinb,urange)
445      implicit none
446#include "mafdecls.fh"
447#include "bas.fh"
448#include "global.fh"
449#include "cuccsdtP.fh"
450c
451      integer spini,spina,spinj,spinb,symjb,g_t2
452      integer blo,bhi,urange(2,0:7)
453      integer t2_file
454c
455c        getT2 = uccsdt_ampfile_read_t2(file,spini,spina,spinj,spinb,
456c                symjb,blo,bhi,g_t2,ocreate,distribution=block or column)
457c
458      blo = v_sym(1,0,spinb)
459      bhi = v_sym(2,nir-1,spinb)
460      getT2 = uccsdt_ampfile_read_t2(t2_file,spini,spina,spinj,spinb,
461     &        symjb,blo,bhi,g_temp,.true.,'block')
462c
463c     allocate g_t2 with dimension for urange
464c     do dgemm with inverse of c_hole to get u back
465c
466      end
467
468      subroutine make_qO(qO_handles,g_z,symif,spini,spinf,urange)
469      implicit none
470#include "mafdecls.fh"
471#include "bas.fh"
472#include "global.fh"
473#include "cuccsdtP.fh"
474c
475      integer qO_handles(0:7),urange(1:2,0:7),g_z,symif,spini
476      integer spinf,symf,symu,symi,len_i,iindx,jindx,u,f
477      double precision fu_val, ifiu_val
478      logical i_match,j_match
479c
480c     WE NEED TO ACCUMULATE THE Q's DURING THE BLOCKING LOOP IN acefterms.F
481c     HENCE ADD CONTRIBUTIONS TO qO_handles
482c     spini=spinj and spinf=spina
483c     do sum_i C_hole(ua) Z2(iu,if) and store in q_2_O(a,f) -> qO_handles(2)
484c
485      call ga_distribution(g_z,ga_nodeid(),ilo,ihi,jlo,jhi)
486      jindx = 0
487      do symf = 0, nir-1
488         symu = symf
489         symi = ieor(symif,symf)
490         len_i = no(symi,spini)
491         len_u = urange(2,symu)-urange(1,symu)+1
492         len_f = v_sym(2,symf,spinf)-v_sym(1,symf,spinf)+1
493         if (.not. ma_push_get(mt_dbl,len_f,'uf block',l_uf,
494     &         k_uf)) call errquit('cterm: uf block alloc failed',0,0)
495         do u = urange(1,symu), urange(2,symu)
496            call ga_get(c_hole(spinf),urange(1,symu),urange(2,symu),
497     &               v_sym(1,symf,spinf),v_sym(2,symf,spinf),
498     &               dbl_mb(k_uf),len_u)
499            iindx = oso_u_off(u,symif,spini)
500            do f = v_sym(1,symf,spinf), v_sym(2,symf,spinf)
501               i_match = ilo.le.(iindx+len_i).and.ihi.gt.(iindx)
502               j_match = jlo.le.(jindx+len_i).and.jhi.gt.(jindx)
503               if (i_match.and.j_match) then
504                  do i = max(il0,iindx), min(ihi,iindx+len_i)
505                     do j = max(jl0,jindx), min(jhi,jindx+len_i)
506                        call ga_get(g_z,i,i,j,j,ifiu_val)
507                        fu_val = fu_val + ifiu_val
508                     end do
509                  end do
510                  call ga_acc(qO_handles(symf),alo,ahi,f,f,
511      &                       dbl_mb(k_uf),len_f,fu_val)
512               endif
513               iindx = iindx + len_i
514            end do
515            jindx = jindx + len_i
516         end do
517      end do
518c
519      return
520      end
521
522      subroutine make_qV(qV_handles,g_z,symif,spini,spinf,urange)
523      implicit none
524#include "mafdecls.fh"
525#include "bas.fh"
526#include "global.fh"
527#include "cuccsdtP.fh"
528c
529      integer qV_handles(0:7),urange(1:2,0:7),g_z,symif,spini
530      integer spinf,symf,symu,symi,len_i,iindx,jindx,u,f
531      double precision fu_val, ifiu_val
532      logical i_match,j_match
533c
534c     WE NEED TO ACCUMULATE THE Q's DURING THE BLOCKING LOOP IN acefterms.F
535c     HENCE ADD CONTRIBUTIONS TO qV_handles
536c     spini=spinj and spinf=spina
537c     do sum_u C_hole(ua) Z2(ma,iu) and store in q_2_V(i,m) -> qV_handles(2)
538c
539      call ga_distribution(g_z,ga_nodeid(),ilo,ihi,jlo,jhi)
540      jindx = 0
541      do symf = 0, nir-1
542         symu = symf
543         symi = ieor(symif,symf)
544         len_i = no(symi,spini)
545         len_u = urange(2,symu)-urange(1,symu)+1
546         len_f = v_sym(2,symf,spinf)-v_sym(1,symf,spinf)+1
547         if (.not. ma_push_get(mt_dbl,len_i*len_i,'im block',l_im,
548     &         k_im)) call errquit('cterm: im block alloc failed',0,0)
549         if (.not. ma_push_get(mt_dbl,len_f,'uf block',l_uf,
550     &         k_uf)) call errquit('cterm: uf block alloc failed',0,0)
551         do u = urange(1,symu), urange(2,symu)
552            call ga_get(c_hole(spinf),urange(1,symu),urange(2,symu),
553     &               v_sym(1,symf,spinf),v_sym(2,symf,spinf),
554     &               dbl_mb(k_uf),len_u)
555            iindx = oso_u_off(u,symif,spini)
556            do f = v_sym(1,symf,spinf), v_sym(2,symf,spinf)
557               i_match = ilo.le.(iindx+len_i).and.ihi.gt.(iindx)
558               j_match = jlo.le.(jindx+len_i).and.jhi.gt.(jindx)
559               if (i_match.and.j_match) then
560                 alo = max(il0,iindx)
561                 ahi = min(ihi,iindx+len_i)
562                 mlo = max(jl0,jindx)
563                 mhi = min(jhi,jindx+len_i)
564                 call ga_get(g_z,alo,ahi,mlo,mhi,dbl_mb(k_im),ahi-alo+1)
565                 call ga_acc(qV_handles(symf),alo,ahi,mlo,mhi,
566      &                   dbl_mb(k_im),ahi-alo+1,dbl_mb(k_uf+fpointer)
567               endif
568               iindx = iindx + len_i
569            end do
570            jindx = jindx + len_i
571         end do
572      end do
573c
574      end
575
576
577      subroutine oso_moindx2_add(g_a,g_b,sym,sa,sb,sc,sd)
578      implicit none
579#include "mafdecls.fh"
580#include "bas.fh"
581#include "global.fh"
582#include "cuccsdtP.fh"
583c
584      integer g_a,g_b,sym,sa,sb,sc,sd
585      integer iu,symf,symn,dimf,dimn,dimv,dimiu
586      integer l_nv,k_nv,l_nf,k_nf,l_vf,k_vf
587      integer nvlo,nvhi,vlo,vhi,flo,fhi,nflo,nfhi
588c
589c     Do the transformation (nv,iu) -> (nf,iu)
590c     Transformation is for C_part
591c     Hence nv -> nf or v -> f
592c     Loop over iu on the nodes and do a complete nv block at a time
593c
594      dimiu = iu_len(symiu,sc)
595      do iu = ga_nodeid()+1, dimiu, ga_nnodes()
596         do symf = 0, nir-1
597            symn = ieor(sym,symf)
598            dimf = nv_sym(symf,sb)
599            dimn = no_sym(symn,sa)
600            dimv = bf_per_ir(symf)
601            if (.not. ma_push_get(mt_dbl,dimn*dimv,'nv block',l_nv,
602     &         k_nv)) call errquit('cterm: nv block alloc failed',0,0)
603            if (.not. ma_push_get(mt_dbl,dimn*dimf,'nf block',l_nf,
604     &         k_nf)) call errquit('cterm: nf block alloc failed',0,0)
605            if (.not. ma_push_get(mt_dbl,dimv*dimf,'vf block',l_vf,
606     &         k_vf)) call errquit('cterm: vf block alloc failed',0,0)
607            nvlo = oso_off(bf_per_ir_cum(symf)+1,sym,1) + 1
608            nvhi = vlo + dimn*dimv
609            vlo = bf_per_ir_cum(symf)+1
610            vhi = vlo + dimv
611            flo = v_sym(1,symf,sb)
612            fhi = v_sym(2,symf,sb)
613            call ga_get(g_b,vlo,vhi,iu,iu,dbl_mb(k_nv),dimn)
614            call ga_get(g_part(sb),vlo,vhi,flo,fhi,dbl_mb(k_vf),dimv)
615            call dgemm('n','n',dimn,dimf,dimv,1.0d0,dbl_mb(k_nv),
616     &                 dimn,dbl_mb(k_vf),dimv,1.0d0,dbl_mb(k_nf),dimn)
617            nflo = ov_off(v_sym(1,symf,1),0:7,sa,sb) + 1
618            nfhi = flo + dimn*dimf
619            call ga_acc(g_a,nflo,nfhi,iu,iu,dbl_mb(k_nf),dimn*dimf,
620     &                  1.0d0)
621            if (.not. ma_pop_stack(l_vf)) call
622     &          errquit('oso_moindx2_add: vf block dealloc failed',0)
623            if (.not. ma_pop_stack(l_nf)) call
624     &          errquit('oso_moindx2_add: nf block dealloc failed',0)
625            if (.not. ma_pop_stack(l_nv)) call
626     &          errquit('oso_moindx2_add: nv block dealloc failed',0)
627         end do
628      end do
629c
630      end
631