1      SUBROUTINE ccsd_t2_8(d_a,k_a_offset,d_b,k_b_offset,d_c,k_c_offset)
2C     $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $
3C     This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
4C     Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
5C     i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v
6      IMPLICIT NONE
7#include "global.fh"
8#include "mafdecls.fh"
9#include "sym.fh"
10#include "errquit.fh"
11#include "tce.fh"
12      INTEGER d_a
13      INTEGER k_a_offset
14      INTEGER d_b
15      INTEGER k_b_offset
16      INTEGER d_c
17      INTEGER k_c_offset
18      INTEGER NXTASK
19      INTEGER next
20      INTEGER nprocs
21      INTEGER count
22      INTEGER p3b
23      INTEGER p4b
24      INTEGER h1b
25      INTEGER h2b
26      INTEGER dimc
27      INTEGER l_cs
28      INTEGER k_cs
29      INTEGER p5b
30      INTEGER p6b
31      INTEGER p5b_1
32      INTEGER p6b_1
33      INTEGER h1b_1
34      INTEGER h2b_1
35      INTEGER p3b_2
36      INTEGER p4b_2
37      INTEGER p5b_2
38      INTEGER p6b_2
39      INTEGER dim_common
40      INTEGER dima_sort
41      INTEGER dima
42      INTEGER dimb_sort
43      INTEGER dimb
44      INTEGER l_as
45      INTEGER k_as
46      INTEGER l_a
47      INTEGER k_a
48      INTEGER l_bs
49      INTEGER k_bs
50      INTEGER l_b
51      INTEGER k_b
52      INTEGER nsuperp(2)
53      INTEGER isuperp
54      INTEGER l_c
55      INTEGER k_c
56      integer p5b_in,p6b_in
57      DOUBLE PRECISION FACTORIAL
58      EXTERNAL NXTASK
59      EXTERNAL FACTORIAL
60      nprocs = GA_NNODES()
61      count = 0
62      next = NXTASK(nprocs, 1)
63      DO p3b = noab+1,noab+nvab
64      DO p4b = p3b,noab+nvab
65      DO h1b = 1,noab
66      DO h2b = h1b,noab
67      IF ((.not.restricted).or.(int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1
68     &)+int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN
69      IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq. int_mb(k_spin+h
70     &1b-1)+int_mb(k_spin+h2b-1)) THEN
71      IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1),ieor(int_mb(
72     &k_sym+h1b-1),int_mb(k_sym+h2b-1)))) .eq. ieor(irrep_v,irrep_t)) TH
73     &EN
74      IF (next.eq.count) THEN
75      dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1) * int_mb(k_ra
76     &nge+h1b-1) * int_mb(k_range+h2b-1)
77      IF (.not.MA_PUSH_GET(mt_dbl,dimc,'cs',l_cs,k_cs)) CALL
78     & ERRQUIT('ccsd_t2_8',0,MA_ERR)
79      CALL DFILL(dimc,0.0d0,dbl_mb(k_cs),1)
80#if 0
81      DO p5b = noab+1,noab+nvab
82      DO p6b = p5b,noab+nvab
83#else
84      DO p5b_in =ga_nodeid(),ga_nodeid()+nvab-1
85         p5b=mod(p5b_in,nvab)+noab+1
86         DO p6b_in=ga_nodeid(),ga_nodeid()+nvab+noab-p5b
87            p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b
88#endif
89      IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq. int_mb(k_spin+h
90     &1b-1)+int_mb(k_spin+h2b-1)) THEN
91      IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),ieor(int_mb(
92     &k_sym+h1b-1),int_mb(k_sym+h2b-1)))) .eq. irrep_t) THEN
93      CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b,p5b_1,p6b_1,h1b_1,h2b_1)
94      CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b,p3b_2,p4b_2,p5b_2,p6b_2)
95      dim_common = int_mb(k_range+p5b-1) * int_mb(k_range+p6b-1)
96      dima_sort = int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1)
97      dima = dim_common * dima_sort
98      dimb_sort = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1)
99      dimb = dim_common * dimb_sort
100      IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN
101      IF (.not.MA_PUSH_GET(mt_dbl,dima,'as',l_as,k_as)) CALL
102     & ERRQUIT('ccsd_t2_8',1,MA_ERR)
103      IF (.not.MA_PUSH_GET(mt_dbl,dima,'a',l_a,k_a)) CALL ERRQUIT('
104     &ccsd_t2_8',2,MA_ERR)
105      CALL GET_HASH_BLOCK(d_a,dbl_mb(k_a),dima,int_mb(k_a_offset),(h2b_1
106     & - 1 + noab * (h1b_1 - 1 + noab * (p6b_1 - noab - 1 + nvab * (p5b_
107     &1 - noab - 1)))))
108      CALL TCE_SORT_4(dbl_mb(k_a),dbl_mb(k_as),int_mb(k_range+p5b-1)
109     &,int_mb(k_range+p6b-1),int_mb(k_range+h1b-1),int_mb(k_range+h2b-1)
110     &,4,3,2,1,1.0d0)
111      IF (.not.MA_POP_STACK(l_a)) CALL ERRQUIT('ccsd_t2_8',3,MA_ERR)
112      IF (.not.MA_PUSH_GET(mt_dbl,dimb,'bs',l_bs,k_bs)) CALL
113     & ERRQUIT('ccsd_t2_8',4,MA_ERR)
114      IF (.not.MA_PUSH_GET(mt_dbl,dimb,'b',l_b,k_b)) CALL ERRQUIT('
115     &ccsd_t2_8',5,MA_ERR)
116      if(.not.intorb) then
117      CALL GET_HASH_BLOCK(d_b,dbl_mb(k_b),dimb,int_mb(k_b_offset),(p6b_2
118     & - 1 + (noab+nvab) * (p5b_2 - 1 + (noab+nvab) * (p4b_2 - 1 + (noab
119     &+nvab) * (p3b_2 - 1)))))
120      else
121      CALL GET_HASH_BLOCK_I(d_b,dbl_mb(k_b),dimb,int_mb(k_b_offset),
122     &(p6b_2
123     & - 1 + (noab+nvab) * (p5b_2 - 1 + (noab+nvab) * (p4b_2 - 1 + (noab
124     &+nvab) * (p3b_2 - 1)))),p6b_2,p5b_2,p4b_2,p3b_2)
125      end if
126      CALL TCE_SORT_4(dbl_mb(k_b),dbl_mb(k_bs),int_mb(k_range+p3b-1)
127     &,int_mb(k_range+p4b-1),int_mb(k_range+p5b-1),int_mb(k_range+p6b-1)
128     &,2,1,4,3,1.0d0)
129      IF (.not.MA_POP_STACK(l_b)) CALL ERRQUIT('ccsd_t2_8',6,MA_ERR)
130      nsuperp(1) = 1
131      nsuperp(2) = 1
132      isuperp = 1
133      IF (p5b .eq. p6b) THEN
134      nsuperp(isuperp) = nsuperp(isuperp) + 1
135      ELSE
136      isuperp = isuperp + 1
137      END IF
138      CALL DGEMM('T','N',dima_sort,dimb_sort,dim_common,2.0d0/FACTORIAL(
139     &nsuperp(1))/FACTORIAL(nsuperp(2)),dbl_mb(k_as),dim_common,dbl_
140     &mb(k_bs),dim_common,1.0d0,dbl_mb(k_cs),dima_sort)
141      IF (.not.MA_POP_STACK(l_bs)) CALL ERRQUIT('ccsd_t2_8',7,MA_ERR
142     &)
143      IF (.not.MA_POP_STACK(l_as)) CALL ERRQUIT('ccsd_t2_8',8,MA_ERR
144     &)
145      END IF
146      END IF
147      END IF
148      END DO
149      END DO
150      IF (.not.MA_PUSH_GET(mt_dbl,dimc,'c',l_c,k_c)) CALL ERRQUIT('
151     &ccsd_t2_8',9,MA_ERR)
152      CALL TCE_SORT_4(dbl_mb(k_cs),dbl_mb(k_c),int_mb(k_range+p4b-1)
153     &,int_mb(k_range+p3b-1),int_mb(k_range+h2b-1),int_mb(k_range+h1b-1)
154     &,2,1,4,3,1.0d0/2.0d0)
155      CALL ADD_HASH_BLOCK(d_c,dbl_mb(k_c),dimc,int_mb(k_c_offset),(h2b -
156     & 1 + noab * (h1b - 1 + noab * (p4b - noab - 1 + nvab * (p3b - noab
157     & - 1)))))
158      IF (.not.MA_POP_STACK(l_c)) CALL ERRQUIT('ccsd_t2_8',10,MA_ERR)
159      IF (.not.MA_POP_STACK(l_cs)) CALL ERRQUIT('ccsd_t2_8',11,MA_ER
160     &R)
161      next = NXTASK(nprocs, 1)
162      END IF
163      count = count + 1
164      END IF
165      END IF
166      END IF
167      END DO
168      END DO
169      END DO
170      END DO
171      next = NXTASK(-nprocs, 1)
172      call GA_SYNC()
173      RETURN
174      END
175
176      SUBROUTINE ccsd_t2_8_test(d_a,k_a_offset,
177     &                          d_b,k_b_offset,
178     &                          d_c,k_c_offset,
179     &                          maxh,maxp)
180C     $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $
181C     This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
182C     Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
183C     i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v
184      IMPLICIT NONE
185#include "global.fh"
186#include "mafdecls.fh"
187#include "sym.fh"
188#include "errquit.fh"
189#include "tce.fh"
190      INTEGER d_a,d_b,d_c
191      INTEGER k_a_offset,k_b_offset,k_c_offset
192      INTEGER maxh,maxp,dimhhpp,dimpppp,dimtemp
193      INTEGER next,nprocs,count
194      INTEGER p5b,p6b,p3b,p4b,h1b,h2b
195      INTEGER p5b_1,p6b_1,h1b_1,h2b_1
196      INTEGER p3b_2,p4b_2,p5b_2,p6b_2
197      INTEGER dima,dimb,dimc,dim_common,dima_sort,dimb_sort
198#ifdef USE_F90_ALLOCATABLE
199      double precision, allocatable :: f_a(:)
200      double precision, allocatable :: f_b(:)
201      double precision, allocatable :: f_c(:)
202      double precision, allocatable :: f_t(:)
203#ifdef USE_FASTMEM
204      !dec$ attributes fastmem :: f_a,f_b,f_c,f_t
205#endif
206      integer :: e_a,e_b,e_c,e_t
207#else
208      integer k_a, l_a
209      integer k_b, l_b
210      integer k_c, l_c
211      integer k_t, l_t
212      integer e_a,e_b,e_c,e_t
213#endif
214      double precision alpha
215      integer p5b_in,p6b_in
216      INTEGER NXTASK
217      EXTERNAL NXTASK
218      nprocs = GA_NNODES()
219      count = 0
220      next = NXTASK(nprocs, 1)
221
222      dimhhpp = maxh*maxh*maxp*maxp
223      dimpppp = maxp*maxp*maxp*maxp
224      dimtemp = max(dimpppp,dimhhpp)
225
226#ifdef USE_F90_ALLOCATABLE
227      allocate(f_a(1:dimhhpp),stat=e_a)
228      allocate(f_b(1:dimpppp),stat=e_b)
229      allocate(f_c(1:dimhhpp),stat=e_c)
230# ifndef USE_LOOPS_NOT_DGEMM
231      allocate(f_t(1:dimtemp),stat=e_t)
232# endif
233#else
234      e_a=0
235      if(.not.MA_PUSH_GET(mt_dbl,dimhhpp,"a",l_a,k_a)) e_a=-1
236      e_b=0
237      if(.not.MA_PUSH_GET(mt_dbl,dimpppp,"b",l_b,k_b)) e_b=-1
238      e_c=0
239      if(.not.MA_PUSH_GET(mt_dbl,dimhhpp,"c",l_c,k_c)) e_c=-1
240# ifndef USE_LOOPS_NOT_DGEMM
241      e_t=0
242      if(.not.MA_PUSH_GET(mt_dbl,dimtemp,"t",l_t,k_t)) e_t=-1
243# else
244      dimtemp=-12345
245      e_t=.false.
246# endif
247#endif
248      if (e_a.ne.0) call errquit("MA a",dimhhpp,MA_ERR)
249      if (e_b.ne.0) call errquit("MA b",dimpppp,MA_ERR)
250      if (e_c.ne.0) call errquit("MA c",dimhhpp,MA_ERR)
251      if (e_t.ne.0) call errquit("MA t",dimtemp,MA_ERR)
252      DO p3b = noab+1,noab+nvab
253       DO p4b = p3b,noab+nvab
254        DO h1b = 1,noab
255         DO h2b = h1b,noab
256          IF ((.not.restricted).or.
257     &        ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1)
258     &         +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN
259           IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq.
260     &         int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
261            IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1),
262     &          ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
263     &                 .eq. ieor(irrep_v,irrep_t)) THEN
264             IF (next.eq.count) THEN
265              dima_sort = int_mb(k_range+h1b-1)
266     &                  * int_mb(k_range+h2b-1)
267              dimb_sort = int_mb(k_range+p3b-1)
268     &                  * int_mb(k_range+p4b-1)
269              dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1)
270     &             * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1)
271#ifdef USE_F90_ALLOCATABLE
272              CALL DFILL(dimc,0.0d0,f_c,1)
273#if 0
274              DO p5b = noab+1,noab+nvab
275               DO p6b = p5b,noab+nvab
276#else
277      DO p5b_in =ga_nodeid(),ga_nodeid()+nvab-1
278         p5b=mod(p5b_in,nvab)+noab+1
279         DO p6b_in=ga_nodeid(),ga_nodeid()+nvab+noab-p5b
280            p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b
281#endif
282                IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq.
283     &              int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
284                 IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),
285     &               ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
286     &                      .eq. irrep_t) THEN
287                  CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b,
288     &                                  p5b_1,p6b_1,h1b_1,h2b_1)
289                  CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b,
290     &                                  p3b_2,p4b_2,p5b_2,p6b_2)
291                  dim_common = int_mb(k_range+p5b-1)
292     &                       * int_mb(k_range+p6b-1)
293                  dima = dim_common * dima_sort
294                  dimb = dim_common * dimb_sort
295                  IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN
296#ifdef USE_LOOPS_NOT_DGEMM
297                   CALL GET_HASH_BLOCK(d_a,f_a,dima,
298     &                  int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab*
299     &                  (p6b_1-noab-1+nvab*(p5b_1-noab-1)))))
300#else
301                   CALL GET_HASH_BLOCK(d_a,f_t,dima,
302     &                  int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab*
303     &                  (p6b_1-noab-1+nvab*(p5b_1-noab-1)))))
304                   CALL TCE_SORT_4(f_t,f_a,
305     &                  int_mb(k_range+p5b-1),int_mb(k_range+p6b-1),
306     &                  int_mb(k_range+h1b-1),int_mb(k_range+h2b-1),
307     &                  4,3,2,1,1.0d0)
308#endif
309                   if(.not.intorb) then
310#ifdef USE_LOOPS_NOT_DGEMM
311                    CALL GET_HASH_BLOCK(d_b,f_b,dimb,
312     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
313     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
314     &                   (p3b_2-1)))))
315#else
316                    CALL GET_HASH_BLOCK(d_b,f_t,dimb,
317     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
318     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
319     &                   (p3b_2-1)))))
320#endif
321                   else
322#ifdef USE_LOOPS_NOT_DGEMM
323                    CALL GET_HASH_BLOCK_I(d_b,f_b,dimb,
324     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
325     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
326     &                   (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2)
327#else
328                    CALL GET_HASH_BLOCK_I(d_b,f_t,dimb,
329     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
330     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
331     &                   (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2)
332#endif
333                   end if
334#ifndef USE_LOOPS_NOT_DGEMM
335                   CALL TCE_SORT_4(f_t,f_b,
336     &                  int_mb(k_range+p3b-1),int_mb(k_range+p4b-1),
337     &                  int_mb(k_range+p5b-1),int_mb(k_range+p6b-1),
338     &                  2,1,4,3,1.0d0)
339#endif
340                   if (p5b .eq. p6b) then
341                    alpha = 1.0d0
342                   else
343                    alpha = 2.0d0
344                   end if
345#ifdef USE_LOOPS_NOT_DGEMM
346                   call t2_p8(int_mb(k_range+h1b-1),
347     &                        int_mb(k_range+h2b-1),
348     &                        int_mb(k_range+p3b-1),
349     &                        int_mb(k_range+p4b-1),
350     &                        int_mb(k_range+p5b-1),
351     &                        int_mb(k_range+p6b-1),
352     &                        f_a,f_b,f_c,
353     &                        0.5d0*alpha)
354#else
355                   CALL DGEMM('T','N',dima_sort,dimb_sort,dim_common,
356     &                  alpha,f_a,dim_common,f_b,
357     &                  dim_common,1.0d0,f_c,dima_sort)
358#endif
359                  END IF
360                 END IF
361                END IF
362               END DO
363              END DO
364#ifdef USE_LOOPS_NOT_DGEMM
365              CALL ADD_HASH_BLOCK(d_c,f_c,dimc,
366     &             int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab*
367     &             (p4b-noab-1+nvab*(p3b-noab-1)))))
368#else
369              CALL TCE_SORT_4(f_c,f_t,
370     &             int_mb(k_range+p4b-1),int_mb(k_range+p3b-1),
371     &             int_mb(k_range+h2b-1),int_mb(k_range+h1b-1),
372     &             2,1,4,3,0.5d0)
373              CALL ADD_HASH_BLOCK(d_c,f_t,dimc,
374     &             int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab*
375     &             (p4b-noab-1+nvab*(p3b-noab-1)))))
376#endif
377#else
378celse// USE_F90_ALLOCATABLE
379              CALL DFILL(dimc,0.0d0,dbl_mb(k_c),1)
380              DO p5b = noab+1,noab+nvab
381               DO p6b = p5b,noab+nvab
382                IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq.
383     &              int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
384                 IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),
385     &               ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
386     &                      .eq. irrep_t) THEN
387                  CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b,
388     &                                  p5b_1,p6b_1,h1b_1,h2b_1)
389                  CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b,
390     &                                  p3b_2,p4b_2,p5b_2,p6b_2)
391                  dim_common = int_mb(k_range+p5b-1)
392     &                       * int_mb(k_range+p6b-1)
393                  dima = dim_common * dima_sort
394                  dimb = dim_common * dimb_sort
395                  IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN
396#ifdef USE_LOOPS_NOT_DGEMM
397                   CALL GET_HASH_BLOCK(d_a,dbl_mb(k_a),dima,
398     &                  int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab*
399     &                  (p6b_1-noab-1+nvab*(p5b_1-noab-1)))))
400#else
401                   CALL GET_HASH_BLOCK(d_a,dbl_mb(k_t),dima,
402     &                  int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab*
403     &                  (p6b_1-noab-1+nvab*(p5b_1-noab-1)))))
404                   CALL TCE_SORT_4(dbl_mb(k_t),dbl_mb(k_a),
405     &                  int_mb(k_range+p5b-1),int_mb(k_range+p6b-1),
406     &                  int_mb(k_range+h1b-1),int_mb(k_range+h2b-1),
407     &                  4,3,2,1,1.0d0)
408#endif
409                   if(.not.intorb) then
410#ifdef USE_LOOPS_NOT_DGEMM
411                    CALL GET_HASH_BLOCK(d_b,dbl_mb(k_b),dimb,
412     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
413     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
414     &                   (p3b_2-1)))))
415#else
416                    CALL GET_HASH_BLOCK(d_b,dbl_mb(k_t),dimb,
417     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
418     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
419     &                   (p3b_2-1)))))
420#endif
421                   else
422#ifdef USE_LOOPS_NOT_DGEMM
423                    CALL GET_HASH_BLOCK_I(d_b,dbl_mb(k_b),dimb,
424     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
425     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
426     &                   (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2)
427#else
428                    CALL GET_HASH_BLOCK_I(d_b,dbl_mb(k_t),dimb,
429     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
430     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
431     &                   (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2)
432#endif
433                   end if
434#ifndef USE_LOOPS_NOT_DGEMM
435                   CALL TCE_SORT_4(dbl_mb(k_t),dbl_mb(k_b),
436     &                  int_mb(k_range+p3b-1),int_mb(k_range+p4b-1),
437     &                  int_mb(k_range+p5b-1),int_mb(k_range+p6b-1),
438     &                  2,1,4,3,1.0d0)
439#endif
440                   if (p5b .eq. p6b) then
441                    alpha = 1.0d0
442                   else
443                    alpha = 2.0d0
444                   end if
445#ifdef USE_LOOPS_NOT_DGEMM
446                   call t2_p8(int_mb(k_range+h1b-1),
447     &                        int_mb(k_range+h2b-1),
448     &                        int_mb(k_range+p3b-1),
449     &                        int_mb(k_range+p4b-1),
450     &                        int_mb(k_range+p5b-1),
451     &                        int_mb(k_range+p6b-1),
452     &                        dbl_mb(k_a),dbl_mb(k_b),dbl_mb(k_c),
453     &                        0.5d0*alpha)
454#else
455                   CALL DGEMM('T','N',dima_sort,dimb_sort,dim_common,
456     &                  alpha,dbl_mb(k_a),dim_common,dbl_mb(k_b),
457     &                  dim_common,1.0d0,dbl_mb(k_c),dima_sort)
458#endif
459                  END IF
460                 END IF
461                END IF
462               END DO
463              END DO
464#ifdef USE_LOOPS_NOT_DGEMM
465              CALL ADD_HASH_BLOCK(d_c,dbl_mb(k_c),dimc,
466     &             int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab*
467     &             (p4b-noab-1+nvab*(p3b-noab-1)))))
468#else
469              CALL TCE_SORT_4(dbl_mb(k_c),dbl_mb(k_t),
470     &             int_mb(k_range+p4b-1),int_mb(k_range+p3b-1),
471     &             int_mb(k_range+h2b-1),int_mb(k_range+h1b-1),
472     &             2,1,4,3,0.5d0)
473              CALL ADD_HASH_BLOCK(d_c,dbl_mb(k_t),dimc,
474     &             int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab*
475     &             (p4b-noab-1+nvab*(p3b-noab-1)))))
476#endif
477#endif
478cendif// USE_F90_ALLOCATABLE
479              next = NXTASK(nprocs, 1)
480             END IF
481             count = count + 1
482            END IF
483           END IF
484          END IF
485         END DO
486        END DO
487       END DO
488      END DO
489      next = NXTASK(-nprocs, 1)
490      call GA_SYNC()
491
492#ifdef USE_F90_ALLOCATABLE
493      deallocate(f_a,stat=e_a)
494      deallocate(f_b,stat=e_b)
495      deallocate(f_c,stat=e_c)
496# ifndef USE_LOOPS_NOT_DGEMM
497      deallocate(f_t,stat=e_t)
498# endif
499#else
500# ifndef USE_LOOPS_NOT_DGEMM
501      e_t=0
502      if(.not.MA_POP_STACK(l_t)) e_t=-1
503# else
504      l_t=-12345
505      e_t=0
506# endif
507      e_a=0
508      if(.not.MA_CHOP_STACK(l_a)) e_a=-1
509#endif
510      if (e_a.ne.0) call errquit("MA pops a",0,MA_ERR)
511      if (e_t.ne.0) call errquit("MA pops t",1,MA_ERR)
512      RETURN
513      END
514
515
516
517      SUBROUTINE ccsd_t2_8_spiral(d_a,k_a_offset,d_b,k_b_offset,
518     1                            d_c,k_c_offset)
519C     $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $
520C     This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
521C     Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
522C     i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v
523      IMPLICIT NONE
524#include "global.fh"
525#include "mafdecls.fh"
526#include "sym.fh"
527#include "errquit.fh"
528#include "tce.fh"
529#include "tce_main.fh"
530      integer d_a, d_b, d_c
531      integer k_a_offset, k_b_offset, k_c_offset
532      integer NXTASK, next, nprocs, count
533      integer p3b, p4b, h1b, h2b, p5b, p6b
534      integer p5b_1, p6b_1, h1b_1, h2b_1, p3b_2, p4b_2, p5b_2, p6b_2
535      integer dim1,dim2,dim3,dim4,dim5,dim6
536      integer dim12,dim34,dim56
537      integer dima,dimb,dimc
538      integer spn1,spn2,spn3,spn4,spn5,spn6
539      integer spn12,spn34,spn56
540      integer sym1,sym2,sym3,sym4,sym5,sym6
541      integer sym12,sym34,sym56
542      integer k_as, l_as, k_a, l_a
543      integer k_bs, l_bs, k_b, l_b
544      integer k_cs, l_cs, k_c, l_c
545      integer nbh
546      double precision alpha
547      external NXTASK
548c
549c      print*,'entering ccsd_t2_8_spiral (energy)'
550c
551      nprocs = ga_nnodes()
552      count = 0
553      next = nxtask(nprocs, 1)
554c
555      if (.not.ma_push_get(mt_dbl,tile_dim**4,'c',l_c,k_c))
556     1     call errquit('ccsd_t2_8',9,MA_ERR)
557c
558      do p3b = noab+1,noab+nvab
559       dim3=int_mb(k_range+p3b-1)
560       spn3=int_mb(k_spin +p3b-1)
561       sym3=int_mb(k_sym  +p3b-1)
562       do p4b = p3b,noab+nvab
563        dim4=int_mb(k_range+p4b-1)
564        spn4=int_mb(k_spin +p4b-1)
565        sym4=int_mb(k_sym  +p4b-1)
566c
567        dim34 = dim3 * dim4
568        spn34 = spn3 + spn4
569        sym34 = ieor(sym3,sym4)
570c
571        do p5b = noab+1,noab+nvab
572         dim5=int_mb(k_range+p5b-1)
573         spn5=int_mb(k_spin +p5b-1)
574         sym5=int_mb(k_sym  +p5b-1)
575         do p6b = p5b,noab+nvab
576          dim6=int_mb(k_range+p6b-1)
577          spn6=int_mb(k_spin +p6b-1)
578          sym6=int_mb(k_sym  +p6b-1)
579c
580          dim56 = dim5 * dim6
581          spn56 = spn5 + spn6
582          sym56 = ieor(sym5,sym6)
583c
584          dimb  = dim34 * dim56
585c
586          if ( (dimb.gt.0) .and. (ieor(sym34,sym56).eq.0)
587     1         .and. (spn34.eq.spn56) ) then
588c
589          if (next.eq.count) then
590c
591           call tce_restricted_4(p3b,p4b,p5b,p6b,
592     1                           p3b_2,p4b_2,p5b_2,p6b_2)
593c
594           if (.not.ma_push_get(mt_dbl,dimb,'bs',l_bs,k_bs))
595     1              call errquit('ccsd_t2_8',4,MA_ERR)
596           if (.not.ma_push_get(mt_dbl,dimb,'b',l_b,k_b))
597     1              call errquit('ccsd_t2_8',5,MA_ERR)
598c
599           if(.not.intorb) then
600            call get_hash_block(d_b,dbl_mb(k_b),dimb,
601     1           int_mb(k_b_offset),
602     2           (p6b_2 - 1 + (noab+nvab) * (p5b_2 - 1 +
603     3           (noab+nvab) * (p4b_2 - 1 + (noab+nvab) *
604     4           (p3b_2 - 1)))))
605           else
606            call get_hash_block_i(d_b,dbl_mb(k_b),dimb,
607     1           int_mb(k_b_offset),
608     2           (p6b_2 - 1 + (noab+nvab) * (p5b_2 - 1 +
609     3           (noab+nvab) * (p4b_2 - 1 + (noab+nvab) *
610     4           (p3b_2 - 1)))),p6b_2,p5b_2,p4b_2,p3b_2)
611           end if
612c
613           call tce_sort_4(dbl_mb(k_b),dbl_mb(k_bs),
614     1          dim3,dim4,dim5,dim6,2,1,4,3,1.0d0)
615c
616           if (.not.ma_pop_stack(l_b))
617     1              call errquit('ccsd_t2_8',6,MA_ERR)
618c
619           do h1b = 1,noab
620            dim1=int_mb(k_range+h1b-1)
621            spn1=int_mb(k_spin +h1b-1)
622            sym1=int_mb(k_sym  +h1b-1)
623            do h2b = h1b,noab
624             dim2=int_mb(k_range+h2b-1)
625             spn2=int_mb(k_spin +h2b-1)
626             sym2=int_mb(k_sym  +h2b-1)
627c
628             dim12 = dim1 * dim2
629             spn12 = spn1 + spn2
630             sym12 = ieor(sym1,sym2)
631c
632             dima  = dim12 * dim56
633c
634             if (dima.gt.0) then
635c
636              call tce_restricted_4(p5b,p6b,h1b,h2b,
637     1                              p5b_1,p6b_1,h1b_1,h2b_1)
638c
639              if (spn34.eq.spn12) then
640               if (spn56.eq.spn12) then
641                if ((.not.restricted).or.((spn34+spn12).ne.8)) then
642                 if (ieor(sym34,sym12).eq.0) then
643                  if (ieor(sym56,sym12).eq.0) then
644c
645                   dimc  = dim12 * dim34
646c
647                   if (.not.ma_push_get(mt_dbl,dimc,'cs',l_cs,k_cs))
648     1                 call errquit('ccsd_t2_8',0,MA_ERR)
649c
650                   call dfill(dimc,0.0d0,dbl_mb(k_cs),1)
651c
652                   if (.not.ma_push_get(mt_dbl,dima,'as',l_as,k_as))
653     1                 call errquit('ccsd_t2_8',1,MA_ERR)
654                   if (.not.ma_push_get(mt_dbl,dima,'a',l_a,k_a))
655     1                 call errquit('ccsd_t2_8',2,MA_ERR)
656c
657                   call get_hash_block(d_a,dbl_mb(k_a),dima,
658     1                  int_mb(k_a_offset),
659     2                  (h2b_1 - 1 + noab * (h1b_1 - 1 + noab *
660     3                  (p6b_1 - noab - 1 + nvab *
661     4                  (p5b_1 - noab - 1)))))
662c
663                   call tce_sort_4(dbl_mb(k_a),dbl_mb(k_as),
664     1                  dim5,dim6,dim1,dim2,4,3,2,1,1.0d0)
665c
666                   if (.not.ma_pop_stack(l_a))
667     1                 call errquit('ccsd_t2_8',3,MA_ERR)
668c
669                   if (p5b .eq. p6b) then
670                    alpha = 1.0d0
671                   else
672                    alpha = 2.0d0
673                   end if
674                   call dgemm('T','N',dim12,dim34,dim56,alpha,
675     2                  dbl_mb(k_as),dim56,dbl_mb(k_bs),dim56,
676     3                  1.0d0,dbl_mb(k_cs),dim12)
677c
678                   if (.not.ma_pop_stack(l_as))
679     1                 call errquit('ccsd_t2_8',8,MA_ERR)
680c
681                   call ga_nbwait(nbh) ! wait until previous put of c is gone before overwriting buffer
682c
683                   call tce_sort_4(dbl_mb(k_cs),dbl_mb(k_c),
684     1                  dim4,dim3,dim2,dim1,2,1,4,3,0.5d0)
685c
686                   call add_hash_block_nb(d_c,dbl_mb(k_c),dimc,
687     1                  int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab*
688     2                  (p4b-noab-1+nvab*(p3b-noab-1)))),nbh)
689c
690                   if (.not.ma_pop_stack(l_cs))
691     1                 call errquit('ccsd_t2_8',11,MA_ERR)
692c
693                  end if
694                 end if
695                end if
696               end if
697              end if
698c
699             endif ! dima>0
700c
701            end do
702           end do
703c
704           if (.not.ma_pop_stack(l_bs))
705     1              call errquit('ccsd_t2_8',7,MA_ERR)
706c
707            next = NXTASK(nprocs, 1)
708           end if ! next=count
709           count = count + 1
710c
711          endif ! dimb>0
712c
713         end do
714        end do
715       end do
716      end do
717c
718      if (.not.ma_pop_stack(l_c))
719     1    call errquit('ccsd_t2_8',10,MA_ERR)
720c
721      next = NXTASK(-nprocs, 1)
722      call ga_sync()
723      RETURN
724      END
725
726      SUBROUTINE ccsd_t2_8_task_dgemm(d_a,k_a_offset,
727     &                                d_b,k_b_offset,
728     &                                d_c,k_c_offset,
729     &                                maxh,maxp)
730C     $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $
731C     This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
732C     Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
733C     i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v
734      IMPLICIT NONE
735#include "global.fh"
736#include "mafdecls.fh"
737#include "sym.fh"
738#include "errquit.fh"
739#include "tce.fh"
740      integer :: d_a,d_b,d_c
741      integer :: k_a_offset,k_b_offset,k_c_offset
742      integer :: maxh,maxp,dimhhpp,dimpppp,dimtemp
743      integer :: next,nprocs,count
744      integer :: p5b,p6b,p3b,p4b,h1b,h2b
745      integer :: p5b_1,p6b_1,h1b_1,h2b_1
746      integer :: p3b_2,p4b_2,p5b_2,p6b_2
747      integer :: dima,dimb,dimc,dim_common,dima_sort,dimb_sort
748      double precision, allocatable :: f_a(:)
749      double precision, allocatable :: f_b(:)
750      double precision, allocatable :: f_c(:)
751      double precision, allocatable :: f_t(:)
752#ifdef USE_FASTMEM
753      !dec$ attributes fastmem :: f_a,f_b,f_c,f_t
754#endif
755      integer :: e_a,e_b,e_c,e_t
756      double precision alpha
757      integer p5b_in,p6b_in,me
758      integer NXTASK
759      external NXTASK
760      nprocs = GA_NNODES()
761      count = 0
762      next = NXTASK(nprocs, 1)
763
764      me = ga_nodeid()
765
766      dimhhpp = maxh*maxh*maxp*maxp
767      dimpppp = maxp*maxp*maxp*maxp
768      dimtemp = max(dimpppp,dimhhpp)
769
770      allocate(f_a(1:dimhhpp),stat=e_a)
771      allocate(f_b(1:dimpppp),stat=e_b)
772      allocate(f_c(1:dimhhpp),stat=e_c)
773      allocate(f_t(1:dimtemp),stat=e_t)
774      if (e_a.ne.0) call errquit("alloc a",dimhhpp,MA_ERR)
775      if (e_b.ne.0) call errquit("alloc b",dimpppp,MA_ERR)
776      if (e_c.ne.0) call errquit("alloc c",dimhhpp,MA_ERR)
777      if (e_t.ne.0) call errquit("alloc t",dimtemp,MA_ERR)
778      DO p3b = noab+1,noab+nvab
779       DO p4b = p3b,noab+nvab
780        DO h1b = 1,noab
781         DO h2b = h1b,noab
782          IF ((.not.restricted).or.
783     &        ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1)
784     &         +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN
785           IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq.
786     &         int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
787            IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1),
788     &          ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
789     &                 .eq. ieor(irrep_v,irrep_t)) THEN
790             IF (next.eq.count) THEN
791              dima_sort = int_mb(k_range+h1b-1)
792     &                  * int_mb(k_range+h2b-1)
793              dimb_sort = int_mb(k_range+p3b-1)
794     &                  * int_mb(k_range+p4b-1)
795              dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1)
796     &             * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1)
797              CALL DFILL(dimc,0.0d0,f_c,1)
798              DO p5b_in =me,me+nvab-1
799                 p5b=mod(p5b_in,nvab)+noab+1
800               DO p6b_in=me,me+nvab+noab-p5b
801                  p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b
802                IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq.
803     &              int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
804                 IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),
805     &               ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
806     &                      .eq. irrep_t) THEN
807                  CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b,
808     &                                  p5b_1,p6b_1,h1b_1,h2b_1)
809                  CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b,
810     &                                  p3b_2,p4b_2,p5b_2,p6b_2)
811                  dim_common = int_mb(k_range+p5b-1)
812     &                       * int_mb(k_range+p6b-1)
813                  dima = dim_common * dima_sort
814                  dimb = dim_common * dimb_sort
815                  IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN
816                   CALL GET_HASH_BLOCK(d_a,f_t,dima,
817     &                  int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab*
818     &                  (p6b_1-noab-1+nvab*(p5b_1-noab-1)))))
819                   CALL TCE_SORT_4(f_t,f_a,
820     &                  int_mb(k_range+p5b-1),int_mb(k_range+p6b-1),
821     &                  int_mb(k_range+h1b-1),int_mb(k_range+h2b-1),
822     &                  4,3,2,1,1.0d0)
823                   if(.not.intorb) then
824                    CALL GET_HASH_BLOCK(d_b,f_t,dimb,
825     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
826     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
827     &                   (p3b_2-1)))))
828                   else
829                    CALL GET_HASH_BLOCK_I(d_b,f_t,dimb,
830     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
831     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
832     &                   (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2)
833                   end if
834                   CALL TCE_SORT_4(f_t,f_b,
835     &                  int_mb(k_range+p3b-1),int_mb(k_range+p4b-1),
836     &                  int_mb(k_range+p5b-1),int_mb(k_range+p6b-1),
837     &                  2,1,4,3,1.0d0)
838                   if (p5b .eq. p6b) then
839                    alpha = 1.0d0
840                   else
841                    alpha = 2.0d0
842                   end if
843                   CALL DGEMM('T','N',dima_sort,dimb_sort,dim_common,
844     &                  alpha,f_a,dim_common,f_b,
845     &                  dim_common,1.0d0,f_c,dima_sort)
846                  END IF
847                 END IF
848                END IF
849               END DO
850              END DO
851              CALL TCE_SORT_4(f_c,f_t,
852     &             int_mb(k_range+p4b-1),int_mb(k_range+p3b-1),
853     &             int_mb(k_range+h2b-1),int_mb(k_range+h1b-1),
854     &             2,1,4,3,0.5d0)
855              CALL ADD_HASH_BLOCK(d_c,f_t,dimc,
856     &             int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab*
857     &             (p4b-noab-1+nvab*(p3b-noab-1)))))
858              next = NXTASK(nprocs, 1)
859             END IF
860             count = count + 1
861            END IF
862           END IF
863          END IF
864         END DO
865        END DO
866       END DO
867      END DO
868      next = NXTASK(-nprocs, 1)
869      call GA_SYNC()
870      deallocate(f_a,stat=e_a)
871      deallocate(f_b,stat=e_b)
872      deallocate(f_c,stat=e_c)
873      deallocate(f_t,stat=e_t)
874      if (e_a.ne.0) call errquit("free a",0,MA_ERR)
875      if (e_b.ne.0) call errquit("free b",1,MA_ERR)
876      if (e_c.ne.0) call errquit("free c",2,MA_ERR)
877      if (e_t.ne.0) call errquit("free t",3,MA_ERR)
878      RETURN
879      END
880
881
882      SUBROUTINE ccsd_t2_8_task_loops(d_a,k_a_offset,
883     &                                d_b,k_b_offset,
884     &                                d_c,k_c_offset,
885     &                                maxh,maxp)
886C     $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $
887C     This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
888C     Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
889C     i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v
890      IMPLICIT NONE
891#include "global.fh"
892#include "mafdecls.fh"
893#include "sym.fh"
894#include "errquit.fh"
895#include "tce.fh"
896      integer :: d_a,d_b,d_c
897      integer :: k_a_offset,k_b_offset,k_c_offset
898      integer :: maxh,maxp,dimhhpp,dimpppp,dimtemp
899      integer :: next,nprocs,count
900      integer :: p5b,p6b,p3b,p4b,h1b,h2b
901      integer :: p5b_1,p6b_1,h1b_1,h2b_1
902      integer :: p3b_2,p4b_2,p5b_2,p6b_2
903      integer :: dima,dimb,dimc,dim_common,dima_sort,dimb_sort
904      double precision, allocatable :: f_a(:)
905      double precision, allocatable :: f_b(:)
906      double precision, allocatable :: f_c(:)
907#ifdef USE_FASTMEM
908      !dec$ attributes fastmem :: f_a,f_b,f_c
909#endif
910      integer :: e_a,e_b,e_c
911      double precision alpha
912      integer p5b_in,p6b_in,me
913      integer NXTASK
914      external NXTASK
915      nprocs = GA_NNODES()
916      count = 0
917      next = NXTASK(nprocs, 1)
918
919      me = ga_nodeid()
920
921      dimhhpp = maxh*maxh*maxp*maxp
922      dimpppp = maxp*maxp*maxp*maxp
923      dimtemp = max(dimpppp,dimhhpp)
924
925      allocate(f_a(1:dimhhpp),stat=e_a)
926      allocate(f_b(1:dimpppp),stat=e_b)
927      allocate(f_c(1:dimhhpp),stat=e_c)
928      if (e_a.ne.0) call errquit("alloc a",dimhhpp,MA_ERR)
929      if (e_b.ne.0) call errquit("alloc b",dimpppp,MA_ERR)
930      if (e_c.ne.0) call errquit("alloc c",dimhhpp,MA_ERR)
931      DO p3b = noab+1,noab+nvab
932       DO p4b = p3b,noab+nvab
933        DO h1b = 1,noab
934         DO h2b = h1b,noab
935          IF ((.not.restricted).or.
936     &        ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1)
937     &         +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN
938           IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq.
939     &         int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
940            IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1),
941     &          ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
942     &                 .eq. ieor(irrep_v,irrep_t)) THEN
943             IF (next.eq.count) THEN
944              dima_sort = int_mb(k_range+h1b-1)
945     &                  * int_mb(k_range+h2b-1)
946              dimb_sort = int_mb(k_range+p3b-1)
947     &                  * int_mb(k_range+p4b-1)
948              dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1)
949     &             * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1)
950              CALL DFILL(dimc,0.0d0,f_c,1)
951              DO p5b_in =me,me+nvab-1
952                 p5b=mod(p5b_in,nvab)+noab+1
953               DO p6b_in=me,me+nvab+noab-p5b
954                  p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b
955                IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq.
956     &              int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
957                 IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),
958     &               ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
959     &                      .eq. irrep_t) THEN
960                  CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b,
961     &                                  p5b_1,p6b_1,h1b_1,h2b_1)
962                  CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b,
963     &                                  p3b_2,p4b_2,p5b_2,p6b_2)
964                  dim_common = int_mb(k_range+p5b-1)
965     &                       * int_mb(k_range+p6b-1)
966                  dima = dim_common * dima_sort
967                  dimb = dim_common * dimb_sort
968                  IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN
969                   CALL GET_HASH_BLOCK(d_a,f_a,dima,
970     &                  int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab*
971     &                  (p6b_1-noab-1+nvab*(p5b_1-noab-1)))))
972                   if(.not.intorb) then
973                    CALL GET_HASH_BLOCK(d_b,f_b,dimb,
974     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
975     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
976     &                   (p3b_2-1)))))
977                   else
978                    CALL GET_HASH_BLOCK_I(d_b,f_b,dimb,
979     &                   int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
980     &                   (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
981     &                   (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2)
982                   end if
983                   if (p5b .eq. p6b) then
984                    alpha = 1.0d0
985                   else
986                    alpha = 2.0d0
987                   end if
988                   call t2_p8(int_mb(k_range+h1b-1),
989     &                        int_mb(k_range+h2b-1),
990     &                        int_mb(k_range+p3b-1),
991     &                        int_mb(k_range+p4b-1),
992     &                        int_mb(k_range+p5b-1),
993     &                        int_mb(k_range+p6b-1),
994     &                        f_a,f_b,f_c,
995     &                        0.5d0*alpha)
996                  END IF
997                 END IF
998                END IF
999               END DO
1000              END DO
1001              CALL ADD_HASH_BLOCK(d_c,f_c,dimc,
1002     &             int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab*
1003     &             (p4b-noab-1+nvab*(p3b-noab-1)))))
1004              next = NXTASK(nprocs, 1)
1005             END IF
1006             count = count + 1
1007            END IF
1008           END IF
1009          END IF
1010         END DO
1011        END DO
1012       END DO
1013      END DO
1014      next = NXTASK(-nprocs, 1)
1015      call GA_SYNC()
1016      deallocate(f_a,stat=e_a)
1017      deallocate(f_b,stat=e_b)
1018      deallocate(f_c,stat=e_c)
1019      if (e_a.ne.0) call errquit("free a",0,MA_ERR)
1020      if (e_b.ne.0) call errquit("free b",0,MA_ERR)
1021      if (e_c.ne.0) call errquit("free t",0,MA_ERR)
1022      RETURN
1023      END
1024
1025
1026      integer function ccsd_t2_8_count()
1027      IMPLICIT NONE
1028#include "global.fh"
1029#include "mafdecls.fh"
1030#include "sym.fh"
1031#include "errquit.fh"
1032#include "tce.fh"
1033      integer :: n
1034      integer :: p5b,p6b,p3b,p4b,h1b,h2b
1035      n = 0
1036      DO p3b = noab+1,noab+nvab
1037       DO p4b = p3b,noab+nvab
1038        DO h1b = 1,noab
1039         DO h2b = h1b,noab
1040          IF ((.not.restricted).or.
1041     &        ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1)
1042     &         +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN
1043           IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq.
1044     &         int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
1045            IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1),
1046     &          ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
1047     &                 .eq. ieor(irrep_v,irrep_t)) THEN
1048             !DO p5b = noab+1,noab+nvab
1049             ! DO p6b = p5b,noab+nvab
1050             !  IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq.
1051     &       !      int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
1052             !   IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),
1053     &       !       ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
1054     &       !              .eq. irrep_t) THEN
1055                 n = n+1
1056             !   END IF
1057             !  END IF
1058             ! END DO
1059             !END DO
1060            END IF
1061           END IF
1062          END IF
1063         END DO
1064        END DO
1065       END DO
1066      END DO
1067      ccsd_t2_8_count = n
1068      RETURN
1069      END
1070
1071      subroutine ccsd_t2_8_make_list(num_tasks, task_list)
1072      IMPLICIT NONE
1073#include "global.fh"
1074#include "mafdecls.fh"
1075#include "sym.fh"
1076#include "errquit.fh"
1077#include "tce.fh"
1078      integer, intent(in) :: num_tasks
1079      integer, intent(inout) :: task_list(4,num_tasks)
1080      integer :: p5b,p6b,p3b,p4b,h1b,h2b
1081      integer :: i
1082      i = 0
1083      DO p3b = noab+1,noab+nvab
1084       DO p4b = p3b,noab+nvab
1085        DO h1b = 1,noab
1086         DO h2b = h1b,noab
1087          IF ((.not.restricted).or.
1088     &        ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1)
1089     &         +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN
1090           IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq.
1091     &         int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
1092            IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1),
1093     &          ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
1094     &                 .eq. ieor(irrep_v,irrep_t)) THEN
1095             !DO p5b = noab+1,noab+nvab
1096             ! DO p6b = p5b,noab+nvab
1097             !  IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq.
1098     &       !      int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
1099             !   IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),
1100     &       !       ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
1101     &       !              .eq. irrep_t) THEN
1102                  i = i + 1
1103                  task_list(1,i) = p3b
1104                  task_list(2,i) = p4b
1105                  task_list(3,i) = h1b
1106                  task_list(4,i) = h2b
1107             !     task_list(5,i) = p5b
1108             !     task_list(6,i) = p6b
1109             !   END IF
1110             !  END IF
1111             ! END DO
1112             !END DO
1113            END IF
1114           END IF
1115          END IF
1116         END DO
1117        END DO
1118       END DO
1119      END DO
1120      RETURN
1121      END
1122
1123#if defined(USE_OPENMP) && defined(USE_OPENMP_TASKS)
1124
1125      SUBROUTINE ccsd_t2_8_loops_exec(d_a,k_a_offset,
1126     &                                d_b,k_b_offset,
1127     &                                d_c,k_c_offset,
1128     &                                maxh,maxp,
1129     &                                num_tasks,task_list)
1130      IMPLICIT NONE
1131#include "global.fh"
1132#include "mafdecls.fh"
1133#include "sym.fh"
1134#include "errquit.fh"
1135#include "tce.fh"
1136      integer, intent(in) :: d_a,d_b,d_c
1137      integer, intent(in) :: k_a_offset,k_b_offset,k_c_offset
1138      integer, intent(in) :: maxh,maxp
1139      integer, intent(in) :: num_tasks
1140      integer, intent(in) :: task_list(4,num_tasks)
1141      integer :: dimhhpp,dimpppp
1142      integer :: p5b,p6b,p3b,p4b,h1b,h2b
1143      integer :: p5b_1,p6b_1,h1b_1,h2b_1
1144      integer :: p3b_2,p4b_2,p5b_2,p6b_2
1145      integer :: dima,dimb,dimc,dim_common,dima_sort,dimb_sort
1146      double precision, allocatable :: f_a(:)
1147      double precision, allocatable :: f_b(:)
1148      double precision, allocatable :: f_c(:)
1149#ifdef USE_FASTMEM
1150      !dec$ attributes fastmem :: f_a,f_b,f_c
1151#endif
1152      integer :: e_a,e_b,e_c
1153      double precision :: alpha
1154      integer :: p5b_in,p6b_in
1155      integer :: me,np
1156      integer :: i
1157
1158      me = ga_nodeid()
1159      np = ga_nnodes()
1160
1161      dimhhpp = maxh*maxh*maxp*maxp
1162      dimpppp = maxp*maxp*maxp*maxp
1163!$omp parallel private(f_a,f_b,f_c,e_a,e_b,e_c)
1164      allocate(f_a(1:dimhhpp),stat=e_a)
1165      allocate(f_b(1:dimpppp),stat=e_b)
1166      allocate(f_c(1:dimhhpp),stat=e_c)
1167      if (e_a.ne.0) call errquit("alloc a",dimhhpp,MA_ERR)
1168      if (e_b.ne.0) call errquit("alloc b",dimpppp,MA_ERR)
1169      if (e_c.ne.0) call errquit("alloc c",dimhhpp,MA_ERR)
1170!$omp master
1171      do i = 1, num_tasks
1172        if (mod(i,np).eq.me) then
1173!$omp task private(p3b,p4b,h1b,h2b,p5b_in,p5b,p6b_in,p6b)
1174!$omp&     private(p5b_1,p6b_1,h1b_1,h2b_1,p3b_2,p4b_2,p5b_2,p6b_2)
1175!$omp&     private(dima,dimb,dimc,dim_common,dima_sort,dimb_sort)
1176!$omp&     private(alpha)
1177          p3b = task_list(1,i)
1178          p4b = task_list(2,i)
1179          h1b = task_list(3,i)
1180          h2b = task_list(4,i)
1181          dima_sort = int_mb(k_range+h1b-1)
1182     &              * int_mb(k_range+h2b-1)
1183          dimb_sort = int_mb(k_range+p3b-1)
1184     &              * int_mb(k_range+p4b-1)
1185          dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1)
1186     &         * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1)
1187          CALL DFILL(dimc,0.0d0,f_c,1)
1188          DO p5b_in =me,me+nvab-1
1189             p5b=mod(p5b_in,nvab)+noab+1
1190           DO p6b_in=me,me+nvab+noab-p5b
1191              p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b
1192            IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq.
1193     &          int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
1194             IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),
1195     &           ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
1196     &                  .eq. irrep_t) THEN
1197              CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b,
1198     &                              p5b_1,p6b_1,h1b_1,h2b_1)
1199              CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b,
1200     &                              p3b_2,p4b_2,p5b_2,p6b_2)
1201              dim_common = int_mb(k_range+p5b-1)
1202     &                   * int_mb(k_range+p6b-1)
1203              dima = dim_common * dima_sort
1204              dimb = dim_common * dimb_sort
1205              IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN
1206               CALL GET_HASH_BLOCK_R(d_a,f_a,dima,
1207     &              int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab*
1208     &              (p6b_1-noab-1+nvab*(p5b_1-noab-1)))))
1209               if(.not.intorb) then
1210                CALL GET_HASH_BLOCK_R(d_b,f_b,dimb,
1211     &               int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
1212     &               (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
1213     &               (p3b_2-1)))))
1214               else
1215                CALL GET_HASH_BLOCK_I_R(d_b,f_b,dimb,
1216     &               int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
1217     &               (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
1218     &               (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2)
1219               end if
1220               if (p5b .eq. p6b) then
1221                alpha = 1.0d0
1222               else
1223                alpha = 2.0d0
1224               end if
1225               call t2_p8(int_mb(k_range+h1b-1),
1226     &                    int_mb(k_range+h2b-1),
1227     &                    int_mb(k_range+p3b-1),
1228     &                    int_mb(k_range+p4b-1),
1229     &                    int_mb(k_range+p5b-1),
1230     &                    int_mb(k_range+p6b-1),
1231     &                    f_a,f_b,f_c,
1232     &                    0.5d0*alpha)
1233              END IF
1234             END IF
1235            END IF
1236           END DO
1237          END DO
1238          CALL ADD_HASH_BLOCK_R(d_c,f_c,dimc,
1239     &         int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab*
1240     &         (p4b-noab-1+nvab*(p3b-noab-1)))))
1241!$omp end task
1242        endif
1243      enddo
1244!$omp taskwait
1245      call GA_SYNC()
1246!$omp end master
1247      deallocate(f_a,stat=e_a)
1248      deallocate(f_b,stat=e_b)
1249      deallocate(f_c,stat=e_c)
1250      if (e_a.ne.0) call errquit("free a",0,MA_ERR)
1251      if (e_b.ne.0) call errquit("free b",1,MA_ERR)
1252      if (e_c.ne.0) call errquit("free c",2,MA_ERR)
1253!$omp end parallel
1254      RETURN
1255      END
1256
1257
1258      SUBROUTINE ccsd_t2_8_loops_driver(d_a,k_a_offset,
1259     &                                  d_b,k_b_offset,
1260     &                                  d_c,k_c_offset,
1261     &                                  maxh,maxp)
1262C     $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $
1263C     This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
1264C     Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
1265C     i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v
1266      IMPLICIT NONE
1267#include "global.fh"
1268#include "mafdecls.fh"
1269#include "sym.fh"
1270#include "errquit.fh"
1271#include "tce.fh"
1272      integer :: d_a,d_b,d_c
1273      integer :: k_a_offset,k_b_offset,k_c_offset
1274      integer :: maxh,maxp
1275      integer :: num_tasks
1276      integer, allocatable :: task_list(:,:)
1277      integer :: e_tl
1278      integer :: ccsd_t2_8_count
1279      external :: ccsd_t2_8_count
1280      num_tasks = ccsd_t2_8_count()
1281      allocate(task_list(4,1:num_tasks),stat=e_tl)
1282      if (e_tl.ne.0) call errquit("alloc task_list",num_tasks,MA_ERR)
1283      call ccsd_t2_8_make_list(num_tasks, task_list)
1284      call ccsd_t2_8_loops_exec(d_a,k_a_offset,
1285     &                          d_b,k_b_offset,
1286     &                          d_c,k_c_offset,
1287     &                          maxh,maxp,
1288     &                          num_tasks,task_list)
1289      deallocate(task_list,stat=e_tl)
1290      if (e_tl.ne.0) call errquit("free task_list",num_tasks,MA_ERR)
1291      RETURN
1292      END
1293
1294
1295      SUBROUTINE ccsd_t2_8_dgemm_exec(d_a,k_a_offset,
1296     &                                d_b,k_b_offset,
1297     &                                d_c,k_c_offset,
1298     &                                maxh,maxp,
1299     &                                num_tasks,task_list)
1300      IMPLICIT NONE
1301#include "global.fh"
1302#include "mafdecls.fh"
1303#include "sym.fh"
1304#include "errquit.fh"
1305#include "tce.fh"
1306      integer, intent(in) :: d_a,d_b,d_c
1307      integer, intent(in) :: k_a_offset,k_b_offset,k_c_offset
1308      integer, intent(in) :: maxh,maxp
1309      integer, intent(in) :: num_tasks
1310      integer, intent(in) :: task_list(4,num_tasks)
1311      integer :: dimhhpp,dimpppp,dimtemp
1312      integer :: p5b,p6b,p3b,p4b,h1b,h2b
1313      integer :: p5b_1,p6b_1,h1b_1,h2b_1
1314      integer :: p3b_2,p4b_2,p5b_2,p6b_2
1315      integer :: dima,dimb,dimc,dim_common,dima_sort,dimb_sort
1316      double precision, allocatable :: f_a(:)
1317      double precision, allocatable :: f_b(:)
1318      double precision, allocatable :: f_c(:)
1319      double precision, allocatable :: f_t(:)
1320#ifdef USE_FASTMEM
1321      !dec$ attributes fastmem :: f_a,f_b,f_c,f_t
1322#endif
1323      integer :: e_a,e_b,e_c,e_t
1324      double precision :: alpha
1325      integer :: p5b_in,p6b_in
1326      integer :: me,np
1327      integer :: i
1328
1329      me = ga_nodeid()
1330      np = ga_nnodes()
1331
1332      dimhhpp = maxh*maxh*maxp*maxp
1333      dimpppp = maxp*maxp*maxp*maxp
1334      dimtemp = max(dimpppp,dimhhpp)
1335!$omp parallel private(f_a,f_b,f_c,f_t,e_a,e_b,e_c,e_t)
1336      allocate(f_a(1:dimhhpp),stat=e_a)
1337      allocate(f_b(1:dimpppp),stat=e_b)
1338      allocate(f_c(1:dimhhpp),stat=e_c)
1339      allocate(f_t(1:dimtemp),stat=e_t)
1340      if (e_a.ne.0) call errquit("MA a",dimhhpp,MA_ERR)
1341      if (e_b.ne.0) call errquit("MA b",dimpppp,MA_ERR)
1342      if (e_c.ne.0) call errquit("MA c",dimhhpp,MA_ERR)
1343      if (e_t.ne.0) call errquit("MA t",dimhhpp,MA_ERR)
1344!$omp master
1345      do i = 1, num_tasks
1346        if (mod(i,np).eq.me) then
1347!$omp task private(p3b,p4b,h1b,h2b,p5b_in,p5b,p6b_in,p6b)
1348!$omp&     private(p5b_1,p6b_1,h1b_1,h2b_1,p3b_2,p4b_2,p5b_2,p6b_2)
1349!$omp&     private(dima,dimb,dimc,dim_common,dima_sort,dimb_sort)
1350!$omp&     private(alpha)
1351          p3b = task_list(1,i)
1352          p4b = task_list(2,i)
1353          h1b = task_list(3,i)
1354          h2b = task_list(4,i)
1355          dima_sort = int_mb(k_range+h1b-1)
1356     &              * int_mb(k_range+h2b-1)
1357          dimb_sort = int_mb(k_range+p3b-1)
1358     &              * int_mb(k_range+p4b-1)
1359          dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1)
1360     &         * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1)
1361          CALL DFILL(dimc,0.0d0,f_c,1)
1362          DO p5b_in =me,me+nvab-1
1363             p5b=mod(p5b_in,nvab)+noab+1
1364           DO p6b_in=me,me+nvab+noab-p5b
1365              p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b
1366            IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq.
1367     &          int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
1368             IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),
1369     &           ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
1370     &                  .eq. irrep_t) THEN
1371              CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b,
1372     &                              p5b_1,p6b_1,h1b_1,h2b_1)
1373              CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b,
1374     &                              p3b_2,p4b_2,p5b_2,p6b_2)
1375              dim_common = int_mb(k_range+p5b-1)
1376     &                   * int_mb(k_range+p6b-1)
1377              dima = dim_common * dima_sort
1378              dimb = dim_common * dimb_sort
1379              IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN
1380               CALL GET_HASH_BLOCK_R(d_a,f_t,dima,
1381     &              int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab*
1382     &              (p6b_1-noab-1+nvab*(p5b_1-noab-1)))))
1383               CALL TCE_SORT_4(f_t,f_a,
1384     &              int_mb(k_range+p5b-1),int_mb(k_range+p6b-1),
1385     &              int_mb(k_range+h1b-1),int_mb(k_range+h2b-1),
1386     &              4,3,2,1,1.0d0)
1387               if(.not.intorb) then
1388                CALL GET_HASH_BLOCK_R(d_b,f_t,dimb,
1389     &               int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
1390     &               (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
1391     &               (p3b_2-1)))))
1392               else
1393                CALL GET_HASH_BLOCK_I_R(d_b,f_t,dimb,
1394     &               int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
1395     &               (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
1396     &               (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2)
1397               end if
1398               CALL TCE_SORT_4(f_t,f_b,
1399     &              int_mb(k_range+p3b-1),int_mb(k_range+p4b-1),
1400     &              int_mb(k_range+p5b-1),int_mb(k_range+p6b-1),
1401     &              2,1,4,3,1.0d0)
1402               if (p5b .eq. p6b) then
1403                alpha = 1.0d0
1404               else
1405                alpha = 2.0d0
1406               end if
1407               CALL DGEMM('T','N',dima_sort,dimb_sort,dim_common,
1408     &              alpha,f_a,dim_common,f_b,
1409     &              dim_common,1.0d0,f_c,dima_sort)
1410              END IF
1411             END IF
1412            END IF
1413           END DO
1414          END DO
1415          CALL TCE_SORT_4(f_c,f_t,
1416     &         int_mb(k_range+p4b-1),int_mb(k_range+p3b-1),
1417     &         int_mb(k_range+h2b-1),int_mb(k_range+h1b-1),
1418     &         2,1,4,3,0.5d0)
1419          CALL ADD_HASH_BLOCK_R(d_c,f_t,dimc,
1420     &         int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab*
1421     &         (p4b-noab-1+nvab*(p3b-noab-1)))))
1422!$omp end task
1423        endif
1424      enddo
1425!$omp taskwait
1426      call GA_SYNC()
1427!$omp end master
1428      deallocate(f_a,stat=e_a)
1429      deallocate(f_b,stat=e_b)
1430      deallocate(f_c,stat=e_c)
1431      if (e_a.ne.0) call errquit("MA pops a",0,MA_ERR)
1432      if (e_b.ne.0) call errquit("MA pops b",1,MA_ERR)
1433      if (e_c.ne.0) call errquit("MA pops c",2,MA_ERR)
1434!$omp end parallel
1435      RETURN
1436      END
1437
1438
1439      SUBROUTINE ccsd_t2_8_dgemm_driver(d_a,k_a_offset,
1440     &                                  d_b,k_b_offset,
1441     &                                  d_c,k_c_offset,
1442     &                                  maxh,maxp)
1443C     $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $
1444C     This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
1445C     Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
1446C     i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v
1447      IMPLICIT NONE
1448#include "global.fh"
1449#include "mafdecls.fh"
1450#include "sym.fh"
1451#include "errquit.fh"
1452#include "tce.fh"
1453      integer :: d_a,d_b,d_c
1454      integer :: k_a_offset,k_b_offset,k_c_offset
1455      integer :: maxh,maxp
1456      integer :: num_tasks
1457      integer, allocatable :: task_list(:,:)
1458      integer :: e_tl
1459      integer :: ccsd_t2_8_count
1460      external :: ccsd_t2_8_count
1461      num_tasks = ccsd_t2_8_count()
1462      allocate(task_list(4,1:num_tasks),stat=e_tl)
1463      if (e_tl.ne.0) call errquit("alloc task_list",num_tasks,MA_ERR)
1464      call ccsd_t2_8_make_list(num_tasks, task_list)
1465      call ccsd_t2_8_dgemm_exec(d_a,k_a_offset,
1466     &                          d_b,k_b_offset,
1467     &                          d_c,k_c_offset,
1468     &                          maxh,maxp,
1469     &                          num_tasks,task_list)
1470      deallocate(task_list,stat=e_tl)
1471      if (e_tl.ne.0) call errquit("free task_list",num_tasks,MA_ERR)
1472      RETURN
1473      END
1474
1475#endif
1476