1      SUBROUTINE tce_mo2e_zones_4a_disk_ga_act(rtdb,d_v2,
2     1                                kax_v2_alpha_offset,
3     1                                size_2e)
4C     $Id$
5C     This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
6C     Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
7C     t ( p1 p2 h3 h4 )_t
8      IMPLICIT NONE
9#include "rtdb.fh"
10#include "global.fh"
11#include "mafdecls.fh"
12#include "stdio.fh"
13#include "util.fh"
14#include "bas.fh"
15#include "schwarz.fh"
16#include "sym.fh"
17#include "sf.fh"
18#include "errquit.fh"
19#include "tce.fh"
20#include "tce_main.fh"
21c
22c written by K. Kowalski
23c
24c
25c     max. number of p2 groups =200
26c
27c
28      integer rtdb                 ! Run-time database
29      integer d_v2                 ! MO integrals
30      integer kax_v2_alpha_offset  ! MO integrals offset
31      integer size_2e              ! 2e file size
32c
33      INTEGER size_2g2a,l_2g2a,k_2g2a
34      INTEGER azone1,azone2,azone3,azone4
35      INTEGER g1b,g2b,g3b,g4b
36      INTEGER igi1,igi2,igi3,igi4
37      INTEGER ii,i,j,k,l,N,ipos1,ipos2,ipos3,ipos4
38      INTEGER del1,del2,p1rel,p2rel
39      INTEGER size_4a,l_4a,k_4a
40c
41      integer mu,nu,rho,sigma
42      integer mu_lo,mu_hi
43      integer nu_lo,nu_hi
44      integer rho_lo,rho_hi
45      integer sigma_lo,sigma_hi
46      integer mu_range
47      integer nu_range
48      integer rho_range
49      integer sigma_range
50      integer mu1,nu1,rho1,sigma1
51      integer shift_mu,shift_nu
52      integer shift_rho,shift_sigma
53      integer work1,work2          ! Work array sizes
54      integer l_work1,k_work1      ! Work array 1
55      integer l_work2,k_work2      ! Work array 2
56      integer imu1,inu1,irho1,isigma1
57c
58      integer l_movecs_orb,k_movecs_orb
59      integer l_gpair,k_gpair
60      integer len_pair,g12_shift
61c ATTENTION,ACHTUNG,UWAGA 2000 - max # of CPU
62c
63      integer size_2g2z,l_2g2z,k_2g2z
64      integer tot_azone1_sh,tot_azone2_sh
65      integer tot_azone3_sh,tot_azone4_sh
66      integer ixi,jxi,point_pair
67      integer size_stripe,l_p34,k_p34
68      integer addr,xoffset_34
69      integer size_g3g4,xoffset_p34
70      integer size_g4321,k_g4321,l_g4321
71c
72      integer l_integral,l_coeff
73      integer k_integral,k_coeff
74      integer size_ic,size_icc,size_integral,size_coeff,max_na
75      integer l_aux,k_aux,size_aux,iend
76c
77      integer iha,ihb !number of corr. alpha, beta holes
78      integer ipa,ipb !number of corr. alpha, beta particles
79      integer INDEX_PAIR,icol,irow
80c compression
81      integer xoffset_g12(20000),xoffset_size(20000)
82      integer xoffset_size_p1p2(20000)
83      integer key_start(20000),key_end(20000)
84      integer size_temp,size_temp_4g,xoffset_temp_4g,size_p1p2
85      integer max_size_temp,xoffset_temp,iclose,iopen,offset_2g2z
86      integer imaxp12,istart,ibuba,sumx
87      double precision wall,cpu,wall1,cpu1,wall2,cpu2,wall3,cpu3
88c *** debug ***
89       double precision xtot1,xtot2,xtot3
90c *************
91      double precision tot_zone(20000)
92      integer l_g12piece,k_g12piece,size_g12p
93c - exascale ---
94      LOGICAL is_active_4_o
95c --------------
96      integer l_4af_offset,k_4af_offset,d_4af
97      integer sf_chunk,request
98      integer key_4af,offset_4af,size_4af
99      character*255 filename
100c
101      logical parallel
102c
103      INTEGER length
104      INTEGER next
105      INTEGER nprocs
106      INTEGER count
107      integer nxtask
108      external nxtask
109      logical nodezero
110      logical idiskl
111c
112c
113c
114c
115cccx      parallel = (ga_nnodes().gt.1)
116      parallel = .true.
117c
118      if(idisk.eq.0) then
119       idiskl=.false.
120      else
121       idiskl=.true.
122      end if
123c
124c
125cccx      max_size_temp=100000000
126      max_size_temp=imaxsize**4
127c
128c
129c
130      do ii=1,20000
131       tot_zone(ii)=0.0d0
132      enddo
133      if(atpart.gt.20000)
134     &  call errquit('tce_zones: atpart too big',1,MA_ERR)
135      sumx=0
136      do ii=1,atpart
137       tot_zone(ii)=sumx
138       sumx=sumx+nalength(ii)
139      enddo
140c
141      nodezero=(ga_nodeid().eq.0)
142c
143c
144c this module is called only if intorb = .true.
145c N is the number of correlated orbitals
146        N = nmo(1) - nfc(1) - nfv(1)
147        iha = nocc(1)-nfc(1)
148        ihb = nocc(ipol)-nfc(ipol)
149        ipa = nmo(1)-nocc(1)-nfv(1)
150        ipb = nmo(ipol)-nocc(ipol)-nfv(ipol)
151c
152c     Offset for 4a file
153c
154cc      sf_chunk=(imaxsize+10)**4
155      sf_chunk=(imaxsize)**4
156      if(.not.idiskl) then
157c GA is assumed
158       call tce_4a_offset(l_4af_offset,k_4af_offset,size_4af)
159      else
160cccx  call tce_filename('4aint',filename)
161       call tce_4a_offset(l_4af_offset,k_4af_offset,size_4af)
162cccx       call util_file_name('4aintx',.true.,.false.,filename)
163       call util_file_name('4aintx',.false.,.false.,filename)
164      end if
165      if(idiskl) then
166       if(.not.parallel)
167     1      call errquit('sf only for parallel runs',1,DISK_ERR)
168       if(parallel) call ga_sync()
169        if (sf_create(filename,dble(bytes)*dble(size_4af),
170     1    dble(bytes)*dble(size_4af),sf_chunk,d_4af).ne.0)
171     2    call errquit('4-index: sf problem',0,DISK_ERR)
172ccx        if (parallel) then
173ccx          if (sf_close(d_4af).ne.0)
174ccx     1      call errquit('createfile: sf problem',1,DISK_ERR)
175ccx          call ga_sync()
176ccx        end if
177      else
178       call createfile(filename,d_4af,size_4af)
179       call reconcilefile(d_4af,size_4af)
180       call ga_zero(d_4af)
181      end if
182c
183c
184c *** debug ***
185c        write(6,*)'step-1'
186c        call util_flush(6)
187c *************
188c
189c     Pair's structure of the integral file
190      call tce_mo2e_pairs_act(l_gpair,k_gpair,len_pair)
191      len_pair = int_mb(k_gpair)
192c
193c *** debug ***
194c        write(6,*)'step-0'
195c        call util_flush(6)
196c *************
197c
198c alpha orbitals only
199c
200      if (.not.ma_push_get(mt_dbl,nbf*(iha+ipa)
201     1  ,"sorted MO coeffs",
202     2  l_movecs_orb,k_movecs_orb))
203     3  call errquit('tce_mo2e_zone: MA problem 1',0,
204     2    BASIS_ERR)
205      call dfill(nbf*(iha+ipa),0.0d0, dbl_mb(k_movecs_orb), 1)
206      do i=1,iha
207      do isigma1=1,nbf
208       dbl_mb(k_movecs_orb+(i-1)*nbf+isigma1-1)=
209     & dbl_mb(k_movecs_sorted+(i-1)*nbf+isigma1-1)
210      enddo
211      enddo
212      do i=iha+1,iha+ipa
213      do isigma1=1,nbf
214       dbl_mb(k_movecs_orb+(i-1)*nbf+isigma1-1)=
215     & dbl_mb(k_movecs_sorted+(i+ihb-1)*nbf+isigma1-1)
216      enddo
217      enddo
218c
219c
220      call int_mem_2e4c(work1,work2)
221      if (.not.ma_push_get(mt_dbl,work1,'work1',l_work1,k_work1))
222     1  call errquit('tce_ao2e: MA problem work1',0,MA_ERR)
223      if (.not.ma_push_get(mt_dbl,work2,'work2',l_work2,k_work2))
224     1  call errquit('tce_ao2e: MA problem work2',1,MA_ERR)
225c
226c
227c *** debug ***
228c        write(6,*)'step1'
229c        call util_flush(6)
230c *************
231c
232c 4af file formed here
233c
234c
235             cpu1 = - util_cpusec()
236             wall1 = - util_wallsec()
237c
238      nprocs = GA_NNODES()
239      count = 0
240      next = NXTASK(nprocs, 1)
241      DO azone1 = 1,atpart      !nu
242      DO azone2 = azone1,atpart !mu
243      DO azone3 = 1,atpart      !sigma
244      DO azone4 = azone3,atpart !rho
245      IF (next.eq.count) THEN
246c ---------------------------
247        size_4a = nalength(azone1)*nalength(azone2)*
248     1            nalength(azone3)*nalength(azone4)
249        if(.not.ma_push_get(mt_dbl,size_4a,'4a',l_4a,k_4a))
250     1     call errquit('tce_4af_zones1: MA problem',0,MA_ERR)
251        call dfill(size_4a, 0.0d0, dbl_mb(k_4a), 1)
252         shift_mu = 0
253         do mu    = a2length(azone2)+1,a2length(azone2+1)
254            if (.not.bas_cn2bfr(ao_bas_han,mu,mu_lo,mu_hi))
255     1      call errquit('tce_ao2e: basis fn range problem 1',0,
256     2      BASIS_ERR)
257            mu_range = mu_hi - mu_lo + 1
258         shift_nu = 0
259         do nu    = a2length(azone1)+1,a2length(azone1+1)
260            if (.not.bas_cn2bfr(ao_bas_han,nu,nu_lo,nu_hi))
261     1      call errquit('tce_ao2e: basis fn range problem 1',0,
262     2      BASIS_ERR)
263            nu_range = nu_hi - nu_lo + 1
264         shift_rho = 0
265         do rho   = a2length(azone4)+1,a2length(azone4+1)
266            if (.not.bas_cn2bfr(ao_bas_han,rho,rho_lo,rho_hi))
267     1      call errquit('tce_ao2e: basis fn range problem 1',0,
268     2      BASIS_ERR)
269            rho_range = rho_hi - rho_lo + 1
270         shift_sigma = 0
271         do sigma = a2length(azone3)+1,a2length(azone3+1)
272            if (.not.bas_cn2bfr(ao_bas_han,sigma,sigma_lo,sigma_hi))
273     1      call errquit('tce_ao2e: basis fn range problem 1',0,
274     2      BASIS_ERR)
275            sigma_range = sigma_hi - sigma_lo + 1
276            if (schwarz_shell(rho,sigma)*schwarz_shell(mu,nu)
277     1          .ge. tol2e) then
278cccx            call dfill(work1, 0.0d0, dbl_mb(k_work1), 1)
279cccx            call dfill(work2, 0.0d0, dbl_mb(k_work2), 1)
280            call int_2e4c(ao_bas_han,mu,nu,ao_bas_han,rho,sigma,
281     1           work2,dbl_mb(k_work2),work1,dbl_mb(k_work1))
282c
283            i=0
284             do mu1     = 1,mu_range
285             do nu1     = 1,nu_range
286             do rho1    = 1,rho_range
287             do sigma1  = 1,sigma_range
288            i=i+1
289            inu1=nu1+shift_nu
290            isigma1=sigma1+shift_sigma
291            imu1=mu1+shift_mu
292            irho1=rho1+shift_rho
293c (isigma1,irho1|inu1, imu1)
294            ipos1=(((imu1-1)*nalength(azone1)+inu1-1)*
295     1            nalength(azone4)+irho1-1)*nalength(azone3)
296     2            +isigma1
297            dbl_mb(k_4a+ipos1-1)=dbl_mb(k_work1+i-1)
298            enddo
299            enddo
300            enddo
301            enddo
302            end if !schwarz  screening
303         shift_sigma = shift_sigma + sigma_range
304         enddo !sigma
305         shift_rho   = shift_rho + rho_range
306         enddo !rho
307         shift_nu    = shift_nu + nu_range
308         enddo !nu
309         shift_mu    = shift_mu + mu_range
310         enddo !mu
311c
312c fixing offsets and sf_writing
313         key_4af=azone4 - 1 + atpart * (azone3 - 1 +
314     &          atpart * (azone2 - 1 + atpart * (azone1 - 1)))
315         call tce_hash(int_mb(k_4af_offset),key_4af,offset_4af)
316      if(idiskl) then
317        if (sf_write(d_4af,dble(bytes)*dble(offset_4af),
318     1    dble(bytes)*dble(size_4a),dbl_mb(k_4a),request).ne.0)
319     2    call errquit('zones put: sf problem2',1,DISK_ERR)
320        if (sf_wait(request).ne.0)
321     1    call errquit('zones put: sf problem3',2,DISK_ERR)
322      else
323        call ga_put(d_4af,offset_4af+1,offset_4af+size_4a,1,1,
324     1    dbl_mb(k_4a),size_4a)
325      end if
326c closing l_4a file
327        if (.not.ma_pop_stack(l_4a))
328     1   call errquit('tcc_mo2e_4af2: l_4a',15,MA_ERR)
329c ---------------------------
330      next = NXTASK(nprocs, 1)
331      END IF
332      count = count + 1
333      ENDDO !azone4
334      ENDDO !azone3
335      ENDDO !azone2
336      ENDDO !azone1
337c
338c
339c
340       call ga_sync()
341c
342c
343c
344         call ga_sync()
345c
346c
347c *** debug ***
348c        write(6,*)'step2'
349c        call util_flush(6)
350c *************
351c
352c
353      next = nxtask(-nprocs,1)
354c
355c
356      if(.not.idiskl) then
357      call reconcilefile(d_4af,size_4af)
358      end if
359c
360c
361c
362      max_na=0
363      do ixi=1,atpart
364       if(nalength(ixi).gt.max_na) max_na=nalength(ixi)
365      enddo
366        size_ic=2*((max_na)**4)+1
367        size_icc=tile_dim*max_na
368c
369c
370c
371       if (.not.ma_push_get(mt_dbl,size_ic,'l_int',
372     1  l_integral,k_integral))
373     1  call errquit('tce_4s: MA problem l_int',0,MA_ERR)
374c
375       if (.not.ma_push_get(mt_dbl,size_icc,'l_coeff',
376     1  l_coeff,k_coeff))
377     1  call errquit('tce_4s: MA problem l_coeff',0,MA_ERR)
378c
379c
380c
381c
382      nprocs = GA_NNODES()
383      count = 0
384      next = NXTASK(nprocs, 1)
385      DO g3b = 1,noa+nva   !k
386      DO g4b = g3b,noa+nva !l
387      DO azone1 = 1,atpart !nu
388      DO azone2 = azone1,atpart !mu
389      IF (next.eq.count) THEN
390c
391             cpu1 = - util_cpusec()
392             wall1 = - util_wallsec()
393c
394      size_2g2a=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)
395     1          *nalength(azone1)*nalength(azone2)
396      if (.not.ma_push_get(mt_dbl,size_2g2a,'2g2a',l_2g2a,k_2g2a))
397     1    call errquit('tce_r2_divide1: MA problem',0,MA_ERR)
398      call dfill(size_2g2a, 0.0d0, dbl_mb(k_2g2a), 1)
399c *** debug ***
400c        write(6,*)'step3'
401c        call util_flush(6)
402c *************
403c
404      DO azone3 = 1,atpart      !sigma
405      DO azone4 = azone3,atpart !rho
406        size_4a = nalength(azone1)*nalength(azone2)*
407     1            nalength(azone3)*nalength(azone4)
408        if(.not.ma_push_get(mt_dbl,size_4a,'4a',l_4a,k_4a))
409     1     call errquit('tce_r2_divide2: MA problem',0,MA_ERR)
410cccx        call dfill(size_4a, 0.0d0, dbl_mb(k_4a), 1)
411c
412c
413c
414c
415         key_4af=azone4 - 1 + atpart * (azone3 - 1 +
416     &          atpart * (azone2 - 1 + atpart * (azone1 - 1)))
417         call tce_hash(int_mb(k_4af_offset),key_4af,offset_4af)
418       if(idiskl) then
419        if (sf_read(d_4af,dble(bytes)*dble(offset_4af),
420     1    dble(bytes)*dble(size_4a),dbl_mb(k_4a),request).ne.0)
421     1     call errquit('zones get2: sf problem',1,DISK_ERR)
422        if (sf_wait(request).ne.0)
423     1    call errquit('zones get3: sf problem',2,DISK_ERR)
424       else
425        call ga_get(d_4af,offset_4af+1,offset_4af+size_4a,1,1,
426     1    dbl_mb(k_4a),size_4a)
427       end if
428c
429c
430       tot_azone3_sh=tot_zone(azone3)
431       tot_azone4_sh=tot_zone(azone4)
432c
433      if(azone3.ne.azone4) then !!============azone3.ne.azone4
434c --- C_[g3][sigma]
435       i = 0
436       do isigma1 =  1,nalength(azone3)
437       do igi3    =  1,int_mb(k_range_alpha+g3b-1)
438       i = i+1
439       ipos1=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone3_sh
440     &       +isigma1
441       dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1)
442       enddo
443       enddo
444c --- C_[g3][sigma]([sigma]<[rho]|[nu] [mu])
445c            zone3   zone3  zone4
446      call  dgemm('N','N',
447     1 int_mb(k_range_alpha+g3b-1),  !m
448     1 nalength(azone1)*nalength(azone2)*nalength(azone4), !n
449     1 nalength(azone3), !k
450     1 1.0d0,
451     1 dbl_mb(k_coeff),                    !A
452     1 int_mb(k_range_alpha+g3b-1), !lda
453     1 dbl_mb(k_4a),nalength(azone3),          ! B,ldb
454     1 0.0d0,dbl_mb(k_integral),
455     1 int_mb(k_range_alpha+g3b-1))
456c transposition ([g3][rho]|[nu][mu]) => ([rho][g3]|[nu][mu])
457c
458       size_aux=int_mb(k_range_alpha+g3b-1)*nalength(azone4)*
459     1          nalength(azone1)*nalength(azone2)
460       if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux))
461     1     call errquit('tce_aux1: MA problem',0,MA_ERR)
462      CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux),
463     & int_mb(k_range_alpha+g3b-1),nalength(azone4),
464     & nalength(azone1),nalength(azone2),
465     &2,1,3,4,1.0d0)
466      do i=1,size_aux
467       dbl_mb(k_integral+i-1)=dbl_mb(k_aux+i-1)
468      enddo
469      if (.not.ma_pop_stack(l_aux))
470     1     call errquit('l_aux1',15,MA_ERR)
471c C([g4][azone4=rho])
472       i = 0
473       do irho1   =  1,nalength(azone4)
474       do igi4    =  1,int_mb(k_range_alpha+g4b-1)
475       i = i+1
476       ipos1=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone4_sh
477     &       +irho1
478       dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1)
479       enddo
480       enddo
481c C([g4][azone4=rho])([rho][g3]|[azone1][azone2])
482      call  dgemm('N','N',
483     1 int_mb(k_range_alpha+g4b-1),  !m
484     1 nalength(azone1)*nalength(azone2)*int_mb(k_range_alpha+g3b-1), !n
485     1 nalength(azone4), !k
486     1 1.0d0,
487     1 dbl_mb(k_coeff),                    !A
488     1 int_mb(k_range_alpha+g4b-1), !lda
489     1 dbl_mb(k_integral),nalength(azone4),          ! B,ldb
490     1 1.0d0,dbl_mb(k_2g2a),
491     1 int_mb(k_range_alpha+g4b-1))
492c  done with first  part of azone3.ne.azone4 case ----------------
493c  transposition of atomic integrals
494c  ([sigma=azone3][rho=azone4]|[nu][mu]) ==>
495c  ([rho][sigma]|[nu][mu])
496       size_aux=size_4a
497       if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux))
498     1     call errquit('tce_aux2: MA problem',0,MA_ERR)
499      CALL TCE_SORT_4KG_(dbl_mb(k_4a),dbl_mb(k_aux),
500     & nalength(azone3),nalength(azone4),
501     & nalength(azone1),nalength(azone2),
502     &2,1,3,4,1.0d0)
503ccx      if (.not.ma_pop_stack(l_aux))
504ccx     1     call errquit('l_aux2',15,MA_ERR) ! see after dgemm
505c C([g3][azone4=rho])
506       i = 0
507       do irho1   =  1,nalength(azone4)
508       do igi3    =  1,int_mb(k_range_alpha+g3b-1)
509       i = i+1
510       ipos1=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone4_sh
511     &       +irho1
512       dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1)
513       enddo
514       enddo
515c C([g3][azone4=rho])([rho=azone4][sigma=azone3]|[nu][mu])
516      call  dgemm('N','N',
517     1 int_mb(k_range_alpha+g3b-1),  !m
518     1 nalength(azone1)*nalength(azone2)*nalength(azone3), !n
519     1 nalength(azone4), !k
520     1 1.0d0,
521     1 dbl_mb(k_coeff),                    !A
522     1 int_mb(k_range_alpha+g3b-1), !lda
523     1 dbl_mb(k_aux),nalength(azone4),          ! B,ldb
524     1 0.0d0,dbl_mb(k_integral),
525     1 int_mb(k_range_alpha+g3b-1))
526c
527      if (.not.ma_pop_stack(l_aux))
528     1     call errquit('l_aux2',15,MA_ERR)
529c transposition ([g3][sigma]|[nu][mu]) => ([sigma][g3]|[nu][mu])
530c
531       size_aux=int_mb(k_range_alpha+g3b-1)*nalength(azone3)*
532     1          nalength(azone1)*nalength(azone2)
533       if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux))
534     1     call errquit('tce_aux1: MA problem',0,MA_ERR)
535      CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux),
536     & int_mb(k_range_alpha+g3b-1),nalength(azone3),
537     & nalength(azone1),nalength(azone2),
538     &2,1,3,4,1.0d0)
539      do i=1,size_aux
540       dbl_mb(k_integral+i-1)=dbl_mb(k_aux+i-1)
541      enddo
542      if (.not.ma_pop_stack(l_aux))
543     1     call errquit('l_aux1',15,MA_ERR)
544c C([g4][azone3=sigma])
545       i = 0
546       do isigma1   =  1,nalength(azone3)
547       do igi4    =  1,int_mb(k_range_alpha+g4b-1)
548       i = i+1
549       ipos1=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone3_sh
550     &       +isigma1
551       dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1)
552       enddo
553       enddo
554c C([g4][azone3=sigma])([sigma][g3]|[azone1][azone2])
555      call  dgemm('N','N',
556     1 int_mb(k_range_alpha+g4b-1),  !m
557     1 nalength(azone1)*nalength(azone2)*int_mb(k_range_alpha+g3b-1), !n
558     1 nalength(azone3), !k
559     1 1.0d0,
560     1 dbl_mb(k_coeff),                    !A
561     1 int_mb(k_range_alpha+g4b-1), !lda
562     1 dbl_mb(k_integral),nalength(azone3),          ! B,ldb
563     1 1.0d0,dbl_mb(k_2g2a),
564     1 int_mb(k_range_alpha+g4b-1))
565      end if !azone3.ne.azone4
566c
567c
568c
569      if(azone3.eq.azone4) then
570c --- C_[g3][sigma]
571       i = 0
572       do isigma1 =  1,nalength(azone3)
573       do igi3    =  1,int_mb(k_range_alpha+g3b-1)
574       i = i+1
575       ipos1=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone3_sh
576     &       +isigma1
577       dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1)
578       enddo
579       enddo
580c --- C_[g3][sigma]([sigma]=[rho]|[nu] [mu])
581c            zone3   zone3  zone4
582      call  dgemm('N','N',
583     1 int_mb(k_range_alpha+g3b-1),  !m
584     1 nalength(azone1)*nalength(azone2)*nalength(azone4), !n
585     1 nalength(azone3), !k
586     1 1.0d0,
587     1 dbl_mb(k_coeff),                    !A
588     1 int_mb(k_range_alpha+g3b-1), !lda
589     1 dbl_mb(k_4a),nalength(azone3),          ! B,ldb
590     1 0.0d0,dbl_mb(k_integral),
591     1 int_mb(k_range_alpha+g3b-1))
592c transposition ([g3][rho]|[nu][mu]) => ([rho][g3]|[nu][mu])
593c
594       size_aux=int_mb(k_range_alpha+g3b-1)*nalength(azone4)*
595     1          nalength(azone1)*nalength(azone2)
596       if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux))
597     1     call errquit('tce_aux1: MA problem',0,MA_ERR)
598      CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux),
599     & int_mb(k_range_alpha+g3b-1),nalength(azone4),
600     & nalength(azone1),nalength(azone2),
601     &2,1,3,4,1.0d0)
602      do i=1,size_aux
603       dbl_mb(k_integral+i-1)=dbl_mb(k_aux+i-1)
604      enddo
605      if (.not.ma_pop_stack(l_aux))
606     1     call errquit('l_aux1',15,MA_ERR)
607c C([g4][azone4=rho])
608       i = 0
609       do irho1   =  1,nalength(azone4)
610       do igi4    =  1,int_mb(k_range_alpha+g4b-1)
611       i = i+1
612       ipos1=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone4_sh
613     &       +irho1
614       dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1)
615       enddo
616       enddo
617c C([g4][azone4=rho])([rho][g3]|[azone1][azone2])
618      call  dgemm('N','N',
619     1 int_mb(k_range_alpha+g4b-1),  !m
620     1 nalength(azone1)*nalength(azone2)*int_mb(k_range_alpha+g3b-1), !n
621     1 nalength(azone4), !k
622     1 1.0d0,
623     1 dbl_mb(k_coeff),                    !A
624     1 int_mb(k_range_alpha+g4b-1), !lda
625     1 dbl_mb(k_integral),nalength(azone4),          ! B,ldb
626     1 1.0d0,dbl_mb(k_2g2a),
627     1 int_mb(k_range_alpha+g4b-1))
628      end if !azone3.eq.azone4
629c
630c
631c
632        if (.not.ma_pop_stack(l_4a))
633     1   call errquit('tcc_mo2e_trans_zones: l_4a',15,MA_ERR)
634      ENDDO !azone4
635c one step ahead
636      ENDDO !azone3
637c
638c
639c
640c
641c *** debug ***
642c        write(6,*)'step4'
643c        call util_flush(6)
644c *************
645c
646c
647c g2 g1 do loops
648c open (g4 g3 | all symmetry allowed g2 g1) - equally good we can split it into
649c                                              pieces
650c (k,l|i,j) pieces
651c
652c   calculate point_pair index here
653         ixi=noa+nva-g3b+1
654         jxi=noa+nva-g4b+1
655         point_pair=((noa+nva)*(noa+nva+1))/2-((ixi-1)*ixi)/2-jxi+1
656         size_stripe=int_mb(k_gpair+point_pair)
657         xoffset_p34 = int_mb(k_gpair+len_pair+point_pair)
658         addr=int_mb(k_gpair+2*len_pair+point_pair)
659c
660c - exascale ---
661       if((size_stripe.eq.0).and.(xoffset_p34.eq.0)) go to 300
662c ---
663c
664c
665c offset for blocking the (nu mu | g2 g1 ) = C*C matrix
666c
667      do i=1,20000
668       key_start(i) = 0
669       key_end(i)   = 0
670       xoffset_g12(i)  = 0
671       xoffset_size(i) = 0
672       xoffset_size_p1p2(i) = 0
673      enddo
674      imaxp12=0
675      size_temp=0
676      size_temp_4g=0
677      xoffset_temp=0
678      xoffset_temp_4g=0
679      size_p1p2=0
680      i=1
681      DO g1b = 1,noa+nva   !l
682      DO g2b = g1b,noa+nva !k
683c - exascale ---
684      IF(.not.((g3b.gt.noa).and.(g4b.gt.noa).and.(g1b.gt.noa).and.
685     &   (g2b.gt.noa).and.(.not.is_active_4_o(g3b,g4b,g1b,g2b)))) THEN
686c ---
687       IF (int_mb(k_spin_alpha+g3b-1)+int_mb(k_spin_alpha+g4b-1).eq.
688     & int_mb(k_spin_alpha+g1b-1)+int_mb(k_spin_alpha+g2b-1)) THEN
689       IF (ieor(int_mb(k_sym_alpha+g3b-1),ieor(int_mb(k_sym_alpha+g4b-1)
690     & ,ieor(int_mb(k_sym_alpha+g1b-1),int_mb(k_sym_alpha+g2b-1)))) .eq.
691     & irrep_v) THEN
692       IROW=INDEX_PAIR(g4b,g3b)
693       ICOL=INDEX_PAIR(g2b,g1b)
694       IF(IROW.GE.ICOL) THEN
695         if(size_temp_4g.eq.0) then
696          key_start(i)=icol
697         end if
698         size_temp=size_temp+int_mb(k_range_alpha+g2b-1)
699     1          *int_mb(k_range_alpha+g1b-1)
700     1          *nalength(azone1)*nalength(azone2)
701         size_temp_4g=size_temp_4g+int_mb(k_range_alpha+g2b-1)
702     1          *int_mb(k_range_alpha+g1b-1)
703     2          *int_mb(k_range_alpha+g3b-1)
704     3          *int_mb(k_range_alpha+g4b-1)
705         size_p1p2=size_p1p2+int_mb(k_range_alpha+g2b-1)
706     1          *int_mb(k_range_alpha+g1b-1)
707         ibuba=icol
708c
709c         if(size_temp.gt.max_size_temp) then
710c has to be fully consistent with later part
711c
712         if(size_temp_4g.gt.max_size_temp) then
713           xoffset_g12(i)=xoffset_temp_4g
714           xoffset_size(i)=size_temp
715           xoffset_size_p1p2(i)=size_p1p2
716           xoffset_temp=xoffset_temp+size_temp
717           xoffset_temp_4g=xoffset_temp_4g+size_temp_4g
718           key_end(i)=icol
719           size_temp=0
720           size_temp_4g=0
721           size_p1p2=0
722           imaxp12=i
723           i=i+1
724          end if
725       END IF
726       END IF
727       END IF
728c --- exascale -
729       END IF
730c --------------
731      ENDDO
732      ENDDO
733c
734      if(size_temp_4g.ne.0) then
735           xoffset_g12(i)=xoffset_temp_4g
736           xoffset_size(i)=size_temp
737           xoffset_size_p1p2(i)=size_p1p2
738           key_end(i)=ibuba
739           imaxp12=i
740      end if
741c
742      if(i.gt.20000)
743     1  call errquit('tce_zone: xoffset-size-problem',0,MA_ERR)
744c
745c *** debug ***
746c        write(6,*)'step5'
747c        call util_flush(6)
748c *************
749c
750      do i =1, imaxp12
751        size_g12p=int_mb(k_range_alpha+g4b-1)*
752     1                int_mb(k_range_alpha+g3b-1)*
753     1                xoffset_size_p1p2(i)
754        if (.not.ma_push_get(mt_dbl,size_g12p,'g12piece',
755     1      l_g12piece,k_g12piece))
756     1      call errquit('tce_zone: MA g12-piece ',0,MA_ERR)
757c TRY IF THIS CAN BE BY-PASSED
758        call dfill(size_g12p, 0.0d0, dbl_mb(k_g12piece), 1)
759c
760c
761c
762c
763      offset_2g2z=0 !
764c
765c
766c
767      iend=0
768      istart=0
769      DO g1b = 1,noa+nva     !l
770      DO g2b = g1b,noa+nva   !k
771       ICOL=INDEX_PAIR(g2b,g1b)
772       IF(iend.eq.1) go to 4444
773       if(istart.ne.1) then
774        IF(ICOL.eq.key_start(i)) istart=1
775       end if
776       if(icol.eq.key_end(i)) iend=1
777       IF(istart.eq.1) THEN
778c - exascale ---
779      IF(.not.((g3b.gt.noa).and.(g4b.gt.noa).and.(g1b.gt.noa).and.
780     &   (g2b.gt.noa).and.(.not.is_active_4_o(g3b,g4b,g1b,g2b)))) THEN
781c ---
782       IF (int_mb(k_spin_alpha+g3b-1)+int_mb(k_spin_alpha+g4b-1).eq.
783     & int_mb(k_spin_alpha+g1b-1)+int_mb(k_spin_alpha+g2b-1)) THEN
784       IF (ieor(int_mb(k_sym_alpha+g3b-1),ieor(int_mb(k_sym_alpha+g4b-1)
785     & ,ieor(int_mb(k_sym_alpha+g1b-1),int_mb(k_sym_alpha+g2b-1)))) .eq.
786     & irrep_v) THEN
787       IROW=INDEX_PAIR(g4b,g3b)
788       ICOL=INDEX_PAIR(g2b,g1b)
789       IF(IROW.GE.ICOL) THEN
790        tot_azone1_sh=tot_zone(azone1)
791        tot_azone2_sh=tot_zone(azone2)
792       if(azone1.ne.azone2) then !-----------------
793c C_([mu-azone2][g2])
794        ii=0
795        do igi2    =  1,int_mb(k_range_alpha+g2b-1)
796        do imu1    =  1,nalength(azone2)
797         ii = ii+1
798         ipos1=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone2_sh
799     &           +imu1
800         dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1)
801        enddo
802        enddo
803c ([g4][g3]|[nu-azone1][mu-azone2])*C_([mu-azone2][g2])
804         call dgemm('N','N',
805     1   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
806     1   nalength(azone1),                                        !m
807     1   int_mb(k_range_alpha+g2b-1),                             !n
808     3   nalength(azone2),                                        !k
809     4   1.0d0,dbl_mb(k_2g2a),
810     5   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
811     5   nalength(azone1),
812     6   dbl_mb(k_coeff),nalength(azone2),
813     7   0.0d0,dbl_mb(k_integral),
814     8   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
815     8   nalength(azone1))
816c transposition ([g4][g3]|[nu-azone1][g2])=>([g4][g3]|[g2][nu-azone1])
817       size_aux=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)
818     1 *int_mb(k_range_alpha+g2b-1)*nalength(azone1)
819       if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux))
820     1     call errquit('tce_aux2: MA problem',0,MA_ERR)
821      CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux),
822     &int_mb(k_range_alpha+g4b-1),int_mb(k_range_alpha+g3b-1),
823     &nalength(azone1),int_mb(k_range_alpha+g2b-1),
824     &1,2,4,3,1.0d0)
825      do ii=1,size_aux
826       dbl_mb(k_integral+ii-1)=dbl_mb(k_aux+ii-1)
827      enddo
828      if (.not.ma_pop_stack(l_aux))
829     1     call errquit('l_aux2',15,MA_ERR)
830c C_([nu-azone1][g1])
831        ii=0
832        do igi1    =  1,int_mb(k_range_alpha+g1b-1)
833        do inu1    =  1,nalength(azone1)
834         ii = ii+1
835         ipos1=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone1_sh
836     &           +inu1
837         dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1)
838        enddo
839        enddo
840c ([g4][g3]|[g2][nu-azone1])*C_([nu-azone1][g1])
841         call dgemm('N','N',
842     1   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
843     1   int_mb(k_range_alpha+g2b-1),                             !m
844     1   int_mb(k_range_alpha+g1b-1),                             !n
845     3   nalength(azone1),                                        !k
846     4   1.0d0,dbl_mb(k_integral),
847     5   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
848     5   int_mb(k_range_alpha+g2b-1),
849     6   dbl_mb(k_coeff),nalength(azone1),
850     7   1.0d0,dbl_mb(k_g12piece+offset_2g2z),
851     8   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
852     8   int_mb(k_range_alpha+g2b-1))
853c done with first  part of azone3.ne.azone4 case ----------------
854c  transposition of atomic integrals
855c  ([g4][g3]|[azone1-nu][azone2-mu]) ==>
856c  ([g4][g3]|[azone2-mu][azone1-nu])
857       size_aux=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)
858     1 *nalength(azone1)*nalength(azone2)
859       if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux))
860     1     call errquit('tce_aux2: MA problem',0,MA_ERR)
861      CALL TCE_SORT_4KG_(dbl_mb(k_2g2a),dbl_mb(k_aux),
862     & int_mb(k_range_alpha+g4b-1),int_mb(k_range_alpha+g3b-1),
863     & nalength(azone1),nalength(azone2),
864     & 1,2,4,3,1.0d0)
865ccx      if (.not.ma_pop_stack(l_aux))
866ccx     1     call errquit('l_aux2',15,MA_ERR) ! see after dgemm
867c C([azone1-nu][g2])
868        ii=0
869        do igi2    =  1,int_mb(k_range_alpha+g2b-1)
870        do inu1    =  1,nalength(azone1)
871         ii = ii+1
872         ipos1=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone1_sh
873     &           +inu1
874         dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1)
875        enddo
876        enddo
877c ([g4][g3]|[azone2-mu][azone1-nu])*C([azone1-nu][g2])
878         call dgemm('N','N',
879     1   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
880     1   nalength(azone2),                                        !m
881     1   int_mb(k_range_alpha+g2b-1),                             !n
882     3   nalength(azone1),                                        !k
883     4   1.0d0,dbl_mb(k_aux),
884     5   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
885     5   nalength(azone2),
886     6   dbl_mb(k_coeff),nalength(azone1),
887     7   0.0d0,dbl_mb(k_integral),
888     8   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
889     8   nalength(azone2))
890c
891      if (.not.ma_pop_stack(l_aux))
892     1     call errquit('l_aux2',15,MA_ERR)
893c transposition ([g4][g3]|[azone2-mu][g2])=>([g4][g3]|[g2][azone2-mu])
894       size_aux=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)
895     1 *int_mb(k_range_alpha+g2b-1)*nalength(azone2)
896       if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux))
897     1     call errquit('tce_aux2: MA problem',0,MA_ERR)
898      CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux),
899     &int_mb(k_range_alpha+g4b-1),int_mb(k_range_alpha+g3b-1),
900     &nalength(azone2),int_mb(k_range_alpha+g2b-1),
901     &1,2,4,3,1.0d0)
902      do ii=1,size_aux
903       dbl_mb(k_integral+ii-1)=dbl_mb(k_aux+ii-1)
904      enddo
905      if (.not.ma_pop_stack(l_aux))
906     1     call errquit('l_aux2',15,MA_ERR)
907c C([azone2-mu][g1])
908        ii=0
909        do igi1    =  1,int_mb(k_range_alpha+g1b-1)
910        do imu1    =  1,nalength(azone2)
911         ii = ii+1
912         ipos1=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone2_sh
913     &           +imu1
914         dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1)
915        enddo
916        enddo
917c ([g4][g3]|[g2][azone2-mu])*C([azone2-mu][g1])
918         call dgemm('N','N',
919     1   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
920     1   int_mb(k_range_alpha+g2b-1),                             !m
921     1   int_mb(k_range_alpha+g1b-1),                             !n
922     3   nalength(azone2),                                        !k
923     4   1.0d0,dbl_mb(k_integral),
924     5   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
925     5   int_mb(k_range_alpha+g2b-1),
926     6   dbl_mb(k_coeff),nalength(azone2),
927     7   1.0d0,dbl_mb(k_g12piece+offset_2g2z),
928     8   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
929     8   int_mb(k_range_alpha+g2b-1))
930c
931        offset_2g2z=offset_2g2z+int_mb(k_range_alpha+g4b-1)*
932     1    int_mb(k_range_alpha+g3b-1)*int_mb(k_range_alpha+g2b-1)*
933     1    int_mb(k_range_alpha+g1b-1)
934       end if ! azone1.ne.azone2 !-----------------
935c
936c second part
937c
938        if(azone1.eq.azone2) then
939c C_([mu-azone2][g2])
940        ii=0
941        do igi2    =  1,int_mb(k_range_alpha+g2b-1)
942        do imu1    =  1,nalength(azone2)
943         ii = ii+1
944         ipos1=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone2_sh
945     &           +imu1
946         dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1)
947        enddo
948        enddo
949c ([g4][g3]|[nu-azone1][mu-azone2])*C_([mu-azone2][g2])
950         call dgemm('N','N',
951     1   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
952     1   nalength(azone1),                                        !m
953     1   int_mb(k_range_alpha+g2b-1),                             !n
954     3   nalength(azone2),                                        !k
955     4   1.0d0,dbl_mb(k_2g2a),
956     5   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
957     5   nalength(azone1),
958     6   dbl_mb(k_coeff),nalength(azone2),
959     7   0.0d0,dbl_mb(k_integral),
960     8   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
961     8   nalength(azone1))
962c transposition ([g4][g3]|[nu-azone1][g2])=>([g4][g3]|[g2][nu-azone1])
963       size_aux=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)
964     1 *int_mb(k_range_alpha+g2b-1)*nalength(azone1)
965       if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux))
966     1     call errquit('tce_aux2: MA problem',0,MA_ERR)
967      CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux),
968     &int_mb(k_range_alpha+g4b-1),int_mb(k_range_alpha+g3b-1),
969     &nalength(azone1),int_mb(k_range_alpha+g2b-1),
970     &1,2,4,3,1.0d0)
971      do ii=1,size_aux
972       dbl_mb(k_integral+ii-1)=dbl_mb(k_aux+ii-1)
973      enddo
974      if (.not.ma_pop_stack(l_aux))
975     1     call errquit('l_aux2',15,MA_ERR)
976c C_([nu-azone1][g1])
977        ii=0
978        do igi1    =  1,int_mb(k_range_alpha+g1b-1)
979        do inu1    =  1,nalength(azone1)
980         ii = ii+1
981         ipos1=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone1_sh
982     &           +inu1
983         dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1)
984        enddo
985        enddo
986c ([g4][g3]|[g2][nu-azone1])*C_([nu-azone1][g1])
987         call dgemm('N','N',
988     1   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
989     1   int_mb(k_range_alpha+g2b-1),                             !m
990     1   int_mb(k_range_alpha+g1b-1),                             !n
991     3   nalength(azone1),                                        !k
992     4   1.0d0,dbl_mb(k_integral),
993     5   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
994     5   int_mb(k_range_alpha+g2b-1),
995     6   dbl_mb(k_coeff),nalength(azone1),
996     7   1.0d0,dbl_mb(k_g12piece+offset_2g2z),
997     8   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
998     8   int_mb(k_range_alpha+g2b-1))
999c
1000        offset_2g2z=offset_2g2z+int_mb(k_range_alpha+g4b-1)*
1001     1    int_mb(k_range_alpha+g3b-1)*int_mb(k_range_alpha+g2b-1)*
1002     1    int_mb(k_range_alpha+g1b-1)
1003c
1004        end if !azone1.eq.azone2
1005c !---------------------------------------------------------------------
1006       END IF ! irow.gt.icol
1007       END IF ! symmetry
1008       END IF ! spin
1009c --- exascale -
1010       END IF
1011c --------------
1012       END IF ! istart
1013      ENDDO  ! g2b-loop ends up here
1014      ENDDO  ! g1b-loop ends up here
1015c
1016 4444 continue
1017c
1018c now put here add_block
1019c *** debug ***
1020c        write(6,*)'step5a'
1021c        call util_flush(6)
1022c *************
1023      call add_block(d_v2,dbl_mb(k_g12piece),size_g12p,
1024     &               xoffset_p34+xoffset_g12(i))
1025c *** debug ***
1026c        write(6,*)'step5b'
1027c        call util_flush(6)
1028c *************
1029      istart=0
1030c
1031c
1032cccx         if (.not.ma_pop_stack(l_2g2z))
1033cccx     1   call errquit('tcc_mo2e_trans_zones: l_2g2z',15,MA_ERR)
1034         if (.not.ma_pop_stack(l_g12piece))
1035     1   call errquit('tcc_mo2e_trans_zones: l_g12piece',15,MA_ERR)
1036      ENDDO  ! i = 1,imaxp12
1037c
1038c *** debug ***
1039c        write(6,*)'step100'
1040c        call util_flush(6)
1041c *************
1042c
1043c - exascale ---
1044  300  continue
1045c ---
1046c
1047
1048      if (.not.ma_pop_stack(l_2g2a))
1049     1  call errquit('tcc_mo2e_trans_zones: MA problem',15,MA_ERR)
1050c
1051c
1052      next = NXTASK(nprocs, 1)
1053      END IF
1054      count = count + 1
1055      ENDDO !azone2
1056      ENDDO !azone1
1057      ENDDO !g4b
1058      ENDDO !g3b
1059c
1060      next = nxtask(-nprocs,1)
1061c
1062       call reconcilefile(d_v2,size_2e)
1063c
1064c
1065      if (.not.ma_pop_stack(l_coeff))
1066     1  call errquit('tcc_off_4a: MA problem',15,MA_ERR)
1067c
1068      if (.not.ma_pop_stack(l_integral))
1069     1  call errquit('tcc_off_4a: MA problem',15,MA_ERR)
1070c
1071c
1072c *** debug ***
1073c
1074c      if(nodezero) then
1075c      write(6,*)'DONE --- DONE ---- DONE ---- DONE'
1076c      end if
1077c
1078c *************
1079      if (.not.ma_pop_stack(l_work2))
1080     1  call errquit('tcc_ao2e: MA problem',14,MA_ERR)
1081      if (.not.ma_pop_stack(l_work1))
1082     1  call errquit('tcc_ao2e: MA problem',15,MA_ERR)
1083c
1084      if (.not.ma_pop_stack(l_movecs_orb))
1085     1  call errquit('tcc_ao2e: MA problem',15,MA_ERR)
1086c
1087      if (.not.ma_pop_stack(l_gpair))
1088     1  call errquit('tcc_ao2e: MA problem',15,MA_ERR)
1089c
1090c
1091c
1092      if(idiskl) then
1093cccx        if (parallel) then
1094cccx          call ga_sync()
1095cccx          if (sf_open(d_4af).ne.0)
1096cccx     1      call errquit('deletefile: sf problem',0,DISK_ERR)
1097cccx        endif
1098ccx      if (.not.sf_destroy(d_4af))
1099ccx     1  call errquit('tcc_sf_destroy2: sf problem',15,MA_ERR)
1100        if (parallel) call ga_sync()
1101      else
1102       call deletefile(d_4af)
1103      end if
1104c
1105c
1106c
1107      if (.not.ma_pop_stack(l_4af_offset))
1108     1  call errquit('tcc_ao2e: MA problem',15,MA_ERR)
1109c
1110      call ga_sync()
1111c *** debug ***
1112c 800  format('DGEMM1 MAX',i5,2x,3f15.5)
1113c 801  format('DGEMM2 ',i5,2x,3f15.5)
1114 9000 format('PART1',i4,1x,'Cpu  wall ',2(f17.12,1x),3x,'g4b g3b',2i5)
1115c 9001 format('PART2',i4,1x,'Cpu  wall ',2(f17.12,1x),3x,'g4b g3b',2i5)
1116c 9003 format('PART1-4a',i4,1x,'Cpu  wall ',2(f17.12,1x))
1117c 9004 format('PART1-2g2z',i4,1x,'Cpu  wall ',2(f17.12,1x))
1118c 9005 format('PART1-dgemm',i4,1x,'Cpu  wall ',2(f17.12,1x))
1119c 9010 format('  P1-mnrs',i3,1x,2i5,1x,2i5,1x,'Cpu  wall ',2(f17.12,1x))
1120  555  format('atom loop ',2x,i5,3x,2i5,3x,2i5,i12)
1121  556  format('atom time',2x,i5,3x,2i5,3x,2i5,'Cpu wall ',2(f12.7,1x))
1122  777  format('main do loop ',2x,i5,3x,2i5,3x,2i5)
1123  775  format('main loop step1 ',2x,i5,3x,2i5,3x,2i5)
1124  776  format('main loop step2 ',2x,i5,3x,2i5,3x,2i5)
1125  778  format('PART1',2x,i5,3x,2i5,3x,2i5,2x,'Cpu  wall ',2(f17.12,1x))
1126  779  format('PART2',2x,i5,3x,2i5,3x,2i5,2x,'Cpu  wall ',2(f17.12,1x))
1127  780  format('ADD BLOCK',2x,i5,3x,2i5,3x,2i5,2x,'Cpu  wall ',
1128     &        2(f17.12,1x))
1129c *************
1130c
1131      RETURN
1132      END
1133c
1134c
1135c
1136c
1137