1      SUBROUTINE tce_mo2e_zones_4a_disk_2s_new(rtdb,d_v2,
2     1                                kax_v2_alpha_offset,
3     1                                size_2e)
4C     $Id$
5C     This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
6C     Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
7C     t ( p1 p2 h3 h4 )_t
8      IMPLICIT NONE
9#include "rtdb.fh"
10#include "global.fh"
11#include "mafdecls.fh"
12#include "stdio.fh"
13#include "util.fh"
14#include "bas.fh"
15#include "schwarz.fh"
16#include "sym.fh"
17#include "sf.fh"
18#include "errquit.fh"
19#include "tce.fh"
20#include "tce_main.fh"
21c
22c
23      integer rtdb                 ! Run-time database
24      integer d_v2                 ! MO integrals
25      integer kax_v2_alpha_offset  ! MO integrals offset
26      integer size_2e              ! 2e file size
27c
28      INTEGER size_2g2a,l_2g2a,k_2g2a
29      INTEGER azone1,azone2,azone3,azone4
30      INTEGER g1b,g2b,g3b,g4b
31      INTEGER igi1,igi2,igi3,igi4
32      INTEGER ii,i,j,k,l,N,ipos1,ipos2,ipos3,ipos4
33      INTEGER del1,del2,p1rel,p2rel
34      INTEGER size_4a,l_4a,k_4a
35c
36      integer mu,nu,rho,sigma
37      integer mu_lo,mu_hi
38      integer nu_lo,nu_hi
39      integer rho_lo,rho_hi
40      integer sigma_lo,sigma_hi
41      integer mu_range
42      integer nu_range
43      integer rho_range
44      integer sigma_range
45      integer mu1,nu1,rho1,sigma1
46      integer shift_mu,shift_nu
47      integer shift_rho,shift_sigma
48      integer work1,work2          ! Work array sizes
49      integer l_work1,k_work1      ! Work array 1
50      integer l_work2,k_work2      ! Work array 2
51      integer imu1,inu1,irho1,isigma1
52c
53      integer l_movecs_orb,k_movecs_orb
54      integer l_gpair,k_gpair
55      integer len_pair,g12_shift
56c ATTENTION,ACHTUNG,UWAGA 2000 - max # of CPU
57c
58      integer size_2g2z,l_2g2z,k_2g2z
59      integer tot_azone1_sh,tot_azone2_sh
60      integer tot_azone3_sh,tot_azone4_sh
61      integer ixi,jxi,point_pair
62      integer size_stripe,l_p34,k_p34
63      integer addr,xoffset_34
64      integer size_g3g4,xoffset_p34
65      integer size_g4321,k_g4321,l_g4321
66c
67      integer iha,ihb !number of corr. alpha, beta holes
68      integer ipa,ipb !number of corr. alpha, beta particles
69      integer INDEX_PAIR,icol,irow
70c compression
71      integer xoffset_g12(1000),pointer_g12(1000)
72      integer xoffset_size_p1p2(1000)
73      integer size_temp,size_temp_4g,xoffset_temp_4g,size_p1p2
74      integer max_size_temp,xoffset_temp,iclose,iopen,offset_2g2z
75      integer imaxp12,istart,ibuba,sumx,slice_offset
76      double precision wall,cpu,wall1,cpu1,wall2,cpu2,wall3,cpu3
77c *** debug ***
78c       double precision xtot1,xtot2,xtot3
79c *************
80      double precision tot_zone(1000)
81      integer l_g12piece,k_g12piece,size_g12p,iguru,ilogu,ihigu
82c
83      integer l_4af_offset,k_4af_offset,d_4af
84      integer k_2a2m_offset,l_2a2m_offset
85      integer sf_chunk,request
86      integer key_4af,offset_4af,size_4af
87      integer sf2a2m_chunk,key_2a2m,offset_2a2m
88      integer d_2a2m,size_2a2mf
89      character*255 filename
90c
91      logical parallel
92c
93      INTEGER length
94      INTEGER NXTASK
95      INTEGER next
96      INTEGER nprocs
97      INTEGER count
98      EXTERNAL NXTASK
99      logical nodezero
100      logical idiskl
101c
102c
103c
104ccx      idisk=.true.
105      if(idisk.eq.0) then
106       idiskl=.false.
107      else
108       idiskl=.true.
109      end if
110c
111      parallel = .true.
112c
113c
114      max_size_temp=imaxsize**4
115c
116c
117      do ii=1,1000
118       tot_zone(ii)=0.0d0
119      enddo
120      if(atpart.gt.1000)
121     &  call errquit('tce_zones: atpart too big',1,MA_ERR)
122      sumx=0
123      do ii=1,atpart
124       tot_zone(ii)=sumx
125       sumx=sumx+nalength(ii)
126      enddo
127c
128      nodezero=(ga_nodeid().eq.0)
129c
130c
131c this module is called only if intorb = .true.
132c N is the number of correlated orbitals
133        N = nmo(1) - nfc(1) - nfv(1)
134        iha = nocc(1)-nfc(1)
135        ihb = nocc(ipol)-nfc(ipol)
136        ipa = nmo(1)-nocc(1)-nfv(1)
137        ipb = nmo(ipol)-nocc(ipol)-nfv(ipol)
138c
139      sf_chunk=(imaxsize+10)**4
140      sf2a2m_chunk=((tile_dim)**2)*((imaxsize+10)**2)
141c
142       call tce_4a_offset(l_4af_offset,k_4af_offset,size_4af)
143       call tce_2a2m_offset(l_2a2m_offset,k_2a2m_offset,size_2a2mf)
144c
145c
146c
147       if(nodezero) then
148        write(6,*)'INTERMEDIATE FILES'
149        write(6,*)'size_4af',size_4af
150        write(6,*)'size_2a2mf',size_2a2mf
151c        write(6,*)'bytes=',bytes
152c        write(6,*)'dfloat(bytes)=',dfloat(bytes)
153c        write(6,*)'dfloat(bytes)*dfloat(size_4af)',
154c     &             dfloat(bytes)*dfloat(size_4af)
155c        write(6,*)'dfloat(bytes)*dfloat(size_2a2mf)',
156c     &            dfloat(bytes)*dfloat(size_2a2mf)
157c        write(6,*)'                  '
158       end if
159c
160c
161c
162      if(idiskl) then
163        if(.not.parallel)
164     1       call errquit('sf only for parallel runs',1,DISK_ERR)
165        if(parallel) call ga_sync()
166        call util_file_name('4aintx',.false.,.false.,filename)
167        if (sf_create(filename,dfloat(bytes)*dfloat(size_4af),
168     1    dfloat(bytes)*dfloat(size_4af),sf_chunk,d_4af).ne.0)
169     2    call errquit('4-index: sf problem',0,DISK_ERR)
170        call util_file_name('2a2m',.false.,.false.,filename)
171        if (sf_create(filename,dfloat(bytes)*dfloat(size_2a2mf),
172     1   dfloat(bytes)*dfloat(size_2a2mf),sf2a2m_chunk,d_2a2m).ne.0)
173     2   call errquit('4-index: sf problem',0,DISK_ERR)
174      else
175       call createfile(filename,d_4af,size_4af)
176       call reconcilefile(d_4af,size_4af)
177       call ga_zero(d_4af)
178       call createfile(filename,d_2a2m,size_2a2mf)
179       call reconcilefile(d_2a2m,size_2a2mf)
180       call ga_zero(d_2a2m)
181      end if
182c
183c
184c     Pair's structure of the integral file
185      call tce_mo2e_pairs(l_gpair,k_gpair,len_pair)
186      len_pair = int_mb(k_gpair)
187c
188c
189c alpha orbitals only
190c
191      if (.not.ma_push_get(mt_dbl,nbf*(iha+ipa)
192     1  ,"sorted MO coeffs",
193     2  l_movecs_orb,k_movecs_orb))
194     3  call errquit('tce_mo2e_zone: MA problem 1',0,
195     2    BASIS_ERR)
196      call dfill(nbf*(iha+ipa),0.0d0, dbl_mb(k_movecs_orb), 1)
197      do i=1,iha
198      do isigma1=1,nbf
199       dbl_mb(k_movecs_orb+(i-1)*nbf+isigma1-1)=
200     & dbl_mb(k_movecs_sorted+(i-1)*nbf+isigma1-1)
201      enddo
202      enddo
203      do i=iha+1,iha+ipa
204      do isigma1=1,nbf
205       dbl_mb(k_movecs_orb+(i-1)*nbf+isigma1-1)=
206     & dbl_mb(k_movecs_sorted+(i+ihb-1)*nbf+isigma1-1)
207      enddo
208      enddo
209c
210c
211      call int_mem_2e4c(work1,work2)
212      if (.not.ma_push_get(mt_dbl,work1,'work1',l_work1,k_work1))
213     1  call errquit('tce_ao2e: MA problem work1',0,MA_ERR)
214      if (.not.ma_push_get(mt_dbl,work2,'work2',l_work2,k_work2))
215     1  call errquit('tce_ao2e: MA problem work2',1,MA_ERR)
216c
217c
218c
219c 4af file formed here
220c
221c
222             cpu1 = - util_cpusec()
223             wall1 = - util_wallsec()
224c
225      nprocs = GA_NNODES()
226      count = 0
227      next = NXTASK(nprocs, 1)
228      DO azone1 = 1,atpart      !nu
229      DO azone2 = azone1,atpart !mu
230      DO azone3 = 1,atpart      !sigma
231      DO azone4 = azone3,atpart !rho
232      IF (next.eq.count) THEN
233c ---------------------------
234        size_4a = nalength(azone1)*nalength(azone2)*
235     1            nalength(azone3)*nalength(azone4)
236c *** debug ***
237c        write(6,555) ga_nodeid(),azone1,azone2,azone3,azone4,size_4a
238c        call util_flush(6)
239c             cpu1 = - util_cpusec()
240c             wall1 = - util_wallsec()
241c *************
242        if(.not.ma_push_get(mt_dbl,size_4a,'4a',l_4a,k_4a))
243     1     call errquit('tce_4af_zones1: MA problem',0,MA_ERR)
244        call dfill(size_4a, 0.0d0, dbl_mb(k_4a), 1)
245         shift_mu = 0
246         do mu    = a2length(azone2)+1,a2length(azone2+1)
247            if (.not.bas_cn2bfr(ao_bas_han,mu,mu_lo,mu_hi))
248     1      call errquit('tce_ao2e: basis fn range problem 1',0,
249     2      BASIS_ERR)
250            mu_range = mu_hi - mu_lo + 1
251         shift_nu = 0
252         do nu    = a2length(azone1)+1,a2length(azone1+1)
253            if (.not.bas_cn2bfr(ao_bas_han,nu,nu_lo,nu_hi))
254     1      call errquit('tce_ao2e: basis fn range problem 1',0,
255     2      BASIS_ERR)
256            nu_range = nu_hi - nu_lo + 1
257         shift_rho = 0
258         do rho   = a2length(azone4)+1,a2length(azone4+1)
259            if (.not.bas_cn2bfr(ao_bas_han,rho,rho_lo,rho_hi))
260     1      call errquit('tce_ao2e: basis fn range problem 1',0,
261     2      BASIS_ERR)
262            rho_range = rho_hi - rho_lo + 1
263         shift_sigma = 0
264         do sigma = a2length(azone3)+1,a2length(azone3+1)
265            if (.not.bas_cn2bfr(ao_bas_han,sigma,sigma_lo,sigma_hi))
266     1      call errquit('tce_ao2e: basis fn range problem 1',0,
267     2      BASIS_ERR)
268            sigma_range = sigma_hi - sigma_lo + 1
269            if (schwarz_shell(rho,sigma)*schwarz_shell(mu,nu)
270     1          .ge. tol2e) then
271cccx            call dfill(work1, 0.0d0, dbl_mb(k_work1), 1)
272cccx            call dfill(work2, 0.0d0, dbl_mb(k_work2), 1)
273c *** debug ***
274c             cpu2 = - util_cpusec()
275c             wall2 = - util_wallsec()
276c **************************************
277            call int_2e4c(ao_bas_han,mu,nu,ao_bas_han,rho,sigma,
278     1           work2,dbl_mb(k_work2),work1,dbl_mb(k_work1))
279            i=0
280             do mu1     = 1,mu_range
281             do nu1     = 1,nu_range
282             do rho1    = 1,rho_range
283             do sigma1  = 1,sigma_range
284            i=i+1
285            inu1=nu1+shift_nu
286            isigma1=sigma1+shift_sigma
287            imu1=mu1+shift_mu
288            irho1=rho1+shift_rho
289c (isigma1,irho1|inu1, imu1)
290            ipos1=(((imu1-1)*nalength(azone1)+inu1-1)*
291     1            nalength(azone4)+irho1-1)*nalength(azone3)
292     2            +isigma1
293            dbl_mb(k_4a+ipos1-1)=dbl_mb(k_work1+i-1)
294            enddo
295            enddo
296            enddo
297            enddo
298c **** debug ****
299c             cpu3 = cpu3 + util_cpusec()
300c             wall3 = wall3 + util_wallsec()
301c             xtot2=xtot2+cpu3
302c ***************************
303            end if !schwarz  screening
304         shift_sigma = shift_sigma + sigma_range
305         enddo !sigma
306         shift_rho   = shift_rho + rho_range
307         enddo !rho
308         shift_nu    = shift_nu + nu_range
309         enddo !nu
310         shift_mu    = shift_mu + mu_range
311         enddo !mu
312c *** debug ****
313c             cpu1 = cpu1 + util_cpusec()
314c             wall1 = wall1 + util_wallsec()
315c       write(6,556) ga_nodeid(),azone1,azone2,azone3,azone4,cpu1,wall1
316c       call util_flush(6)
317c **************
318c
319c fixing offsets and sf_writing
320         key_4af=azone4 - 1 + atpart * (azone3 - 1 +
321     &          atpart * (azone2 - 1 + atpart * (azone1 - 1)))
322         call tce_hash(int_mb(k_4af_offset),key_4af,offset_4af)
323      if(idiskl) then
324        if (sf_write(d_4af,dfloat(bytes)*dfloat(offset_4af),
325     1    dfloat(bytes)*dfloat(size_4a),dbl_mb(k_4a),request).ne.0)
326     1    THEN
327          write(6,8155)ga_nodeid(),key_4af,offset_4af,size_4a
328          call errquit('zones put: sf problem2',1,DISK_ERR)
329        end if
330        if (sf_wait(request).ne.0)
331     1     call errquit('zones put: sf problem3',2,DISK_ERR)
332      else
333        call ga_put(d_4af,offset_4af+1,offset_4af+size_4a,1,1,
334     1    dbl_mb(k_4a),size_4a)
335      end if
336c closing l_4a file
337        if (.not.ma_pop_stack(l_4a))
338     1   call errquit('tce_mo2e_4af2: l_4a',15,MA_ERR)
339c ---------------------------
340      next = NXTASK(nprocs, 1)
341      END IF
342      count = count + 1
343      ENDDO !azone4
344      ENDDO !azone3
345      ENDDO !azone2
346      ENDDO !azone1
347c
348c
349c
350c
351c
352c
353c
354c
355       call ga_sync()
356       next = nxtask(-nprocs,1)
357c
358c
359c
360c
361c *** debug **********
362c           write(6,*)'first part OK',ga_nodeid()
363c           call util_flush(6)
364c ********************
365c
366c
367c
368c
369      nprocs = GA_NNODES()
370      count = 0
371      next = NXTASK(nprocs, 1)
372      DO g3b = 1,noa+nva   !k
373      DO g4b = g3b,noa+nva !l
374      DO azone1 = 1,atpart !nu
375      DO azone2 = azone1,atpart !mu
376      IF (next.eq.count) THEN
377c
378c *** debug ***
379c       write(6,777)ga_nodeid(),g3b,g4b,azone1,azone2
380c       call util_flush(6)
381c *************
382             cpu1 = - util_cpusec()
383             wall1 = - util_wallsec()
384c
385      size_2g2a=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)
386     1          *nalength(azone1)*nalength(azone2)
387      if (.not.ma_push_get(mt_dbl,size_2g2a,'2g2a',l_2g2a,k_2g2a))
388     1    call errquit('tce_r2_divide1: MA problem',0,MA_ERR)
389      call dfill(size_2g2a, 0.0d0, dbl_mb(k_2g2a), 1)
390c
391      DO azone3 = 1,atpart      !sigma
392      DO azone4 = azone3,atpart !rho
393        size_4a = nalength(azone1)*nalength(azone2)*
394     1            nalength(azone3)*nalength(azone4)
395        if(.not.ma_push_get(mt_dbl,size_4a,'4a',l_4a,k_4a))
396     1     call errquit('tce_r2_divide2: MA problem',0,MA_ERR)
397cccx        call dfill(size_4a, 0.0d0, dbl_mb(k_4a), 1)
398c
399c
400         key_4af=azone4 - 1 + atpart * (azone3 - 1 +
401     &          atpart * (azone2 - 1 + atpart * (azone1 - 1)))
402         call tce_hash(int_mb(k_4af_offset),key_4af,offset_4af)
403       if(idiskl) then
404        if (sf_read(d_4af,dfloat(bytes)*dfloat(offset_4af),
405     1    dfloat(bytes)*dfloat(size_4a),dbl_mb(k_4a),request).ne.0)
406     1     call errquit('zones get2: sf problem',1,DISK_ERR)
407        if (sf_wait(request).ne.0)
408     1    call errquit('zones get3: sf problem',2,DISK_ERR)
409       else
410        call ga_get(d_4af,offset_4af+1,offset_4af+size_4a,1,1,
411     1    dbl_mb(k_4a),size_4a)
412       end if
413c
414c
415c
416c
417c now forming auxiliary matrix  of orbital coeff.
418c (g4 g3 | sigma rho) = C(g4 simga)*C(g3 rho)
419      size_2g2z=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)
420     1          *nalength(azone3)*nalength(azone4)
421      if (.not.ma_push_get(mt_dbl,size_2g2z,'2g2z',l_2g2z,k_2g2z))
422     1    call errquit('tce_r2_divide3: MA problem',0,MA_ERR)
423ccxkk      call dfill(size_2g2z, 0.0d0, dbl_mb(k_2g2z), 1)
424
425      if(azone3.ne.azone4) then
426       i = 0
427       tot_azone3_sh=tot_zone(azone3)
428       tot_azone4_sh=tot_zone(azone4)
429       do irho1   =  1,nalength(azone4)
430       do isigma1 =  1,nalength(azone3)
431       do igi3    =  1,int_mb(k_range_alpha+g3b-1)
432       do igi4    =  1,int_mb(k_range_alpha+g4b-1)
433       i = i+1
434       ipos1=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone3_sh
435     &       +isigma1
436       ipos2=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone4_sh
437     &       +irho1
438       ipos3=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone4_sh
439     &       +irho1
440       ipos4=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone3_sh
441     &       +isigma1
442       dbl_mb(k_2g2z+i-1)=dbl_mb(k_movecs_orb+ipos1-1)*
443     &                    dbl_mb(k_movecs_orb+ipos2-1)
444     &                   +dbl_mb(k_movecs_orb+ipos3-1)*
445     &                    dbl_mb(k_movecs_orb+ipos4-1)
446       enddo
447       enddo
448       enddo
449       enddo
450      end if !azone3.ne.azone4
451      if(azone3.eq.azone4) then
452       i = 0
453       tot_azone3_sh=tot_zone(azone3)
454       tot_azone4_sh=tot_zone(azone4)
455       do irho1   =  1,nalength(azone4)
456       do isigma1 =  1,nalength(azone3)
457       do igi3    =  1,int_mb(k_range_alpha+g3b-1)
458       do igi4    =  1,int_mb(k_range_alpha+g4b-1)
459       i = i+1
460       ipos1=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone3_sh
461     &       +isigma1
462       ipos2=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone4_sh
463     &       +irho1
464       dbl_mb(k_2g2z+i-1)=dbl_mb(k_movecs_orb+ipos1-1)*
465     &                    dbl_mb(k_movecs_orb+ipos2-1)
466       enddo
467       enddo
468       enddo
469       enddo
470      end if !azone3.eq.azone4
471c now: dgemming
472      call  dgemm('N','N',
473     1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1),  !m
474     1 nalength(azone1)*nalength(azone2), !n
475     1 nalength(azone3)*nalength(azone4), !k
476     1 1.0d0,
477     1 dbl_mb(k_2g2z),                    !A
478     1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1), !lda
479     1 dbl_mb(k_4a),nalength(azone3)*nalength(azone4),          ! B,ldb
480     1 1.0d0,dbl_mb(k_2g2a),
481     1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1))
482c
483c
484        if (.not.ma_pop_stack(l_2g2z))
485     1   call errquit('tce_mo2e_trans_zones: l_2g2z',15,MA_ERR)
486c
487        if (.not.ma_pop_stack(l_4a))
488     1   call errquit('tce_mo2e_trans_zones: l_4a',15,MA_ERR)
489      ENDDO !azone4
490c one step ahead
491      ENDDO !azone3
492c
493         key_2a2m=azone2 - 1 + atpart * (azone1 - 1 +
494     &   atpart * (g4b - 1 + (noa+nva) * (g3b - 1)))
495         call tce_hash(int_mb(k_2a2m_offset),key_2a2m,offset_2a2m)
496      if(idiskl) then
497        if (sf_write(d_2a2m,dfloat(bytes)*dfloat(offset_2a2m),
498     1    dfloat(bytes)*dfloat(size_2g2a),dbl_mb(k_2g2a),request).ne.0)
499     2    call errquit('zones put: sf problem21',1,DISK_ERR)
500        if (sf_wait(request).ne.0)
501     1    call errquit('zones put: sf problem31',2,DISK_ERR)
502      else
503        call ga_put(d_2a2m,offset_2a2m+1,offset_2a2m+size_2g2a,1,1,
504     1    dbl_mb(k_2g2a),size_2g2a)
505      end if
506c
507      if (.not.ma_pop_stack(l_2g2a))
508     1  call errquit('tce_mo2e_trans_zones: MA problem',15,MA_ERR)
509c
510      next = NXTASK(nprocs, 1)
511      END IF
512      count = count + 1
513      ENDDO !azone2
514      ENDDO !azone1
515      ENDDO !g4b
516      ENDDO !g3b
517c
518c
519c
520c
521      call ga_sync()
522      next = nxtask(-nprocs,1)
523c
524c
525c
526c
527c
528c
529c
530      nprocs = GA_NNODES()
531      count = 0
532      next = NXTASK(nprocs, 1)
533      DO g3b = 1,noa+nva   !k
534      DO g4b = g3b,noa+nva !l
535c
536c        write(6,*)'g3b-g4b',g3b,g4b,ga_nodeid()
537c        call util_flush(6)
538c
539c g2 g1 do loops
540c open (g4 g3 | all symmetry allowed g2 g1) - equally good we can split it into
541c                                              pieces
542c (k,l|i,j) pieces
543c
544c   calculate point_pair index here
545c
546         ixi=noa+nva-g3b+1
547         jxi=noa+nva-g4b+1
548         point_pair=((noa+nva)*(noa+nva+1))/2-((ixi-1)*ixi)/2-jxi+1
549         size_stripe=int_mb(k_gpair+point_pair)
550         xoffset_p34 = int_mb(k_gpair+len_pair+point_pair)
551c
552c offset for blocking the (nu mu | g2 g1 ) = C*C matrix
553c
554      do i=1,1000
555       xoffset_g12(i)  = 0
556       pointer_g12(i)  = 0
557       xoffset_size_p1p2(i) = 0
558      enddo
559      imaxp12=0
560cccx      size_temp=0
561      size_temp_4g=0
562      xoffset_temp=0
563      xoffset_temp_4g=0
564      size_p1p2=0
565      i=1
566      iguru=0
567      DO g1b = 1,noa+nva   !l
568      DO g2b = g1b,noa+nva !k
569       iguru=iguru+1
570       IF (int_mb(k_spin_alpha+g3b-1)+int_mb(k_spin_alpha+g4b-1).eq.
571     & int_mb(k_spin_alpha+g1b-1)+int_mb(k_spin_alpha+g2b-1)) THEN
572       IF (ieor(int_mb(k_sym_alpha+g3b-1),ieor(int_mb(k_sym_alpha+g4b-1)
573     & ,ieor(int_mb(k_sym_alpha+g1b-1),int_mb(k_sym_alpha+g2b-1)))) .eq.
574     & irrep_v) THEN
575       IROW=INDEX_PAIR(g4b,g3b)
576       ICOL=INDEX_PAIR(g2b,g1b)
577       IF(IROW.GE.ICOL) THEN
578         size_temp_4g=size_temp_4g+int_mb(k_range_alpha+g2b-1)
579     1          *int_mb(k_range_alpha+g1b-1)
580     2          *int_mb(k_range_alpha+g3b-1)
581     3          *int_mb(k_range_alpha+g4b-1)
582         size_p1p2=size_p1p2+int_mb(k_range_alpha+g2b-1)
583     1          *int_mb(k_range_alpha+g1b-1)
584         if(size_temp_4g.gt.max_size_temp) then
585           xoffset_g12(i)=xoffset_temp_4g
586           xoffset_size_p1p2(i)=size_p1p2
587           pointer_g12(i)=iguru
588           xoffset_temp_4g=xoffset_temp_4g+size_temp_4g
589           size_temp_4g=0
590           size_p1p2=0
591           imaxp12=i
592           i=i+1
593          end if
594       END IF
595       END IF
596       END IF
597      ENDDO
598      ENDDO
599c
600      if(size_temp_4g.ne.0) then
601           xoffset_g12(i)=xoffset_temp_4g
602           xoffset_size_p1p2(i)=size_p1p2
603           pointer_g12(i)=iguru
604           imaxp12=i
605      end if
606c
607      if(i.gt.1000)
608     1  call errquit('tce_zone: xoffset-size-problem',0,MA_ERR)
609c
610c
611      do i =1, imaxp12
612c
613      IF (next.eq.count) THEN ! parallel job here
614c
615        if(i.eq.1) then
616          ilogu=1
617        else
618          ilogu=pointer_g12(i-1)+1
619        end if
620        ihigu=pointer_g12(i)
621        size_g12p=int_mb(k_range_alpha+g4b-1)*
622     1                int_mb(k_range_alpha+g3b-1)*
623     1                xoffset_size_p1p2(i)
624        if (.not.ma_push_get(mt_dbl,size_g12p,'g12piece',
625     1      l_g12piece,k_g12piece))
626     1      call errquit('tce_zone: MA g12-piece ',0,MA_ERR)
627        call dfill(size_g12p, 0.0d0, dbl_mb(k_g12piece), 1)
628c
629c -------------------------
630c azone1 azone2 loop -starts here
631c
632       DO azone1 = 1,atpart !nu
633       DO azone2 = azone1,atpart !mu
634       size_2g2a=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)
635     1           *nalength(azone1)*nalength(azone2)
636      if (.not.ma_push_get(mt_dbl,size_2g2a,'2g2a',l_2g2a,k_2g2a))
637     1    call errquit('tce_r2_divide1: MA problem',0,MA_ERR)
638c fixing offsets and sf_writing
639         key_2a2m=azone2 - 1 + atpart * (azone1 - 1 +
640     &   atpart * (g4b - 1 + (noa+nva) * (g3b - 1)))
641         call tce_hash(int_mb(k_2a2m_offset),key_2a2m,offset_2a2m)
642      if(idiskl) then
643        if (sf_read(d_2a2m,dfloat(bytes)*dfloat(offset_2a2m),
644     1    dfloat(bytes)*dfloat(size_2g2a),dbl_mb(k_2g2a),request).ne.0)
645     2    call errquit('zones put: sf problem22',1,DISK_ERR)
646        if (sf_wait(request).ne.0)
647     1    call errquit('zones put: sf problem32',2,DISK_ERR)
648      else
649        call ga_get(d_2a2m,offset_2a2m+1,offset_2a2m+size_2g2a,1,1,
650     1    dbl_mb(k_2g2a),size_2g2a)
651      end if
652c --------------------------
653      istart=0
654      iguru=0
655      slice_offset=0
656      DO g1b = 1,noa+nva     !l
657      DO g2b = g1b,noa+nva   !k
658       iguru=iguru+1
659       IF (int_mb(k_spin_alpha+g3b-1)+int_mb(k_spin_alpha+g4b-1).eq.
660     & int_mb(k_spin_alpha+g1b-1)+int_mb(k_spin_alpha+g2b-1)) THEN
661       IF (ieor(int_mb(k_sym_alpha+g3b-1),ieor(int_mb(k_sym_alpha+g4b-1)
662     & ,ieor(int_mb(k_sym_alpha+g1b-1),int_mb(k_sym_alpha+g2b-1)))) .eq.
663     & irrep_v) THEN
664       IROW=INDEX_PAIR(g4b,g3b)
665       ICOL=INDEX_PAIR(g2b,g1b)
666       IF(IROW.GE.ICOL) THEN
667c l_g4321 ==> ([g4][g3]|[g2][g1])
668c now forming auxiliary matrix  of orbital coeff.
669c ( nu mu | g2 g1 ) = C(nu g2) C(mu g1)
670c
671c !---------------------------------------------------------------------
672       IF((iguru.ge.ilogu).and.(iguru.le.ihigu)) THEN
673c otworz tutaj C(nu,mu,g2,g1)
674        if (.not.ma_push_get(mt_dbl,nalength(azone1)*nalength(azone2)*
675     1      int_mb(k_range_alpha+g2b-1)*int_mb(k_range_alpha+g1b-1),
676     1      '2g2z',l_2g2z,k_2g2z))
677     1      call errquit('tce_zone: MA problem 2g2z-b',0,MA_ERR)
678        if(azone1.ne.azone2) then
679         ii = 0
680         tot_azone1_sh=tot_zone(azone1)
681         tot_azone2_sh=tot_zone(azone2)
682          do igi1    =  1,int_mb(k_range_alpha+g1b-1)
683          do igi2    =  1,int_mb(k_range_alpha+g2b-1)
684          do imu1    =  1,nalength(azone2)
685          do inu1    =  1,nalength(azone1)
686           ii = ii+1
687           ipos1=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone1_sh
688     &           +inu1
689           ipos2=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone2_sh
690     &           +imu1
691           ipos3=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone2_sh
692     &           +imu1
693           ipos4=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone1_sh
694     &           +inu1
695           dbl_mb(k_2g2z+ii-1)=dbl_mb(k_movecs_orb+ipos1-1)*
696     &                         dbl_mb(k_movecs_orb+ipos2-1)
697     &                        +dbl_mb(k_movecs_orb+ipos3-1)*
698     &                         dbl_mb(k_movecs_orb+ipos4-1)
699          enddo
700          enddo
701          enddo
702          enddo
703c           offset_2g2z=offset_2g2z+int_mb(k_range_alpha+g2b-1)
704c     1                 *int_mb(k_range_alpha+g1b-1)
705c     1                 *nalength(azone1)*nalength(azone2)
706        end if !azone1.ne.azone2
707        if(azone1.eq.azone2) then
708         ii = 0
709         tot_azone1_sh=tot_zone(azone1)
710         tot_azone2_sh=tot_zone(azone2)
711          do igi1    =  1,int_mb(k_range_alpha+g1b-1)
712          do igi2    =  1,int_mb(k_range_alpha+g2b-1)
713          do imu1    =  1,nalength(azone2)
714          do inu1    =  1,nalength(azone1)
715           ii = ii+1
716           ipos1=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone1_sh
717     &           +inu1
718           ipos2=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone2_sh
719     &           +imu1
720           dbl_mb(k_2g2z+ii-1)=dbl_mb(k_movecs_orb+ipos1-1)*
721     &                         dbl_mb(k_movecs_orb+ipos2-1)
722          enddo
723          enddo
724          enddo
725          enddo
726c           offset_2g2z=offset_2g2z+int_mb(k_range_alpha+g2b-1)
727c     1                 *int_mb(k_range_alpha+g1b-1)
728c     1                 *nalength(azone1)*nalength(azone2)
729        end if !azone1.eq.azone2
730c !---------------------------------------------------------------------
731c
732c
733         call dgemm('N','N',
734     1   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1), !m
735     1   int_mb(k_range_alpha+g2b-1)*int_mb(k_range_alpha+g1b-1), !n
736     3   nalength(azone1)*nalength(azone2),                       !k
737     4   1.0d0,dbl_mb(k_2g2a),
738     5   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1),
739     6   dbl_mb(k_2g2z),nalength(azone1)*nalength(azone2),
740     7   1.0d0,dbl_mb(k_g12piece+slice_offset),
741     8   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1))
742c
743       slice_offset=slice_offset+
744     1   int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)*
745     1   int_mb(k_range_alpha+g1b-1)*int_mb(k_range_alpha+g2b-1)
746c
747         if (.not.ma_pop_stack(l_2g2z))
748     1   call errquit('tce_mo2e_trans_zones: l_2g2z',15,MA_ERR)
749c
750       END IF ! iguru
751       END IF ! irow.gt.icol
752       END IF ! symmetry
753       END IF ! spin
754      ENDDO  ! g2b-loop ends up here
755      ENDDO  ! g1b-loop ends up here
756c
757c
758c azone1 azone2 loop ends up here
759c
760      if (.not.ma_pop_stack(l_2g2a))
761     1  call errquit('tce_mo2e_trans_zones: MA problem',15,MA_ERR)
762c
763      ENDDO !azone2
764      ENDDO !azone1
765c now put here put_block
766      call put_block(d_v2,dbl_mb(k_g12piece),size_g12p,
767     &               xoffset_p34+xoffset_g12(i))
768      istart=0
769c
770cccx         if (.not.ma_pop_stack(l_2g2z))
771cccx     1   call errquit('tce_mo2e_trans_zones: l_2g2z',15,MA_ERR)
772         if (.not.ma_pop_stack(l_g12piece))
773     1   call errquit('tce_mo2e_trans_zones: l_g12piece',15,MA_ERR)
774
775      next = NXTASK(nprocs, 1)
776      END IF
777      count = count + 1
778c
779      ENDDO  ! i = 1,imaxp12
780c
781c
782c
783cccx      next = NXTASK(nprocs, 1)
784cccx      END IF
785cccx      count = count + 1
786      ENDDO !g4b
787      ENDDO !g3b
788c
789c
790c
791c
792c
793            call ga_sync()
794            next = nxtask(-nprocs,1)
795c
796c
797c      if(nodezero) then
798c      write(6,*)'DONE --- DONE ---- DONE ---- DONE'
799c      end if
800c
801      if (.not.ma_pop_stack(l_work2))
802     1  call errquit('tce_ao2e: MA problem',14,MA_ERR)
803      if (.not.ma_pop_stack(l_work1))
804     1  call errquit('tce_ao2e: MA problem',15,MA_ERR)
805c
806      if (.not.ma_pop_stack(l_movecs_orb))
807     1  call errquit('tce_ao2e: MA problem',15,MA_ERR)
808c
809      if (.not.ma_pop_stack(l_gpair))
810     1  call errquit('tce_ao2e: MA problem',15,MA_ERR)
811c
812c
813c
814      if(idiskl) then
815c       if (.not.sf_destroy(d_2a2m))
816c     1   call errquit('tce_sf_destroy22: sf problem',15,MA_ERR)
817c         if (parallel) call ga_sync()
818c       if (.not.sf_destroy(d_4af))
819c     1   call errquit('tce_sf_destroy33: sf problem',15,MA_ERR)
820c         if (parallel) call ga_sync()
821      else
822       call deletefile(d_2a2m)
823       call ga_sync()
824       call deletefile(d_4af)
825       call ga_sync()
826      end if
827c
828c
829      if (.not.ma_pop_stack(l_2a2m_offset))
830     1  call errquit('tce_ao2e_x: MA problem',15,MA_ERR)
831c
832      if (.not.ma_pop_stack(l_4af_offset))
833     1  call errquit('tce_ao2e_y: MA problem',15,MA_ERR)
834c
835      call ga_sync()
836c *** debug ***
837c 800  format('DGEMM1 MAX',i5,2x,3f15.5)
838c 801  format('DGEMM2 ',i5,2x,3f15.5)
839 8155 format('FU',i6,2x,3i20)
840 9000 format('PART1',i4,1x,'Cpu  wall ',2(f17.12,1x),3x,'g4b g3b',2i5)
841c 9001 format('PART2',i4,1x,'Cpu  wall ',2(f17.12,1x),3x,'g4b g3b',2i5)
842c 9003 format('PART1-4a',i4,1x,'Cpu  wall ',2(f17.12,1x))
843c 9004 format('PART1-2g2z',i4,1x,'Cpu  wall ',2(f17.12,1x))
844c 9005 format('PART1-dgemm',i4,1x,'Cpu  wall ',2(f17.12,1x))
845c 9010 format('  P1-mnrs',i3,1x,2i5,1x,2i5,1x,'Cpu  wall ',2(f17.12,1x))
846  555  format('atom loop ',2x,i5,3x,2i5,3x,2i5,i12)
847  556  format('atom time',2x,i5,3x,2i5,3x,2i5,'Cpu wall ',2(f12.7,1x))
848  777  format('main do loop ',2x,i5,3x,2i5,3x,2i5)
849  775  format('main loop step1 ',2x,i5,3x,2i5,3x,2i5)
850  776  format('main loop step2 ',2x,i5,3x,2i5,3x,2i5)
851  778  format('PART1',2x,i5,3x,2i5,3x,2i5,2x,'Cpu  wall ',2(f17.12,1x))
852  779  format('PART2',2x,i5,3x,2i5,3x,2i5,2x,'Cpu  wall ',2(f17.12,1x))
853  780  format('ADD BLOCK',2x,i5,3x,2i5,3x,2i5,2x,'Cpu  wall ',
854     &        2(f17.12,1x))
855c *************
856c
857      RETURN
858      END
859c
860c
861c
862      SUBROUTINE tce_4a_offset(l_a_offset,k_a_offset,size)
863C     $Id$
864C     This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
865C     Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
866      IMPLICIT NONE
867#include "global.fh"
868#include "mafdecls.fh"
869#include "sym.fh"
870#include "errquit.fh"
871#include "tce.fh"
872#include "tce_main.fh"
873      INTEGER l_a_offset
874      INTEGER k_a_offset
875      INTEGER size
876      INTEGER length
877      INTEGER addr
878      INTEGER mu
879      INTEGER nu
880      INTEGER rho
881      INTEGER sigma
882      INTEGER azone1,azone2,azone3,azone4
883      length = 0
884      DO azone1 = 1,atpart      !nu
885      DO azone2 = azone1,atpart !mu
886      DO azone3 = 1,atpart      !sigma
887      DO azone4 = azone3,atpart !rho
888      length = length + 1
889      END DO
890      END DO
891      END DO
892      END DO
893      IF (.not.MA_PUSH_GET(mt_int,2*length+1,'noname',l_a_offset,k_a_off
894     &set)) CALL ERRQUIT('tce_t2_offset',0,MA_ERR)
895      int_mb(k_a_offset) = length
896      addr = 0
897      size = 0
898      DO azone1 = 1,atpart      !nu
899      DO azone2 = azone1,atpart !mu
900      DO azone3 = 1,atpart      !sigma
901      DO azone4 = azone3,atpart !rho
902      addr = addr + 1
903      int_mb(k_a_offset+addr) = azone4 - 1 + atpart * (azone3 - 1 +
904     &  atpart * (azone2 - 1 + atpart * (azone1 - 1)))
905      int_mb(k_a_offset+length+addr) = size
906      size = size + nalength(azone1) * nalength(azone2) *
907     &  nalength(azone3) * nalength(azone4)
908      END DO
909      END DO
910      END DO
911      END DO
912      RETURN
913      END
914c
915c
916c
917c
918      SUBROUTINE tce_2a2m_offset(l_a_offset,k_a_offset,size)
919C     $Id$
920C     This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
921C     Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
922      IMPLICIT NONE
923#include "global.fh"
924#include "mafdecls.fh"
925#include "sym.fh"
926#include "errquit.fh"
927#include "tce.fh"
928#include "tce_main.fh"
929      INTEGER l_a_offset
930      INTEGER k_a_offset
931      INTEGER size
932      INTEGER length
933      INTEGER addr
934      INTEGER g3b,g4b
935      INTEGER azone1,azone2
936      length = 0
937      DO g3b = 1,noa+nva   !k
938      DO g4b = g3b,noa+nva !l
939      DO azone1 = 1,atpart !nu
940      DO azone2 = azone1,atpart !mu
941      length = length + 1
942      END DO
943      END DO
944      END DO
945      END DO
946      IF (.not.MA_PUSH_GET(mt_int,2*length+1,'noname',l_a_offset,k_a_off
947     &set)) CALL ERRQUIT('tce_t2_offset',0,MA_ERR)
948      int_mb(k_a_offset) = length
949      addr = 0
950      size = 0
951      DO g3b = 1,noa+nva   !k
952      DO g4b = g3b,noa+nva !l
953      DO azone1 = 1,atpart !nu
954      DO azone2 = azone1,atpart !mu
955      addr = addr + 1
956      int_mb(k_a_offset+addr) = azone2 - 1 + atpart * (azone1 - 1 +
957     &  atpart * (g4b - 1 + (noa+nva) * (g3b - 1)))
958      int_mb(k_a_offset+length+addr) = size
959      size = size +
960     &   int_mb(k_range_alpha+g3b-1) * int_mb(k_range_alpha+g4b-1)
961     &      *  nalength(azone1) * nalength(azone2)
962      END DO
963      END DO
964      END DO
965      END DO
966      RETURN
967      END
968
969