1#ifdef SOLARIS
2#define NEW_SPARSE
3#endif
4#define GAADD 1
5c
6c  $Id$
7c
8c  Generates and writes out half-transformed
9c  exchange integrals for some multipassed
10c  segment, (oseglo:oseghi)
11c
12c  Half-transformed integrals are transposed in GA memory
13c  buffers and flushed when full. Size of buffer is determined
14c  by available memory.
15c
16c  Flat file is written using word-addressable I/O. Prefaced
17c  with some indexing info. File layout of integrals is:
18c
19c    nnbf
20c  |<---->
21c  !       nnbf*nvirl
22c  |<------------------->
23c  |                        nnbf*nvirl*nseg
24c  |<------------------------------------------------------------->
25c
26c
27c  nnbf:  triangle of basis functions (with sparsity) <= (nbf*(nbf+1))/2
28c  nvirl: number of virtual indices this processor owns ~nvir/nproc
29c  nseg:  length of occupied segment
30c
31c
32c
33c
34       subroutine moints_semi( rtdb, basis, tol2e, oseglo, oseghi,
35     $                         olo, ohi, vlo, vhi, g_movecs, oblk ,
36     K     k_file_size)
37       implicit none
38#include "errquit.fh"
39#include "mafdecls.fh"
40#include "global.fh"
41#include "eaf.fh"
42#include "util.fh"
43#include "sym.fh"
44#include "bas.fh"
45#include "rtdb.fh"
46c
47       integer rtdb
48       integer basis                          ! [input]  Basis handle
49       double precision tol2e                 ! [input]  Integral tolerance
50       integer oseglo, oseghi                 ! [input]  Occupied segment range
51       integer olo, ohi                       ! [input]  Correlated occupied index range
52       integer vlo, vhi                       ! [input]  Virtual index range
53       integer g_movecs                       ! [input]  MO coefficients
54       logical oblk                           ! [input]  Toggle AO integral blocking
55c
56       integer l_shmap, k_shmap, l_bfmap, k_bfmap, l_rbfmap, k_rbfmap
57       integer l_glo, k_glo, l_ghi, k_ghi
58       integer l_gloc, k_gloc
59       integer l_ionext, k_ionext
60       integer l_rlen, k_rlen
61       integer l_rloc, k_rloc
62       integer nbf, nsh, maxbfsh, nseg, nocc, nvir
63       integer ngrp, blen, nnbf
64       integer pvlo, pvhi
65       integer niofl
66       integer myid, numnodes, rlo, rhi, clo, chi
67       integer itmp, vtmp(10), mxrlen,mxrlen_in
68       integer lmemreq, nvir_node
69       double precision gmem, gmem_agg
70       double precision iotmpptr
71       integer g_kbuf,g_kbuf_trans
72       logical status
73       logical osym
74       character*256 fnameK
75       integer kunit
76       logical oprint_mem
77c
78       integer moints_numgr
79       integer moints_lmem
80       integer eaftype,eaf_size_in_mb,inntsize
81       double precision k_file_size
82       integer mp2_eaftype
83       external mp2_eaftype
84       external moints_numgr
85       external moints_lmem
86       logical mp2cpybck
87       data osym/.false./
88c
89c  General info
90c  Compare definition for nseg      : batch range
91c                         nocc      : number of occupieds to highest i
92c
93       myid = ga_nodeid()
94       numnodes = ga_nnodes()
95       inntsize=MA_sizeof(MT_INT,1,MT_BYTE)
96       nocc = ohi - olo + 1
97       nseg = oseghi - oseglo + 1
98       nvir = vhi - vlo + 1
99       status = bas_numbf(basis,nbf)
100       status = status.and.bas_numcont(basis,nsh)
101       status = status.and.bas_nbf_cn_max(basis,maxbfsh)
102       if (.not.status) call errquit('moints: cannot get basis info',0,
103     &       BASIS_ERR)
104c
105       oprint_mem = util_print('memory',print_high) .and.
106     $      ga_nodeid().eq.0
107c
108c  Clear 4-index statistics
109c
110       call moints_stats_clear()
111c
112c     Initial block length is very generous for efficiency.  Reduce it
113c     in while loop (1212) if things do not fit.
114c
115       blen = min(nbf,16*maxbfsh) ! Initial generous allocation
116*       blen = 32
117 1212  continue
118c
119c  Memory arithmetic
120c
121       lmemreq = moints_lmem(basis, nseg, nvir, blen)
122       if ((.not.ga_uses_ma()).and.(.not.(ga_memory_limited())))
123     $   call errquit('moints_semi: cannot determine memory limit',0,
124     &       GA_ERR)
125c
126       gmem = ga_memory_avail() / 8 ! Want amount in doubles
127       call ga_dgop(313, gmem, 1, 'min')
128       if (oprint_mem) then
129          write(6,717) blen, gmem, dble(lmemreq)
130 717      format(/'   Block length           ', i8/
131     $            '   Local GA available     ',  1p, d10.2/
132     $            '   Local memory required  ', 1p, d10.2)
133          call util_flush(6)
134       endif
135c
136       if (ga_uses_ma()) gmem = gmem - lmemreq
137c
138       if (oprint_mem) then
139          write(6,718) gmem
140 718      format('   Adjusted GA available  ', 1p, d10.2)
141          call util_flush(6)
142       endif
143c
144       gmem_agg = gmem*numnodes
145c
146       if (oprint_mem) then
147          write(6,719) gmem_agg
148 719      format('   Aggregate GA available ', 1p, d10.2)
149          call util_flush(6)
150       endif
151c
152       nvir_node = int(nvir/numnodes)
153       if (mod(nvir,numnodes).ne.0) nvir_node=nvir_node+1
154c     we have g_kbuf and g_kbuf trans plus ga_add duplicates .. /3
155c     we have g_kbuf and g_kbuf trans
156       gmem=gmem/2
157       mxrlen = int(gmem/(nseg*nvir_node))
158       mxrlen = min(
159     $      mxrlen,
160     $      nbf*nbf)
161C     $      min(nbf*nbf,maxbfsh*maxbfsh*numnodes))
162
163       if (rtdb_get(rtdb, 'mp2:mxrlen', mt_int, 1, mxrlen_in))
164     c      mxrlen=max(mxrlen_in*1024,mxrlen)
165 1964  continue
166c
167       if (oprint_mem) then
168          write(6,720) mxrlen, nseg, nvir, nbf, maxbfsh, numnodes
169 720      format( '   Maximum record length  ', i8/
170     $            '   No. of segments        ', i8/
171     $            '   Virtuals dimension     ', i8/
172     $            '   No. of functions       ', i8/
173     $            '   Max. functions/shell   ', i8/
174     $            '   Number of nodes        ', i8/)
175          call util_flush(6)
176       endif
177c
178       call ga_sync()           ! Just to ensure printing completes
179c
180       if (mxrlen .lt. min(nbf*nbf,maxbfsh*maxbfsh*numnodes)) then
181          if (blen .le. maxbfsh) call errquit
182     $         ('moints_semi: insufficient GA available',
183     $         mxrlen*nseg*nvir, GA_ERR)
184          blen = max(maxbfsh,blen/2) ! Reduce blocksize and try again
185          goto 1212
186       endif
187c
188       if (oprint_mem)
189     $      write(6,900) lmemreq, nint(gmem/numnodes), mxrlen
190900   format(/,'Semi-direct integral transformation',
191     $     /,' Local Memory required:   ',i10,' words '
192     $     /,' Global Memory remaining: ',i10,' words per node',
193     $     /,' IO Buffer length:        ',i10,' words')
194c
195c  Create GA exchange buffer
196c
197c
198      itmp = nvir/numnodes + min(mod(nvir,numnodes),1)
199      gmem=0
200*ga:1:0
201      if (.not.(ga_create( MT_DBL, mxrlen*nseg, nvir, 'transp Kbuf',
202     $                     (mxrlen*nseg), 0, g_kbuf ))) gmem=-1
203       call ga_dgop(319, gmem, 1, 'min')
204       call ga_sync()
205       if(gmem.lt.0) then
206c     cant alloc ga .. reduce mxrlen
207         mxrlen=mxrlen/2
208         if(ga_nodeid().eq.0) write(6,1962)
209 1962    format(//,5x,' Halved record length',//)
210         goto 1964
211cold    call errquit('moints_semi: cannot allocate K buffer',0,
212cold     &       GA_ERR)
213         endif
214      call ga_distribution(g_kbuf, myid, rlo, rhi, clo, chi )
215      if ((clo.eq.0).and.(chi.eq.-1)) then
216        pvlo = 0
217        pvhi = -1
218      else
219        pvlo = vlo + clo - 1
220        pvhi = vlo + chi - 1
221      endif
222#ifdef GAADD
223      gmem=0
224      if (.not.(ga_create( MT_DBL, mxrlen*nseg, nvir, ' Kbuf',
225     $                     0, nvir, g_kbuf_trans ))) gmem=-1
226       call ga_dgop(321, gmem, 1, 'min')
227       call ga_sync()
228       if(gmem.lt.0) then
229c     cant alloc ga .. reduce mxrlen
230         mxrlen=mxrlen/2
231         if (.not. ga_destroy(g_kbuf))
232     $        call errquit('moints_semi: failed to destroy g_kbuf',
233     G        g_kbuf,   GA_ERR)
234         if(ga_nodeid().eq.0) write(6,1962)
235         goto 1964
236cold     $   call errquit('moints_semi: cannot allocate K buffer',0,
237cold     &       GA_ERR)
238      endif
239#else
240      g_kbuf_trans=g_kbuf
241#endif
242c
243c  Reorder shells by descending shell-length
244c  and group shells by blocksize
245c
246       status = .true.
247       status = status .and. ma_push_get(MT_INT,nsh,'shell order map',
248     $                                   l_shmap, k_shmap)
249       status = status .and. ma_push_get(MT_INT,nsh,'group lo',
250     $                                   l_glo, k_glo )
251       status = status .and. ma_push_get(MT_INT,nsh,'group hi',
252     $                                   l_ghi, k_ghi)
253       status = status .and. ma_push_get(MT_INT,nbf,'basis map',
254     $                                   l_bfmap, k_bfmap)
255       status = status .and. ma_push_get(MT_INT,nbf,'rev basis map',
256     $                                   l_rbfmap, k_rbfmap)
257       if (.not.status) then
258         call errquit("moints_semi: failed to allocate buffers shorder",
259     $                0,MA_ERR)
260       endif
261       call moints_shorder( basis, nsh, nbf, blen, ngrp,
262     $                      int_mb(k_glo), int_mb(k_ghi),
263     $                      int_mb(k_shmap),
264     $                      int_mb(k_bfmap), int_mb(k_rbfmap) )
265c
266c  Generate locator and reverse map
267c
268       itmp = (nsh*(nsh+1))/2+numnodes
269       status = .true.
270       status = status .and. ma_push_get(MT_INT, (nbf*nbf), 'loc',
271     $                                   l_gloc, k_gloc )
272       status = status .and. ma_push_get(MT_INT, itmp, 'io',
273     $                                   l_ionext, k_ionext )
274       status = status .and. ma_push_get(MT_INT, itmp, 'rec len',
275     $                                   l_rlen, k_rlen )
276       itmp = (nbf*(nbf+1))/2
277       itmp = itmp + mod(itmp,2)
278       status = status .and. ma_push_get(MT_INT, itmp, 'rev loc',
279     $                                   l_rloc, k_rloc )
280       if (.not.status) then
281         call errquit("moints_semi: failed to allocate buffers locmap",
282     $                0,MA_ERR)
283       endif
284       call ifill(itmp, 0, int_mb(k_rloc), 1 )
285       call moints_locmap( basis, nsh, nbf, tol2e, int_mb(k_shmap),
286     $                     mxrlen, int_mb(k_gloc), nnbf,
287     $                     int_mb(k_rloc), niofl, int_mb(k_ionext),
288     $                     int_mb(k_rlen))
289c
290c  Open local file and write initial info
291c
292       call ifill(10, 0, vtmp, 1 )
293       vtmp(1) = ma_sizeof(MT_INT,10+nnbf,MT_BYTE)
294       vtmp(2) = nnbf
295       vtmp(3) = pvlo
296       vtmp(4) = pvhi
297       call util_file_name('kh', .true.,.true., fnamek)
298#ifdef NOIO
299       eaftype=mp2_eaftype(rtdb,k_file_size)
300       if(ga_nodeid().eq.0) write(6,*) ' mp2_eaf for mointskh  ',eaftype
301#else
302       eaftype=EAF_RW
303#endif
304       if (eaf_open( fnamek, eaftype, kunit).ne.0)
305     $   call errquit('moints_semi: failed to open half int file',0,
306     &       DISK_ERR)
307       if (eaf_write(kunit, 0.d0, vtmp,
308     $               ma_sizeof(MT_INT,10,MT_BYTE)).ne.0)
309     $   call errquit('moints_semi: failed to write header info1',0,
310     &       DISK_ERR)
311       iotmpptr = ma_sizeof(MT_INT,10,MT_BYTE)
312       if (eaf_write(kunit, iotmpptr, int_mb(k_rloc),
313     $               ma_sizeof(MT_INT,nnbf,MT_BYTE)).ne.0)
314     $   call errquit('moints_semi: failed to write header info2',0,
315     &       DISK_ERR)
316       if (.not. ma_pop_stack(l_rloc))
317     $     call errquit('moints: failed to pop', 1, MA_ERR)
318c
319c  Call the real stuff
320c
321        if (.not. rtdb_get(rtdb, 'mp2:copyback', mt_log, 1, mp2cpybck))
322     $    mp2cpybck=.false.
323
324       call moints_semi_a( basis, nbf, nsh, maxbfsh, tol2e, oseglo,
325     $                   oseghi, olo, ohi, vlo, vhi, pvlo, pvhi,
326     $                   mxrlen, osym, oblk, kunit, int_mb(k_shmap),
327     $                   ngrp, int_mb(k_glo), int_mb(k_ghi),
328     $                   int_mb(k_bfmap), int_mb(k_rbfmap),
329     $                   blen, int_mb(k_gloc),
330     $                   nnbf, niofl, int_mb(k_ionext),
331     $                   int_mb(k_rlen), g_movecs, g_kbuf,g_kbuf_trans,
332     M                   mp2cpybck)
333c
334c  Clean up
335c
336       if (.not. ma_pop_stack(l_rlen))
337     $     call errquit('moints_semi: failed to pop', 2, MA_ERR)
338       if (.not. ma_pop_stack(l_ionext))
339     $     call errquit('moints_semi: failed to pop', 3, MA_ERR)
340       if (.not. ma_pop_stack(l_gloc))
341     $     call errquit('moints_semi: failed to pop', 4, MA_ERR)
342       if (.not. ma_pop_stack(l_rbfmap))
343     $     call errquit('moints_semi: failed to pop', 6, MA_ERR)
344       if (.not. ma_pop_stack(l_bfmap))
345     $     call errquit('moints_semi: failed to pop', 7, MA_ERR)
346       if (.not. ma_pop_stack(l_ghi))
347     $     call errquit('moints_semi: failed to pop', 8, MA_ERR)
348       if (.not. ma_pop_stack(l_glo))
349     $     call errquit('moints_semi: failed to pop', 9, MA_ERR)
350       if (.not. ma_pop_stack(l_shmap))
351     $     call errquit('moints_semi: failed to pop', 10, MA_ERR)
352       if (.not. ga_destroy(g_kbuf))
353     $      call errquit('moints_semi: failed to destroy g_kbuf',g_kbuf,
354     &       GA_ERR)
355#ifdef GAADD
356       if (.not. ga_destroy(g_kbuf_trans))
357     $      call errquit('moints_semi: failed to destroy g_kbuf',g_kbuf,
358     &       GA_ERR)
359#endif
360c
361       if (eaf_close(kunit).ne.0)
362     $   call errquit('moints_semi: failed to close file',kunit,
363     &       DISK_ERR)
364c
365       call ga_sync()
366       if (util_print('statistics', print_high))
367     $   call moints_stats_print( 'semi' )
368
369
370
371       return
372       end
373
374
375
376
377
378
379
380
381
382c
383c
384c
385c
386c
387       subroutine moints_semi_a( basis, nbf, nsh, maxbfsh, tol2e,
388     $                           oseglo, oseghi, olo, ohi, vlo, vhi,
389     $                           pvlo, pvhi, mxrlen, osym, oblk,
390     $                           kunit, shmap, ngrp, glo, ghi, bfmap,
391     $                           rbfmap, blen, gloc, nnbf,
392     $                           niofl, ionext, rlen, g_movecs, g_kbuf,
393     G     g_kbuf_trans, mp2cpybck)
394       implicit none
395#include "errquit.fh"
396#include "tcgmsg.fh"
397#include "global.fh"
398#include "mafdecls.fh"
399#include "bas.fh"
400#include "util.fh"
401#include "schwarz.fh"
402#include "msgids.fh"
403c
404       integer MSG_SEMI_IO_RATE
405       parameter(MSG_SEMI_IO_RATE=18421)
406c
407c   Arguments
408c
409       integer basis                          ! [input] Basis handle
410       integer nbf                            ! [input] Basis functions
411       integer nsh                            ! [input] Shells
412       integer maxbfsh                        ! [input] Largest shell
413       double precision tol2e                 ! [input] Integral tolerance
414       integer oseglo, oseghi                 ! [input] Occupied segment range
415       integer olo, ohi                       ! [input] Occupied index range
416       integer vlo, vhi                       ! [input] Virtual index range
417       integer pvlo, pvhi                     ! [input] Virtual index range for this processor
418       integer mxrlen                         ! [input] Max record length
419       logical osym                           ! [input] Toggle symmetry
420       logical oblk                           ! [input] Toggle AO integral blocking
421       integer kunit                          ! [input] Unit numbers for IO
422       integer shmap(nsh)                     ! [input] Map for shells
423       integer ngrp                           ! [input] Number of groups of shells
424       integer glo(*)                         ! [input] Group lower shell bound
425       integer ghi(*)                         ! [input] Group upper shell bound
426       integer bfmap(nbf)                     ! [input] BF map: orig --> new
427       integer rbfmap(nbf)                    ! [input] Reverse bf map: new --> orig
428       integer blen                           ! [input] Blocksize
429       integer gloc(nbf*nbf)                  ! [input] bf -> GA memory map
430       integer nnbf                           ! [input] Number of screened basf pairs <= (nbf*(nbf+1))/2
431       integer niofl                          ! [input] Total IO flushes required
432       integer ionext(niofl)                  ! [input] Next value for each IO flush
433       integer rlen(niofl)                    ! [input] Record lengths for IO writes
434       integer g_movecs                       ! [input] MO coefficients
435       integer g_kbuf                         ! [scratch] Buffer for half-trans exchange
436       integer g_kbuf_trans                         ! [scratch] Buffer for half-trans exchange
437       logical mp2cpybck
438c
439c   Local variables
440c
441       integer nmo, nmo1, nmo2
442       integer ish0, jsh0, ish, jsh, ilen, jlen
443       integer ibflo, ibfhi, jbflo, jbfhi
444       integer kbflo, kbfhi, lbflo, lbfhi
445       integer kshlo, kshhi, lshlo, lshhi
446       integer kblen, lblen
447       integer kgr, lgr
448       integer qlo, qhi
449       integer l_ssbb, k_ssbb
450c$$$       integer l_ssbbt, k_ssbbt
451       integer l_hlp, k_hlp, l_ssni,k_ssni
452       integer l_hlp2, k_hlp2
453#ifndef NEW_SPARSE
454       integer l_hlp3, k_hlp3
455#endif
456       integer l_eri, k_eri, l_iscr,k_iscr
457       integer l_xmo, k_xmo, l_xmo_t, k_xmo_t
458       integer n_ssbb, n_ssbb1, n_ssni, n_hlp, n_hlp2, n_hlp3,
459     I      n_hlpx
460       integer iz, jz, kz, bsize
461       integer mem2, max2e, jopass
462       double precision iocnt
463       integer num_nodes, ploop, next, iiofl
464       double precision schw_ij
465       double precision tpass, nwrbytes, iorate
466       double precision t0, t1, ttask
467       logical st
468c
469#include "moints_stats.fh"
470c
471       integer gr_len, nxtask
472       external gr_len, nxtask
473c
474       call ga_zero(g_kbuf)
475       call ga_zero(g_kbuf_trans)
476       num_nodes = ga_nnodes()
477       nmo1 = oseghi - oseglo + 1
478       nmo2 = vhi - vlo + 1
479       qlo = olo
480       qhi = vhi
481       nmo = qhi - qlo + 1
482c
483c$$$       WRITE(6,221) GA_NODEID(),PVLO,PVHI
484c$$$ 221   FORMAT('ME:',I5,5X,' V-RANGE:',2I3)
485c
486c  Local MO coefficients
487c  Integrals and temporary arrays sizes
488c
489*     call int_mem_2e4c(max2e, mem2)  ! For non-blocking code
490       call intb_mem_2e4c(max2e, mem2) ! Determine mem2 = scratch space
491       max2e = max(max2e,min(50*maxbfsh**4,21**4)) ! Enuf room for 1 cartesian H shell
492c
493       bsize = max(blen,maxbfsh)
494       n_ssbb = maxbfsh*maxbfsh*bsize*bsize
495       n_ssbb1 = max((nmo1*nmo1),n_ssbb)
496
497       n_hlp = max(nmo1,maxbfsh*maxbfsh)*nbf
498       n_hlp2 = maxbfsh*maxbfsh*nmo2
499
500       n_hlp3 = maxbfsh*maxbfsh*nmo2
501
502       n_ssni = maxbfsh*maxbfsh*nbf*nmo1
503c
504c     Get the MOS, reorder the basis functions, transpose
505c
506       if (.not. ma_push_get(MT_DBL,nbf*vhi,'xmot',l_xmo_t,k_xmo_t))
507     $     call errquit('moints_semi: failed to alloc mosT', nbf*vhi,
508     &       MA_ERR)
509c
510       if (.not. ma_push_get(MT_DBL,(nbf*vhi),'r movecs',l_xmo,k_xmo))
511     $     then
512      write(6,*) ' avail dbles',ma_inquire_avail(MT_DBL)
513       call util_flush(6)
514          call errquit('moints_semi: failed to alloc mos', nbf*vhi,
515     &       MA_ERR)
516      endif
517c
518#if 0
519       call ga_get(g_movecs, 1, nbf, qlo, qhi, dbl_mb(k_xmo_t), nbf )
520#else
521       call util_mygabcast2(g_movecs,1, nbf, qlo, qhi,
522     D      dbl_mb(k_xmo_t), nbf )
523#endif
524       call row_exch( nbf, nmo, rbfmap, dbl_mb(k_xmo_t), dbl_mb(k_xmo) )
525       call moints_xmot(nbf,qhi-qlo+1,dbl_mb(k_xmo),dbl_mb(k_xmo_t))
526
527c     Only the transposed MOs are needed in the inner loop
528c
529       if (.not. ma_pop_stack(l_xmo))
530     $      call errquit('moints_semi: pop failed', 5551212, MA_ERR)
531
532       if (.not. ma_push_get(MT_DBL,n_ssni,'ssni blk',l_ssni,k_ssni))
533     $      call errquit('mp2_semi: failed to alloc ssni ', n_ssni,
534     &       MA_ERR)
535c
536c Initialize
537c
538       ploop = 0
539       iiofl = 1
540       jopass = 1
541       iocnt = ma_sizeof(MT_INT,10+nnbf,MT_BYTE)
542       next = nxtask(num_nodes, 1)
543       nwrbytes = 0.d0
544       tpass = util_cpusec()
545c
546c  4-fold shell loop
547c
548       do ish0=1,nsh
549         do jsh0=1,ish0
550           ish = max(shmap(ish0),shmap(jsh0))
551           jsh = min(shmap(ish0),shmap(jsh0))
552           st = bas_cn2bfr(basis,ish,ibflo,ibfhi)
553           st = bas_cn2bfr(basis,jsh,jbflo,jbfhi)
554           ilen = ibfhi - ibflo + 1
555           jlen = jbfhi - jbflo + 1
556           schw_ij = schwarz_shell(ish,jsh)
557           if (schw_ij*schwarz_max().ge.tol2e) then
558             if (next.eq.ploop) then
559c
560c  -------------
561c  Parallel task
562               mi_ntasks = mi_ntasks + 1
563               ttask = util_cpusec()
564               call dfill((ilen*jlen*nbf*nmo1),0.d0,dbl_mb(k_ssni),1)
565c
566c  Half-tranformed Integral generation
567c
568               do kgr=1,ngrp
569                 kshlo = glo(kgr)
570                 kshhi = ghi(kgr)
571                 st = bas_cn2bfr(basis,shmap(kshlo),iz,kz)
572                 st = bas_cn2bfr(basis,shmap(kshhi),kz,jz)
573                 kbflo = rbfmap(iz)
574                 kbfhi = rbfmap(jz)
575                 kblen = kbfhi - kbflo + 1
576                 do lgr=1,kgr
577                   lshlo = glo(lgr)
578                   lshhi = ghi(lgr)
579                   st = bas_cn2bfr(basis,shmap(lshlo),iz,kz)
580                   st = bas_cn2bfr(basis,shmap(lshhi),kz,jz)
581                   lbflo = rbfmap(iz)
582                   lbfhi = rbfmap(jz)
583                   lblen = lbfhi - lbflo + 1
584                   t0 = util_cpusec()
585
586                   if (.not. ma_push_get(MT_DBL,n_ssbb1,'ssbb blk',
587     $                  l_ssbb,k_ssbb)) then
588                      write(6,*) ga_nodeid(),
589     C                     ' avail dbles',ma_inquire_avail(MT_DBL)
590                      call util_flush(6)
591                      if(ga_nodeid().eq.0)
592     M                     call ma_summarize_allocated_blocks()
593
594                      call ga_sync()
595                       call errquit('moints_semi: ssbb', n_ssbb1,
596     &       MA_ERR)
597                    endif
598                   if (.not. ma_push_get(MT_DBL, max2e, 'ibuf',
599     $                  l_eri, k_eri))
600     $                  call errquit('moints_semi: eri', max2e,
601     &       MA_ERR)
602                   if (.not. ma_push_get(MT_DBL, mem2, 'int scr',
603     $                  l_iscr, k_iscr))
604     $                  call errquit('moints_semi: scr', mem2,
605     &       MA_ERR)
606
607                   call moints_gblk( basis, ish, jsh,
608     $                               kshlo, kshhi, lshlo, lshhi,
609     $                               shmap, rbfmap,
610     $                               schw_ij, tol2e, osym, oblk,
611     $                               max2e, dbl_mb(k_eri),
612     $                               mem2, dbl_mb(k_iscr),
613     $                               ibflo, ibfhi, jbflo, jbfhi,
614     $                               kbflo, kbfhi, lbflo, lbfhi,
615     $                               dbl_mb(k_ssbb) )
616
617
618                  if (.not. ma_pop_stack(l_iscr))
619     $                 call errquit('moints_semi: pop failed ',14,
620     &       MA_ERR)
621                  if (.not. ma_pop_stack(l_eri))
622     $                 call errquit('moints_semi: pop failed ',15,
623     &       MA_ERR)
624
625                   mi_tint = mi_tint + util_cpusec() - t0
626c
627                   t0 = util_cpusec()
628                   mi_flop1 = mi_flop1 +
629     $                   4.d-6*ilen*jlen*kblen*lblen*(oseghi-oseglo+1)
630
631       n_hlpx = (oseghi-oseglo+1)*max(kbfhi-kbflo+1,lbfhi-lbflo+1)
632                   if (.not. ma_push_get(MT_DBL,n_hlpx,'hlp block',
633     $                  l_hlp,k_hlp))
634     $                  call errquit('moints_semi: hlp', n_hlp,
635     &       MA_ERR)
636
637                   call moints_trf1_new(nbf, qlo, qhi, oseglo, oseghi,
638     $                  ilen, jlen, kbflo, kbfhi,
639     $                  lbflo, lbfhi,
640     $                  dbl_mb(k_ssbb),
641     $                  dbl_mb(k_xmo_t),
642     $                  dbl_mb(k_ssni), dbl_mb(k_hlp))
643
644                   if (.not. ma_pop_stack(l_hlp))
645     $                 call errquit('moints_semi: pop failed ',16,
646     &       MA_ERR)
647
648                  if (.not. ma_pop_stack(l_ssbb))
649     $                 call errquit('moints_semi: pop failed ',17,
650     &       MA_ERR)
651
652                   mi_t1 = mi_t1 + util_cpusec() - t0
653                  enddo
654               enddo
655c
656c  Transform 2nd index and put into buffer
657c
658               t0 = util_cpusec()
659
660               if (.not. ma_push_get(MT_DBL,n_hlp2,'hlp2 block',
661     $              l_hlp2,k_hlp2))
662     $              call errquit('moints_semi: hlp2', n_hlp2, MA_ERR)
663
664#ifdef NEW_SPARSE
665               if (.not. ma_push_get(MT_DBL,n_hlp,'hlp block',
666     $              l_hlp,k_hlp))
667     $              call errquit('moints_semi: hlp', n_hlp, MA_ERR)
668
669               call moints_trf2Kynew(nbf, qlo, qhi, oseglo, oseghi,
670     $                               vlo, vhi, ibflo, ibfhi,
671     $                               jbflo, jbfhi, rlen(iiofl),
672     $                               gloc, dbl_mb(k_ssni),
673     $                               dbl_mb(k_hlp), dbl_mb(k_hlp2),
674     $                               dbl_mb(k_xmo_t), g_kbuf_trans)
675
676               if (.not. ma_pop_stack(l_hlp))
677     $                 call errquit('moints_semi: pop failed ',18,
678     &       MA_ERR)
679#else
680               if (.not. ma_push_get(MT_DBL,n_hlp3,'hlp3 block',
681     $              l_hlp3,k_hlp3))
682     $              call errquit('moints_semi: hlp3', n_hlp3,
683     &       MA_ERR)
684               if (.not. ma_push_get(MT_DBL,(nbf*vhi),'r movecs',
685     $              l_xmo,k_xmo)) then
686      write(6,*) ' avail dbles',ma_inquire_avail(MT_DBL)
687       call util_flush(6)
688                     call errquit
689     $              ('moints_semi: failed to alloc mos3', nbf*vhi,
690     &       MA_ERR)
691             endif
692c
693c     Generate the untranposed mos by transposing the transpose
694c
695               call moints_xmot(qhi-qlo+1,nbf,dbl_mb(k_xmo_t),
696     $              dbl_mb(k_xmo))
697c
698               call moints_trf2Ky(nbf, qlo, qhi, oseglo, oseghi,
699     $              vlo, vhi, ibflo, ibfhi,
700     $              jbflo, jbfhi, rlen(iiofl), gloc,
701     $              dbl_mb(k_ssni),
702     $              dbl_mb(k_hlp3), dbl_mb(k_hlp2),
703     $              dbl_mb(k_xmo), g_kbuf_trans)
704
705               if (.not. ma_pop_stack(l_xmo))
706     $                 call errquit('moints_semi: pop failed ',180,
707     &       MA_ERR)
708               if (.not. ma_pop_stack(l_hlp3))
709     $                 call errquit('moints_semi: pop failed ',18,
710     &       MA_ERR)
711#endif
712               if (.not. ma_pop_stack(l_hlp2))
713     $                 call errquit('moints_semi: pop failed ',19,
714     &       MA_ERR)
715
716
717               mi_t2k = mi_t2k + util_cpusec() - t0
718
719               ttask = util_cpusec() - ttask
720               mi_mintask = min(mi_mintask,ttask)
721               mi_maxtask = max(mi_maxtask,ttask)
722               mi_aggtask = mi_aggtask + ttask
723c
724               next = nxtask(num_nodes, 1)
725c
726c  End parallel task
727c  -----------------
728c
729             endif
730             ploop = ploop + 1
731c
732c  Checkpoint for I/O
733c
734             if (ploop.eq.ionext(iiofl)) then
735               t0 = util_cpusec()
736               call ga_mask_sync(.true.,.false.)
737               call ga_sync()
738               t1 = util_cpusec() - t0
739               mi_nsynchs = mi_nsynchs + 1
740               call summaxmin(t1, mi_aggsynch, mi_maxsynch, mi_minsynch)
741               t0 = util_cpusec()
742#ifdef GAADD
743               if (mp2cpybck) then
744                  call mp2_copyback(g_kbuf,g_kbuf_trans)
745               else
746                  call ga_copy(g_kbuf_trans,g_kbuf)
747               endif
748C               call ga_mask_sync(.true.,.false.)
749               call ga_sync()
750c               call ga_add(1d0,g_kbuf_trans,1d0,g_kbuf,g_kbuf)
751
752#endif
753               call moints_wrbuf( kunit, oseglo, oseghi,
754     $                            vlo, pvlo, pvhi, nnbf, iocnt,
755     $                            rlen(iiofl), g_kbuf)
756               mi_tio = mi_tio + util_cpusec() - t0
757               nwrbytes = nwrbytes + nmo1*(pvhi-pvlo+1)*rlen(iiofl)
758               call ga_mask_sync(.false.,.false.)
759               call ga_zero(g_kbuf)
760               call ga_mask_sync(.false.,.true.)
761               call ga_zero(g_kbuf_trans)
762               iocnt = iocnt + ma_sizeof(MT_DBL,rlen(iiofl),MT_BYTE)
763               iiofl = iiofl + 1
764             endif
765           endif
766         enddo
767       enddo
768       t0 = util_cpusec()
769cedo       call ga_sync
770       next = nxtask(-num_nodes, 1)
771       t1 = util_cpusec() - t0
772       mi_nsynchs = mi_nsynchs + 1
773       call summaxmin(t1, mi_aggsynch, mi_maxsynch, mi_minsynch)
774       tpass = util_cpusec() - tpass
775
776c
777c I/O stats
778c
779       nwrbytes = nwrbytes*8.e-6
780       if(mi_tio.eq.0) then
781          iorate=0d0
782       else
783          iorate = nwrbytes/mi_tio
784       endif
785       call ga_dgop(MSG_SEMI_IO_RATE,iorate,1,'+')
786       if ((ga_nodeid().eq.0).and.
787     $     (util_print('io_stats',print_default))) then
788         write(6,886) nwrbytes, mi_tio, iorate
789 886     format('Node 0 wrote ',f8.1,' Mb in ',f8.1,' s',5x,
790     $          'Agg I/O rate:', f8.1, ' Mb/s')
791         call util_flush(6)
792       endif
793c
794c Clean-up
795c
796       if (.not. ma_pop_stack(l_ssni))
797     $      call errquit('moints_semi: pop failed ',20, MA_ERR)
798c
799c  Complete in-core section
800c
801       if (.not. ma_pop_stack(l_xmo_t))
802     $      call errquit('moints_semi: pop failed ',22, MA_ERR)
803c
804       end
805
806
807
808      subroutine moints_trf2Ky( nbf, qlo, qhi, oseglo, oseghi,
809     $                          vlo, vhi, ilo, ihi, jlo, jhi, rlen,
810     $                          gloc, ssni, h, h2, c, g_buf )
811      implicit none
812#include "global.fh"
813      integer nbf, qlo, qhi, oseglo, oseghi, vlo, vhi
814      integer ilo, ihi, jlo, jhi
815      integer rlen
816      integer gloc(nbf,nbf)
817      double precision ssni(nbf,jlo:jhi,ilo:ihi,oseglo:oseghi)
818      double precision h(jlo:jhi,ilo:ihi,vlo:vhi),h2(*)
819      double precision c(nbf,qlo:qhi)
820      integer g_buf
821      integer ilen, jlen, ijlen, nvir
822      integer glo, ghi, i, j, ij, ijolo, ijohi
823      integer o, v
824      integer nij
825c
826      ilen = ihi - ilo + 1
827      jlen = jhi - jlo + 1
828      nij = (ilen*(ilen+1))/2
829      ijlen = ilen*jlen
830      glo = gloc(ilo,jlo)
831      ghi = gloc(ihi,jhi)
832      nvir = vhi - vlo + 1
833      do o=oseglo,oseghi
834        call dgemm('t', 'n', ijlen, nvir, nbf, 1.d0, ssni(1,jlo,ilo,o),
835     $              nbf, c(1,vlo), nbf, 0.d0, h(jlo,ilo,vlo), ijlen)
836        ijolo = (o - oseglo)*rlen + glo
837        ijohi = (o - oseglo)*rlen + ghi
838        if (ihi.eq.jhi) then
839          ij = 0
840          do v=vlo,vhi
841            do i=ilo,ihi
842              do j=ilo,i
843                ij =   ij + 1
844                h2(ij) = h(j,i,v)
845              enddo
846            enddo
847          enddo
848#ifndef NOCOMMS
849          call ga_put( g_buf, ijolo, ijohi, 1, nvir, h2, nij )
850#endif
851        else
852#ifndef NOCOMMS
853          call ga_put( g_buf, ijolo, ijohi, 1, nvir, h, ijlen )
854#endif
855        endif
856      enddo
857      return
858      end
859
860c
861c  *** For completely in-core 4-index ***
862c
863
864      subroutine moints_trf34Ky( nbf, ostart, olo, ohi, vlo, vhi,
865     $                            nnbf, rloc, h1, h2, c, g_buf )
866      implicit none
867#include "global.fh"
868#include "mafdecls.fh"
869      integer nbf, ostart, ohi, olo, vlo, vhi
870      integer nnbf, rloc(nnbf)
871      double precision h1(*), h2(*)
872      double precision c(nbf,nbf)
873      integer g_buf
874c
875      integer nocc, nvir, v, o, vo
876      integer k_local, ld, my_id, rlo, rhi, clo, chi
877
878      nocc = ohi - olo + 1
879      nvir = vhi - vlo + 1
880      my_id = ga_nodeid()
881      call ga_distribution(g_buf, my_id, rlo, rhi, clo, chi )
882      do v=vlo,vhi
883        do o=olo,ohi
884          vo = (v-1)*(ohi-olo+1) + o
885          if ((vo.ge.clo).and.(vo.le.chi)) then
886            call dfill( (nbf*nbf), 0.d0, h1, 1 )
887            call ga_access(g_buf, rlo, rhi, vo, vo, k_local, ld )
888            call scatter(nnbf, h1, rloc, dbl_mb(k_local) )
889            call ga_release(g_buf, rlo, rhi, vo, vo )
890            call upper2square(nbf,h1,h1)
891
892            call dgemm( 'n', 'n', nbf, nocc, nbf, 1.d0, h1, nbf,
893     $                   c(1,olo), nbf, 0.d0, h2, nbf )
894            call dgemm( 't', 'n', nvir, nocc, nbf, 1.d0, c(1,vlo), nbf,
895     $                   h2, nbf, 0.d0, h1, nvir )
896
897C$$$             WRITE(6,911) V,O
898C$$$ 911         FORMAT(//,5X,'V :',I5,5X,'O :',I5)
899C$$$             CALL MOINTS_MATPRINT(NVIR,NOCC,H1)
900
901          endif
902        enddo
903      enddo
904      return
905      end
906
907
908
909
910
911
912      subroutine moints_wrbuf( munit, oseglo, oseghi, vlo, pvlo, pvhi,
913     $                         nnbf, iocnt, rlen, g_buf )
914      implicit none
915#include "errquit.fh"
916#include "mafdecls.fh"
917#include "global.fh"
918#include "inp.fh"
919#include "eaf.fh"
920      integer munit                       ! I/O unit
921      integer oseglo, oseghi              ! Segment range
922      integer vlo                         ! Lowest virtual index
923      integer pvlo, pvhi                  ! Virtual index range for this processor
924      integer nnbf, rlen
925      double precision iocnt              ! file header offset
926      integer g_buf
927c
928      double precision faddr
929      integer o, vv, pv, nocc, npv, ooffset
930      integer myid, rlo, rhi, clo, chi, k_local, ld
931#ifdef  BAD_GACCESS
932      integer l_local,howmy
933#endif
934      integer ierr
935      character*80 errmsg
936c
937      nocc = oseghi - oseglo + 1
938      npv = pvhi - pvlo + 1
939      myid = ga_nodeid()
940      call ga_distribution(g_buf, myid, rlo, rhi, clo, chi )
941c
942      do o=1,nocc
943        ooffset = (o-1)*rlen
944        do pv=pvlo,pvhi
945	  vv = pv - vlo + 1
946          faddr = iocnt +
947     $            ma_sizeof(MT_DBL,((o-1)*npv+pv-pvlo)*nnbf,MT_BYTE)
948          if ((vv.ge.clo).and.(vv.le.chi)) then
949#ifdef  BAD_GACCESS
950      ld=rhi-rlo+1
951      howmy=max(ld,ooffset+rlen)
952      if(.not.ma_push_get(MT_DBL,howmy,
953     $  'scratch buff', l_local, k_local)) call
954     $  errquit('mointsemi: pushget failed',howmy,0)
955            call ga_get(g_buf,rlo,rhi,vv,vv,dbl_mb(k_local),ld)
956#else
957            call ga_access(g_buf, rlo, rhi, vv, vv, k_local, ld )
958#endif
959            ierr=eaf_write(munit, faddr, dbl_mb(k_local+ooffset),
960     $                    ma_sizeof(MT_DBL,rlen,MT_BYTE))
961            if(ierr.ne.0) then
962               call eaf_errmsg(ierr, errmsg)
963               write(6,*) ' IO offset ', faddr
964               write(6,*) ' IO error message ',
965     ,              errmsg(1:inp_strlen(errmsg))
966               call errquit('moints_wrbuf: failed on write',0, DISK_ERR)
967            endif
968#ifdef  BAD_GACCESS
969      if(.not.ma_pop_stack(l_local)) call
970     $  errquit('mointsemi: popstack failed',0,0)
971#else
972            call ga_release(g_buf, rlo, rhi, vv, vv )
973#endif
974          else
975            call errquit('moints_semi: wrong distrib for GA buffer',0,
976     &       GA_ERR)
977          endif
978        enddo
979      enddo
980      return
981      end
982
983
984
985
986
987
988
989
990
991      subroutine moints_readintK( nbf, oseglo, oseghi, olo, ohi,
992     $                            vlo, vhi, c, orbe, g_epair )
993      implicit none
994#include "errquit.fh"
995#include "mafdecls.fh"
996#include "global.fh"
997#include "eaf.fh"
998#include "util.fh"
999      integer MSG_SEMIMP2_SUM
1000      parameter(MSG_SEMIMP2_SUM=10241)
1001      integer nbf, oseglo, oseghi, olo, ohi, vlo, vhi
1002      double precision c(nbf,nbf)
1003      double precision orbe(nbf)
1004      integer g_epair
1005c
1006      integer nseg, nocc, nvir, nnbf, ioff, v, o, oo
1007      integer pvlo, pvhi, itmp
1008      integer l_v, k_v, l_t, k_t, l_x, k_x
1009      integer j, oj, jj, vvlo, vvhi, k_local, ld, myid
1010      integer rlo, rhi, clo, chi
1011      integer gtype, dim1, oomax
1012      integer g_exch
1013      double precision tmp2
1014      character*256 fname
1015      integer kunit
1016      double precision xx, denom, e2
1017      logical status
1018      double precision moints_epair_eval
1019      external moints_epair_eval
1020      double precision c1,c2       ! constants for moints_epair_eval (because of SCS-MP2)
1021
1022      tmp2 = util_cpusec()
1023c
1024      c1=4.0d0
1025      c2=2.0d0
1026c
1027      call util_file_name('kh',.true.,.true.,fname)
1028#ifdef NOIO
1029      if (eaf_open(fname, 200, kunit).ne.0)
1030#else
1031      if (eaf_open(fname, EAF_RW, kunit).ne.0)
1032#endif
1033     $  call errquit('moints_readintK: cannot open half int file',0,
1034     &       DISK_ERR)
1035      myid = ga_nodeid()
1036      e2 = 0.d0
1037      nocc = ohi - olo + 1
1038      nvir = vhi - vlo + 1
1039      nseg = oseghi - oseglo + 1
1040      call moints_vrange( kunit, pvlo, pvhi, nnbf, ioff )
1041      status = ma_push_get(MT_INT,(nnbf+mod(nnbf,2)),'scatter',l_v,k_v)
1042      call moints_getscattv( kunit, nnbf, int_mb(k_v) )
1043c$$$       do i=1,nnbf
1044c$$$         write(6,*) 'k_v',int_mb(k_v+i-1)
1045c$$$       enddo
1046*ga:1:0
1047      if (.not.ga_create( MT_DBL, (nvir*nvir), nocc, 'exch',
1048     $                    (nvir*nvir), 0, g_exch))
1049     $    call errquit('moints_semimp2: cannot allocate exch',0, GA_ERR)
1050      call ga_distribution(g_exch, myid, rlo, rhi, clo, chi )
1051      call ga_inquire(g_epair,gtype,dim1,oomax)
1052c
1053c
1054c$$$      WRITE(6,934) GA_NODEID(),PVLO,PVHI,CLO,CHI
1055c$$$ 934  FORMAT('ME:',I5,5x,'VRANGE:',2I5,5x,'OCC RANGE FOR K:',2I5)
1056c$$$      CALL UTIL_FLUSH(6)
1057c
1058c  Loop over occupied/virtual pairs
1059c
1060      itmp = max(nnbf,(nbf*nocc))
1061      status = ma_push_get(MT_DBL,itmp,'read buff',l_t,k_t)
1062      status = ma_push_get(MT_DBL,(nbf*nbf),'U half',l_x,k_x)
1063
1064      do o=oseglo,oseghi
1065        oo = o - oseglo + 1
1066        call ga_zero(g_exch)
1067        do v=pvlo,pvhi
1068          call dfill((nbf*nbf), 0.d0, dbl_mb(k_x), 1 )
1069          call moints_rdhfint( kunit, pvlo, pvhi, oo, v, nnbf,
1070     $                        ioff, dbl_mb(k_t))
1071          call scatter( nnbf, dbl_mb(k_x), int_mb(k_v), dbl_mb(k_t) )
1072          call upper2square( nbf, dbl_mb(k_x), dbl_mb(k_x) )
1073          call dgemm( 'n', 'n', nbf, nocc, nbf, 1.d0, dbl_mb(k_x),
1074     $               nbf, c(1,olo), nbf, 0.d0, dbl_mb(k_t), nbf )
1075          call dgemm( 't', 'n', nvir, nocc, nbf, 1.d0, c(1,vlo),
1076     $               nbf, dbl_mb(k_t), nbf, 0.d0, dbl_mb(k_x), nvir )
1077          vvlo = (v-vlo)*nvir + 1
1078          vvhi = (v-vlo+1)*nvir
1079          do j=olo,o
1080            oj = j-olo+1
1081            jj = (j-olo)*nvir
1082            call ga_put( g_exch, vvlo, vvhi, oj, oj,
1083     $                   dbl_mb(k_x+jj), nvir)
1084          enddo
1085        enddo
1086        call ga_sync()
1087        do j=olo,o
1088          jj = j-olo+1
1089          if ((jj.ge.clo).and.(jj.le.chi)) then
1090            denom = orbe(o) + orbe(j)
1091            call ga_access(g_exch, rlo, rhi, jj, jj, k_local, ld )
1092
1093c$$$            WRITE(6,920) O,J
1094c$$$ 920        FORMAT(/,' Operator: [',i4,',',i4,']')
1095c$$$            CALL MOINTS_MATPRINT(NVIR,NVIR,DBL_MB(K_LOCAL))
1096c$$$            CALL UTIL_FLUSH(6)
1097c$$$
1098
1099c
1100c     The following call is suspect since the definition of this
1101c     function has 8 arguments (4 integers at the start, not 3.
1102c     Added dummy arg to get it to link under WIN32, but someone
1103c     should fix this properly.
1104c     BGJ (9/99)
1105c
1106c            xx = moints_epair_eval( nvir, 0, nvir, dbl_mb(k_local),
1107c     $                              orbe(ohi+1), denom )
1108            call errquit('suspect call to moints_epair_eval',0,
1109     &       UNKNOWN_ERR)
1110            xx = moints_epair_eval( nvir, 0, nvir, 0, dbl_mb(k_local),
1111     $                              orbe(ohi+1), denom, c1, c2 )
1112            call ga_release(g_exch, rlo, rhi, jj, jj )
1113            if (o.eq.j) xx = xx*0.5d0
1114            e2 = e2 + xx
1115            oj = ((o-olo+1)*(o-olo))/2 + jj
1116            call ga_put(g_epair,1,1,oj,oj,xx,1)
1117
1118c$$$            WRITE(6,922) GA_NODEID(),O,J,XX
1119c$$$ 922        FORMAT(I3,' %%%%%% ',2I5,5X,F16.10)
1120c$$$	        CALL UTIL_FLUSH(6)
1121
1122          endif
1123c$$$          CALL GA_SYNC()
1124        enddo
1125      enddo
1126c
1127c
1128c$$$      CALL GA_SYNC()
1129c$$$      WRITE(6,967) GA_NODEID(), E2
1130c$$$ 967  FORMAT('ME:',I5,'  MP2 Contribution:',f16.10)
1131c$$$      CALL UTIL_FLUSH(6)
1132c$$$      CALL GA_PRINT(G_PAIR)
1133c$$$      call ga_sync()
1134c$$$      call ga_dgop(MSG_SEMIMP2_SUM,e2,1,'+')
1135c$$$      if (ga_nodeid().eq.0) then
1136c$$$        write(6,681) e2
1137c$$$ 681    format(/,10x,'MP2 correction:',5x,f16.10)
1138c$$$        call util_flush(6)
1139c$$$      endif
1140c
1141c  Clean up
1142c
1143      if (.not.ga_destroy(g_exch))
1144     $  call errquit('moints_semimp2: failed to destroy',g_exch, GA_ERR)
1145      if (.not. ma_pop_stack(l_x))
1146     $  call errquit('moints_readint: failed to pop', l_x, MA_ERR)
1147      if (.not. ma_pop_stack(l_t))
1148     $  call errquit('moints_readint: failed to pop', l_t, MA_ERR)
1149      if (.not. ma_pop_stack(l_v))
1150     $  call errquit('moints_readint: failed to pop', l_v, MA_ERR)
1151c
1152      if (eaf_close(kunit).ne.0)
1153     $  call errquit('moints_readintK: failed to close file',kunit,
1154     &       DISK_ERR)
1155c
1156      call ga_sync()
1157
1158      return
1159      end
1160
1161
1162
1163
1164
1165c
1166c  Read half-integrals from local file
1167c
1168      subroutine moints_rdhfint( munit, plo, phi, o, p, nnbf, ioff, t )
1169      implicit none
1170#include "errquit.fh"
1171#include "mafdecls.fh"
1172#include "eaf.fh"
1173      integer munit
1174      integer plo, phi, o, p, nnbf
1175      integer ioff                       ! header offset in bytes
1176      double precision t(nnbf)
1177      integer np, rlen, recnum
1178      double precision faddr
1179
1180      np = phi - plo + 1
1181      recnum = (o-1)*np + p - plo
1182      rlen = ma_sizeof(MT_DBL,nnbf,MT_BYTE)
1183      faddr = ioff + recnum*rlen
1184      if (eaf_read(munit, faddr, t, rlen).ne.0)
1185     $  call errquit('moints_rdhfint:cannot read integral record',0,
1186     &       DISK_ERR)
1187
1188c$$$      WRITE(6,771) RECNUM, FADDR
1189c$$$ 771  FORMAT(' READ RECORD#:',I8,' @ ',F10.0)
1190
1191      return
1192      end
1193
1194
1195
1196
1197c
1198c  Read header on the local file and
1199c  return index ranges, offsets and
1200c  number of AO indices
1201c
1202
1203      subroutine moints_vrange( munit, plo, phi, nnbf, ioff )
1204      implicit none
1205#include "errquit.fh"
1206#include "global.fh"
1207#include "mafdecls.fh"
1208#include "eaf.fh"
1209      integer munit
1210      integer plo, phi, nnbf, ioff
1211      integer vtmp(10)
1212c$$$      INTEGER I
1213
1214      if (eaf_read(munit, 0.d0, vtmp,
1215     $             ma_sizeof(MT_INT,10,MT_BYTE)).ne.0)
1216     $  call errquit('moints_vrange: cannot read header info',0,
1217     &       DISK_ERR)
1218      ioff = vtmp(1)
1219      nnbf = vtmp(2)
1220      plo = vtmp(3)
1221      phi = vtmp(4)
1222
1223c$$$      IF (GA_NODEID().EQ.0) THEN
1224c$$$        PRINT*,'READ BACK INITIAL PARAMETERS'
1225c$$$        WRITE(6,771) (VTMP(I),I=1,10)
1226c$$$ 771    FORMAT(16I4)
1227c$$$        CALL UTIL_FLUSH(6)
1228c$$$      ENDIF
1229c$$$      CALL GA_SYNC()
1230
1231      return
1232      end
1233
1234
1235c
1236c  Recover the scatter array from local file header.
1237c  This scatter array maps the dense compound
1238c  AO index (mu nu) to square array (mu,nu).
1239c  Includes sparsity by Schwarz screening and petite list.
1240c
1241      subroutine moints_getscattv( munit, nnbf, v )
1242      implicit none
1243#include "errquit.fh"
1244#include "mafdecls.fh"
1245#include "eaf.fh"
1246      integer munit
1247      integer nnbf, v(*)
1248      integer itmp
1249      double precision ioptr
1250c$$$      INTEGER I
1251
1252      itmp = nnbf+mod(nnbf,2)
1253
1254      ioptr = ma_sizeof(MT_INT,10,MT_BYTE)
1255      if (eaf_read(munit, ioptr, v,
1256     $             ma_sizeof(MT_INT,nnbf,MT_BYTE)).ne.0)
1257     $  call errquit('moints_getscattv: cannot read scatter v',0,
1258     &       DISK_ERR)
1259
1260c$$$      PRINT*,'READ BACK SCATTER ARRAY'
1261c$$$      WRITE(6,771) (V(I),I=1,NNBF)
1262c$$$ 771  FORMAT(16I4)
1263
1264      return
1265      end
1266
1267
1268
1269
1270
1271
1272
1273c
1274c  Units in bytes
1275c
1276       integer function moints_lmem( basis, nocc, nvir, blen )
1277       implicit none
1278#include "mafdecls.fh"
1279#include "global.fh"
1280#include "bas.fh"
1281#include "util.fh"
1282       integer basis, nocc, nvir, blen
1283c
1284       integer nsh, nbf, maxbfsh
1285       integer bsize, ngrp, imax2e, imem2, memi, memd
1286       logical status, oprint_mem
1287       integer moints_numgr
1288       external moints_numgr
1289c
1290       oprint_mem = util_print('memory',print_high) .and.
1291     $      ga_nodeid().eq.0
1292c
1293       status = bas_numbf(basis,nbf)
1294       status = status.and.bas_numcont(basis,nsh)
1295       status = status.and.bas_nbf_cn_max(basis,maxbfsh)
1296       bsize = max(blen,maxbfsh)
1297       ngrp = moints_numgr( basis, blen )
1298
1299       call intb_mem_2e4c(imax2e, imem2)
1300       imax2e = max(imax2e,min(50*maxbfsh**4,21**4)) ! Enuf room for 1 cartesian H shell
1301
1302*      call int_mem_2e4c(imax2e, imem2) For non-blocking integrals
1303       memi = nsh + 4*ngrp + nbf*nbf + nsh*(nsh+1) +
1304     $        2*ga_nnodes()
1305       memd = nbf**2 + blen*nbf*maxbfsh**2 +
1306     $      max(maxbfsh**2*blen**2+3*imax2e+imem2,
1307     $          2*nvir*maxbfsh**2+nbf**2)
1308c
1309c     Add on the ubiquitous 10%
1310c
1311       moints_lmem = (memd + ma_sizeof(MT_INT, memi, MT_DBL))*1.1
1312c
1313       if (oprint_mem) then
1314          write(6,*)
1315          write(6,1) ' Integer Lmemory ',
1316     $         nsh , 4*ngrp , nbf*nbf , nsh*(nsh+1) ,
1317     $        2*ga_nnodes()
1318 1        format(a,1x,10i8)
1319          write(6,1) ' Real Lmemory    ',
1320     $         nbf**2, blen*nbf*maxbfsh**2,
1321     $         max(maxbfsh**2*blen**2+3*imax2e+imem2,
1322     $         2*nvir*maxbfsh**2+nbf**2)
1323          write(6,1) ' Total Lmemory   ',
1324     $         int((memd + ma_sizeof(MT_INT, memi, MT_DBL))*1.1)
1325          call util_flush(6)
1326       endif
1327c
1328       end
1329
1330      subroutine moints_xmot(nbf,nq,xmo, xmo_t)
1331      implicit none
1332      integer nbf, nq
1333      double precision xmo(nbf,nq), xmo_t(nq,nbf)
1334      integer i,j
1335      do i = 1, nbf
1336         do j = 1, nq
1337            xmo_t(j,i) = xmo(i,j)
1338         enddo
1339      enddo
1340      end
1341
1342#ifdef NEW_SPARSE
1343      subroutine moints_trf2Kynew( nbf, qlo, qhi, oseglo, oseghi,
1344     $                             vlo, vhi, ilo, ihi, jlo, jhi,
1345     $                             rlen, gloc, ssni, h, h2, ct,
1346     $                             g_buf )
1347      implicit none
1348#include "global.fh"
1349      integer nbf, qlo, qhi, oseglo, oseghi, vlo, vhi
1350      integer ilo, ihi, jlo, jhi
1351      integer rlen
1352      integer gloc(nbf,nbf)
1353      double precision ssni(nbf,jlo:jhi,ilo:ihi,oseglo:oseghi)
1354      double precision h(vlo:vhi,jlo:jhi,ilo:ihi),h2(*)
1355      double precision ct(qlo:qhi,nbf)
1356      integer g_buf
1357c
1358      double precision s
1359      integer ijolo, ijohi, nij
1360      integer ilen, jlen, vlen
1361      integer glo, ghi, i, j, ij, k, jtop, klo, khi
1362      integer o, v, vtop, vbot
1363      integer kchunk, vchunk
1364      parameter (kchunk=32, vchunk=96) ! For cache reuse
1365c
1366      ilen = ihi - ilo + 1
1367      jlen = jhi - jlo + 1
1368      vlen = vhi - vlo + 1
1369      glo  = gloc(ilo,jlo)
1370      ghi  = gloc(ihi,jhi)
1371      nij = ilen*jlen
1372      if (ihi.eq.jhi) nij = (ilen*(ilen+1))/2
1373
1374      do o = oseglo, oseghi
1375        ijolo = (o - oseglo)*rlen + glo
1376        ijohi = (o - oseglo)*rlen + ghi
1377        call dfill((vhi-vlo+1)*ilen*jlen, 0.0d0, h, 1 )
1378        do klo = 1, nbf, kchunk                       ! Blocking for cache
1379          khi = min(nbf,klo+kchunk-1)
1380          do vbot = vlo, vhi, vchunk                  ! Blocking for cache
1381            vtop = min(vhi,vbot+vchunk-1)
1382            do i = ilo, ihi
1383              jtop = jhi
1384              if (ilo .eq. jlo) jtop = i
1385              do j = jlo, jtop
1386                do k = klo, khi
1387                  s = ssni(k,j,i,o)
1388                  if (abs(s) .gt. 1d-16) then
1389                    do v = vbot,vtop
1390                      h(v,j,i) = h(v,j,i) + s*ct(v,k)
1391                    enddo
1392                  endif
1393                enddo
1394              enddo
1395            enddo
1396          enddo
1397        enddo
1398        ij = 0
1399        s = 0.d0
1400        do v=vlo,vhi
1401          do i=ilo,ihi
1402            if (ihi.eq.jhi) jtop = i
1403            do j=jlo,jtop
1404              ij = ij + 1
1405              h2(ij) = h(v,j,i)
1406              s = s + h(v,j,i)*h(v,j,i)
1407            enddo
1408          enddo
1409        enddo
1410        if (s.gt.1.d-20) call ga_put(g_buf,ijolo,ijohi,1,vlen,h2,nij)
1411      enddo
1412c
1413      return
1414      end
1415#endif
1416