1#ifdef SOLARIS 2#define NEW_SPARSE 3#endif 4#define GAADD 1 5c 6c $Id$ 7c 8c Generates and writes out half-transformed 9c exchange integrals for some multipassed 10c segment, (oseglo:oseghi) 11c 12c Half-transformed integrals are transposed in GA memory 13c buffers and flushed when full. Size of buffer is determined 14c by available memory. 15c 16c Flat file is written using word-addressable I/O. Prefaced 17c with some indexing info. File layout of integrals is: 18c 19c nnbf 20c |<----> 21c ! nnbf*nvirl 22c |<-------------------> 23c | nnbf*nvirl*nseg 24c |<-------------------------------------------------------------> 25c 26c 27c nnbf: triangle of basis functions (with sparsity) <= (nbf*(nbf+1))/2 28c nvirl: number of virtual indices this processor owns ~nvir/nproc 29c nseg: length of occupied segment 30c 31c 32c 33c 34 subroutine moints_semi( rtdb, basis, tol2e, oseglo, oseghi, 35 $ olo, ohi, vlo, vhi, g_movecs, oblk , 36 K k_file_size) 37 implicit none 38#include "errquit.fh" 39#include "mafdecls.fh" 40#include "global.fh" 41#include "eaf.fh" 42#include "util.fh" 43#include "sym.fh" 44#include "bas.fh" 45#include "rtdb.fh" 46c 47 integer rtdb 48 integer basis ! [input] Basis handle 49 double precision tol2e ! [input] Integral tolerance 50 integer oseglo, oseghi ! [input] Occupied segment range 51 integer olo, ohi ! [input] Correlated occupied index range 52 integer vlo, vhi ! [input] Virtual index range 53 integer g_movecs ! [input] MO coefficients 54 logical oblk ! [input] Toggle AO integral blocking 55c 56 integer l_shmap, k_shmap, l_bfmap, k_bfmap, l_rbfmap, k_rbfmap 57 integer l_glo, k_glo, l_ghi, k_ghi 58 integer l_gloc, k_gloc 59 integer l_ionext, k_ionext 60 integer l_rlen, k_rlen 61 integer l_rloc, k_rloc 62 integer nbf, nsh, maxbfsh, nseg, nocc, nvir 63 integer ngrp, blen, nnbf 64 integer pvlo, pvhi 65 integer niofl 66 integer myid, numnodes, rlo, rhi, clo, chi 67 integer itmp, vtmp(10), mxrlen,mxrlen_in 68 integer lmemreq, nvir_node 69 double precision gmem, gmem_agg 70 double precision iotmpptr 71 integer g_kbuf,g_kbuf_trans 72 logical status 73 logical osym 74 character*256 fnameK 75 integer kunit 76 logical oprint_mem 77c 78 integer moints_numgr 79 integer moints_lmem 80 integer eaftype,eaf_size_in_mb,inntsize 81 double precision k_file_size 82 integer mp2_eaftype 83 external mp2_eaftype 84 external moints_numgr 85 external moints_lmem 86 logical mp2cpybck 87 data osym/.false./ 88c 89c General info 90c Compare definition for nseg : batch range 91c nocc : number of occupieds to highest i 92c 93 myid = ga_nodeid() 94 numnodes = ga_nnodes() 95 inntsize=MA_sizeof(MT_INT,1,MT_BYTE) 96 nocc = ohi - olo + 1 97 nseg = oseghi - oseglo + 1 98 nvir = vhi - vlo + 1 99 status = bas_numbf(basis,nbf) 100 status = status.and.bas_numcont(basis,nsh) 101 status = status.and.bas_nbf_cn_max(basis,maxbfsh) 102 if (.not.status) call errquit('moints: cannot get basis info',0, 103 & BASIS_ERR) 104c 105 oprint_mem = util_print('memory',print_high) .and. 106 $ ga_nodeid().eq.0 107c 108c Clear 4-index statistics 109c 110 call moints_stats_clear() 111c 112c Initial block length is very generous for efficiency. Reduce it 113c in while loop (1212) if things do not fit. 114c 115 blen = min(nbf,16*maxbfsh) ! Initial generous allocation 116* blen = 32 117 1212 continue 118c 119c Memory arithmetic 120c 121 lmemreq = moints_lmem(basis, nseg, nvir, blen) 122 if ((.not.ga_uses_ma()).and.(.not.(ga_memory_limited()))) 123 $ call errquit('moints_semi: cannot determine memory limit',0, 124 & GA_ERR) 125c 126 gmem = ga_memory_avail() / 8 ! Want amount in doubles 127 call ga_dgop(313, gmem, 1, 'min') 128 if (oprint_mem) then 129 write(6,717) blen, gmem, dble(lmemreq) 130 717 format(/' Block length ', i8/ 131 $ ' Local GA available ', 1p, d10.2/ 132 $ ' Local memory required ', 1p, d10.2) 133 call util_flush(6) 134 endif 135c 136 if (ga_uses_ma()) gmem = gmem - lmemreq 137c 138 if (oprint_mem) then 139 write(6,718) gmem 140 718 format(' Adjusted GA available ', 1p, d10.2) 141 call util_flush(6) 142 endif 143c 144 gmem_agg = gmem*numnodes 145c 146 if (oprint_mem) then 147 write(6,719) gmem_agg 148 719 format(' Aggregate GA available ', 1p, d10.2) 149 call util_flush(6) 150 endif 151c 152 nvir_node = int(nvir/numnodes) 153 if (mod(nvir,numnodes).ne.0) nvir_node=nvir_node+1 154c we have g_kbuf and g_kbuf trans plus ga_add duplicates .. /3 155c we have g_kbuf and g_kbuf trans 156 gmem=gmem/2 157 mxrlen = int(gmem/(nseg*nvir_node)) 158 mxrlen = min( 159 $ mxrlen, 160 $ nbf*nbf) 161C $ min(nbf*nbf,maxbfsh*maxbfsh*numnodes)) 162 163 if (rtdb_get(rtdb, 'mp2:mxrlen', mt_int, 1, mxrlen_in)) 164 c mxrlen=max(mxrlen_in*1024,mxrlen) 165 1964 continue 166c 167 if (oprint_mem) then 168 write(6,720) mxrlen, nseg, nvir, nbf, maxbfsh, numnodes 169 720 format( ' Maximum record length ', i8/ 170 $ ' No. of segments ', i8/ 171 $ ' Virtuals dimension ', i8/ 172 $ ' No. of functions ', i8/ 173 $ ' Max. functions/shell ', i8/ 174 $ ' Number of nodes ', i8/) 175 call util_flush(6) 176 endif 177c 178 call ga_sync() ! Just to ensure printing completes 179c 180 if (mxrlen .lt. min(nbf*nbf,maxbfsh*maxbfsh*numnodes)) then 181 if (blen .le. maxbfsh) call errquit 182 $ ('moints_semi: insufficient GA available', 183 $ mxrlen*nseg*nvir, GA_ERR) 184 blen = max(maxbfsh,blen/2) ! Reduce blocksize and try again 185 goto 1212 186 endif 187c 188 if (oprint_mem) 189 $ write(6,900) lmemreq, nint(gmem/numnodes), mxrlen 190900 format(/,'Semi-direct integral transformation', 191 $ /,' Local Memory required: ',i10,' words ' 192 $ /,' Global Memory remaining: ',i10,' words per node', 193 $ /,' IO Buffer length: ',i10,' words') 194c 195c Create GA exchange buffer 196c 197c 198 itmp = nvir/numnodes + min(mod(nvir,numnodes),1) 199 gmem=0 200*ga:1:0 201 if (.not.(ga_create( MT_DBL, mxrlen*nseg, nvir, 'transp Kbuf', 202 $ (mxrlen*nseg), 0, g_kbuf ))) gmem=-1 203 call ga_dgop(319, gmem, 1, 'min') 204 call ga_sync() 205 if(gmem.lt.0) then 206c cant alloc ga .. reduce mxrlen 207 mxrlen=mxrlen/2 208 if(ga_nodeid().eq.0) write(6,1962) 209 1962 format(//,5x,' Halved record length',//) 210 goto 1964 211cold call errquit('moints_semi: cannot allocate K buffer',0, 212cold & GA_ERR) 213 endif 214 call ga_distribution(g_kbuf, myid, rlo, rhi, clo, chi ) 215 if ((clo.eq.0).and.(chi.eq.-1)) then 216 pvlo = 0 217 pvhi = -1 218 else 219 pvlo = vlo + clo - 1 220 pvhi = vlo + chi - 1 221 endif 222#ifdef GAADD 223 gmem=0 224 if (.not.(ga_create( MT_DBL, mxrlen*nseg, nvir, ' Kbuf', 225 $ 0, nvir, g_kbuf_trans ))) gmem=-1 226 call ga_dgop(321, gmem, 1, 'min') 227 call ga_sync() 228 if(gmem.lt.0) then 229c cant alloc ga .. reduce mxrlen 230 mxrlen=mxrlen/2 231 if (.not. ga_destroy(g_kbuf)) 232 $ call errquit('moints_semi: failed to destroy g_kbuf', 233 G g_kbuf, GA_ERR) 234 if(ga_nodeid().eq.0) write(6,1962) 235 goto 1964 236cold $ call errquit('moints_semi: cannot allocate K buffer',0, 237cold & GA_ERR) 238 endif 239#else 240 g_kbuf_trans=g_kbuf 241#endif 242c 243c Reorder shells by descending shell-length 244c and group shells by blocksize 245c 246 status = .true. 247 status = status .and. ma_push_get(MT_INT,nsh,'shell order map', 248 $ l_shmap, k_shmap) 249 status = status .and. ma_push_get(MT_INT,nsh,'group lo', 250 $ l_glo, k_glo ) 251 status = status .and. ma_push_get(MT_INT,nsh,'group hi', 252 $ l_ghi, k_ghi) 253 status = status .and. ma_push_get(MT_INT,nbf,'basis map', 254 $ l_bfmap, k_bfmap) 255 status = status .and. ma_push_get(MT_INT,nbf,'rev basis map', 256 $ l_rbfmap, k_rbfmap) 257 if (.not.status) then 258 call errquit("moints_semi: failed to allocate buffers shorder", 259 $ 0,MA_ERR) 260 endif 261 call moints_shorder( basis, nsh, nbf, blen, ngrp, 262 $ int_mb(k_glo), int_mb(k_ghi), 263 $ int_mb(k_shmap), 264 $ int_mb(k_bfmap), int_mb(k_rbfmap) ) 265c 266c Generate locator and reverse map 267c 268 itmp = (nsh*(nsh+1))/2+numnodes 269 status = .true. 270 status = status .and. ma_push_get(MT_INT, (nbf*nbf), 'loc', 271 $ l_gloc, k_gloc ) 272 status = status .and. ma_push_get(MT_INT, itmp, 'io', 273 $ l_ionext, k_ionext ) 274 status = status .and. ma_push_get(MT_INT, itmp, 'rec len', 275 $ l_rlen, k_rlen ) 276 itmp = (nbf*(nbf+1))/2 277 itmp = itmp + mod(itmp,2) 278 status = status .and. ma_push_get(MT_INT, itmp, 'rev loc', 279 $ l_rloc, k_rloc ) 280 if (.not.status) then 281 call errquit("moints_semi: failed to allocate buffers locmap", 282 $ 0,MA_ERR) 283 endif 284 call ifill(itmp, 0, int_mb(k_rloc), 1 ) 285 call moints_locmap( basis, nsh, nbf, tol2e, int_mb(k_shmap), 286 $ mxrlen, int_mb(k_gloc), nnbf, 287 $ int_mb(k_rloc), niofl, int_mb(k_ionext), 288 $ int_mb(k_rlen)) 289c 290c Open local file and write initial info 291c 292 call ifill(10, 0, vtmp, 1 ) 293 vtmp(1) = ma_sizeof(MT_INT,10+nnbf,MT_BYTE) 294 vtmp(2) = nnbf 295 vtmp(3) = pvlo 296 vtmp(4) = pvhi 297 call util_file_name('kh', .true.,.true., fnamek) 298#ifdef NOIO 299 eaftype=mp2_eaftype(rtdb,k_file_size) 300 if(ga_nodeid().eq.0) write(6,*) ' mp2_eaf for mointskh ',eaftype 301#else 302 eaftype=EAF_RW 303#endif 304 if (eaf_open( fnamek, eaftype, kunit).ne.0) 305 $ call errquit('moints_semi: failed to open half int file',0, 306 & DISK_ERR) 307 if (eaf_write(kunit, 0.d0, vtmp, 308 $ ma_sizeof(MT_INT,10,MT_BYTE)).ne.0) 309 $ call errquit('moints_semi: failed to write header info1',0, 310 & DISK_ERR) 311 iotmpptr = ma_sizeof(MT_INT,10,MT_BYTE) 312 if (eaf_write(kunit, iotmpptr, int_mb(k_rloc), 313 $ ma_sizeof(MT_INT,nnbf,MT_BYTE)).ne.0) 314 $ call errquit('moints_semi: failed to write header info2',0, 315 & DISK_ERR) 316 if (.not. ma_pop_stack(l_rloc)) 317 $ call errquit('moints: failed to pop', 1, MA_ERR) 318c 319c Call the real stuff 320c 321 if (.not. rtdb_get(rtdb, 'mp2:copyback', mt_log, 1, mp2cpybck)) 322 $ mp2cpybck=.false. 323 324 call moints_semi_a( basis, nbf, nsh, maxbfsh, tol2e, oseglo, 325 $ oseghi, olo, ohi, vlo, vhi, pvlo, pvhi, 326 $ mxrlen, osym, oblk, kunit, int_mb(k_shmap), 327 $ ngrp, int_mb(k_glo), int_mb(k_ghi), 328 $ int_mb(k_bfmap), int_mb(k_rbfmap), 329 $ blen, int_mb(k_gloc), 330 $ nnbf, niofl, int_mb(k_ionext), 331 $ int_mb(k_rlen), g_movecs, g_kbuf,g_kbuf_trans, 332 M mp2cpybck) 333c 334c Clean up 335c 336 if (.not. ma_pop_stack(l_rlen)) 337 $ call errquit('moints_semi: failed to pop', 2, MA_ERR) 338 if (.not. ma_pop_stack(l_ionext)) 339 $ call errquit('moints_semi: failed to pop', 3, MA_ERR) 340 if (.not. ma_pop_stack(l_gloc)) 341 $ call errquit('moints_semi: failed to pop', 4, MA_ERR) 342 if (.not. ma_pop_stack(l_rbfmap)) 343 $ call errquit('moints_semi: failed to pop', 6, MA_ERR) 344 if (.not. ma_pop_stack(l_bfmap)) 345 $ call errquit('moints_semi: failed to pop', 7, MA_ERR) 346 if (.not. ma_pop_stack(l_ghi)) 347 $ call errquit('moints_semi: failed to pop', 8, MA_ERR) 348 if (.not. ma_pop_stack(l_glo)) 349 $ call errquit('moints_semi: failed to pop', 9, MA_ERR) 350 if (.not. ma_pop_stack(l_shmap)) 351 $ call errquit('moints_semi: failed to pop', 10, MA_ERR) 352 if (.not. ga_destroy(g_kbuf)) 353 $ call errquit('moints_semi: failed to destroy g_kbuf',g_kbuf, 354 & GA_ERR) 355#ifdef GAADD 356 if (.not. ga_destroy(g_kbuf_trans)) 357 $ call errquit('moints_semi: failed to destroy g_kbuf',g_kbuf, 358 & GA_ERR) 359#endif 360c 361 if (eaf_close(kunit).ne.0) 362 $ call errquit('moints_semi: failed to close file',kunit, 363 & DISK_ERR) 364c 365 call ga_sync() 366 if (util_print('statistics', print_high)) 367 $ call moints_stats_print( 'semi' ) 368 369 370 371 return 372 end 373 374 375 376 377 378 379 380 381 382c 383c 384c 385c 386c 387 subroutine moints_semi_a( basis, nbf, nsh, maxbfsh, tol2e, 388 $ oseglo, oseghi, olo, ohi, vlo, vhi, 389 $ pvlo, pvhi, mxrlen, osym, oblk, 390 $ kunit, shmap, ngrp, glo, ghi, bfmap, 391 $ rbfmap, blen, gloc, nnbf, 392 $ niofl, ionext, rlen, g_movecs, g_kbuf, 393 G g_kbuf_trans, mp2cpybck) 394 implicit none 395#include "errquit.fh" 396#include "tcgmsg.fh" 397#include "global.fh" 398#include "mafdecls.fh" 399#include "bas.fh" 400#include "util.fh" 401#include "schwarz.fh" 402#include "msgids.fh" 403c 404 integer MSG_SEMI_IO_RATE 405 parameter(MSG_SEMI_IO_RATE=18421) 406c 407c Arguments 408c 409 integer basis ! [input] Basis handle 410 integer nbf ! [input] Basis functions 411 integer nsh ! [input] Shells 412 integer maxbfsh ! [input] Largest shell 413 double precision tol2e ! [input] Integral tolerance 414 integer oseglo, oseghi ! [input] Occupied segment range 415 integer olo, ohi ! [input] Occupied index range 416 integer vlo, vhi ! [input] Virtual index range 417 integer pvlo, pvhi ! [input] Virtual index range for this processor 418 integer mxrlen ! [input] Max record length 419 logical osym ! [input] Toggle symmetry 420 logical oblk ! [input] Toggle AO integral blocking 421 integer kunit ! [input] Unit numbers for IO 422 integer shmap(nsh) ! [input] Map for shells 423 integer ngrp ! [input] Number of groups of shells 424 integer glo(*) ! [input] Group lower shell bound 425 integer ghi(*) ! [input] Group upper shell bound 426 integer bfmap(nbf) ! [input] BF map: orig --> new 427 integer rbfmap(nbf) ! [input] Reverse bf map: new --> orig 428 integer blen ! [input] Blocksize 429 integer gloc(nbf*nbf) ! [input] bf -> GA memory map 430 integer nnbf ! [input] Number of screened basf pairs <= (nbf*(nbf+1))/2 431 integer niofl ! [input] Total IO flushes required 432 integer ionext(niofl) ! [input] Next value for each IO flush 433 integer rlen(niofl) ! [input] Record lengths for IO writes 434 integer g_movecs ! [input] MO coefficients 435 integer g_kbuf ! [scratch] Buffer for half-trans exchange 436 integer g_kbuf_trans ! [scratch] Buffer for half-trans exchange 437 logical mp2cpybck 438c 439c Local variables 440c 441 integer nmo, nmo1, nmo2 442 integer ish0, jsh0, ish, jsh, ilen, jlen 443 integer ibflo, ibfhi, jbflo, jbfhi 444 integer kbflo, kbfhi, lbflo, lbfhi 445 integer kshlo, kshhi, lshlo, lshhi 446 integer kblen, lblen 447 integer kgr, lgr 448 integer qlo, qhi 449 integer l_ssbb, k_ssbb 450c$$$ integer l_ssbbt, k_ssbbt 451 integer l_hlp, k_hlp, l_ssni,k_ssni 452 integer l_hlp2, k_hlp2 453#ifndef NEW_SPARSE 454 integer l_hlp3, k_hlp3 455#endif 456 integer l_eri, k_eri, l_iscr,k_iscr 457 integer l_xmo, k_xmo, l_xmo_t, k_xmo_t 458 integer n_ssbb, n_ssbb1, n_ssni, n_hlp, n_hlp2, n_hlp3, 459 I n_hlpx 460 integer iz, jz, kz, bsize 461 integer mem2, max2e, jopass 462 double precision iocnt 463 integer num_nodes, ploop, next, iiofl 464 double precision schw_ij 465 double precision tpass, nwrbytes, iorate 466 double precision t0, t1, ttask 467 logical st 468c 469#include "moints_stats.fh" 470c 471 integer gr_len, nxtask 472 external gr_len, nxtask 473c 474 call ga_zero(g_kbuf) 475 call ga_zero(g_kbuf_trans) 476 num_nodes = ga_nnodes() 477 nmo1 = oseghi - oseglo + 1 478 nmo2 = vhi - vlo + 1 479 qlo = olo 480 qhi = vhi 481 nmo = qhi - qlo + 1 482c 483c$$$ WRITE(6,221) GA_NODEID(),PVLO,PVHI 484c$$$ 221 FORMAT('ME:',I5,5X,' V-RANGE:',2I3) 485c 486c Local MO coefficients 487c Integrals and temporary arrays sizes 488c 489* call int_mem_2e4c(max2e, mem2) ! For non-blocking code 490 call intb_mem_2e4c(max2e, mem2) ! Determine mem2 = scratch space 491 max2e = max(max2e,min(50*maxbfsh**4,21**4)) ! Enuf room for 1 cartesian H shell 492c 493 bsize = max(blen,maxbfsh) 494 n_ssbb = maxbfsh*maxbfsh*bsize*bsize 495 n_ssbb1 = max((nmo1*nmo1),n_ssbb) 496 497 n_hlp = max(nmo1,maxbfsh*maxbfsh)*nbf 498 n_hlp2 = maxbfsh*maxbfsh*nmo2 499 500 n_hlp3 = maxbfsh*maxbfsh*nmo2 501 502 n_ssni = maxbfsh*maxbfsh*nbf*nmo1 503c 504c Get the MOS, reorder the basis functions, transpose 505c 506 if (.not. ma_push_get(MT_DBL,nbf*vhi,'xmot',l_xmo_t,k_xmo_t)) 507 $ call errquit('moints_semi: failed to alloc mosT', nbf*vhi, 508 & MA_ERR) 509c 510 if (.not. ma_push_get(MT_DBL,(nbf*vhi),'r movecs',l_xmo,k_xmo)) 511 $ then 512 write(6,*) ' avail dbles',ma_inquire_avail(MT_DBL) 513 call util_flush(6) 514 call errquit('moints_semi: failed to alloc mos', nbf*vhi, 515 & MA_ERR) 516 endif 517c 518#if 0 519 call ga_get(g_movecs, 1, nbf, qlo, qhi, dbl_mb(k_xmo_t), nbf ) 520#else 521 call util_mygabcast2(g_movecs,1, nbf, qlo, qhi, 522 D dbl_mb(k_xmo_t), nbf ) 523#endif 524 call row_exch( nbf, nmo, rbfmap, dbl_mb(k_xmo_t), dbl_mb(k_xmo) ) 525 call moints_xmot(nbf,qhi-qlo+1,dbl_mb(k_xmo),dbl_mb(k_xmo_t)) 526 527c Only the transposed MOs are needed in the inner loop 528c 529 if (.not. ma_pop_stack(l_xmo)) 530 $ call errquit('moints_semi: pop failed', 5551212, MA_ERR) 531 532 if (.not. ma_push_get(MT_DBL,n_ssni,'ssni blk',l_ssni,k_ssni)) 533 $ call errquit('mp2_semi: failed to alloc ssni ', n_ssni, 534 & MA_ERR) 535c 536c Initialize 537c 538 ploop = 0 539 iiofl = 1 540 jopass = 1 541 iocnt = ma_sizeof(MT_INT,10+nnbf,MT_BYTE) 542 next = nxtask(num_nodes, 1) 543 nwrbytes = 0.d0 544 tpass = util_cpusec() 545c 546c 4-fold shell loop 547c 548 do ish0=1,nsh 549 do jsh0=1,ish0 550 ish = max(shmap(ish0),shmap(jsh0)) 551 jsh = min(shmap(ish0),shmap(jsh0)) 552 st = bas_cn2bfr(basis,ish,ibflo,ibfhi) 553 st = bas_cn2bfr(basis,jsh,jbflo,jbfhi) 554 ilen = ibfhi - ibflo + 1 555 jlen = jbfhi - jbflo + 1 556 schw_ij = schwarz_shell(ish,jsh) 557 if (schw_ij*schwarz_max().ge.tol2e) then 558 if (next.eq.ploop) then 559c 560c ------------- 561c Parallel task 562 mi_ntasks = mi_ntasks + 1 563 ttask = util_cpusec() 564 call dfill((ilen*jlen*nbf*nmo1),0.d0,dbl_mb(k_ssni),1) 565c 566c Half-tranformed Integral generation 567c 568 do kgr=1,ngrp 569 kshlo = glo(kgr) 570 kshhi = ghi(kgr) 571 st = bas_cn2bfr(basis,shmap(kshlo),iz,kz) 572 st = bas_cn2bfr(basis,shmap(kshhi),kz,jz) 573 kbflo = rbfmap(iz) 574 kbfhi = rbfmap(jz) 575 kblen = kbfhi - kbflo + 1 576 do lgr=1,kgr 577 lshlo = glo(lgr) 578 lshhi = ghi(lgr) 579 st = bas_cn2bfr(basis,shmap(lshlo),iz,kz) 580 st = bas_cn2bfr(basis,shmap(lshhi),kz,jz) 581 lbflo = rbfmap(iz) 582 lbfhi = rbfmap(jz) 583 lblen = lbfhi - lbflo + 1 584 t0 = util_cpusec() 585 586 if (.not. ma_push_get(MT_DBL,n_ssbb1,'ssbb blk', 587 $ l_ssbb,k_ssbb)) then 588 write(6,*) ga_nodeid(), 589 C ' avail dbles',ma_inquire_avail(MT_DBL) 590 call util_flush(6) 591 if(ga_nodeid().eq.0) 592 M call ma_summarize_allocated_blocks() 593 594 call ga_sync() 595 call errquit('moints_semi: ssbb', n_ssbb1, 596 & MA_ERR) 597 endif 598 if (.not. ma_push_get(MT_DBL, max2e, 'ibuf', 599 $ l_eri, k_eri)) 600 $ call errquit('moints_semi: eri', max2e, 601 & MA_ERR) 602 if (.not. ma_push_get(MT_DBL, mem2, 'int scr', 603 $ l_iscr, k_iscr)) 604 $ call errquit('moints_semi: scr', mem2, 605 & MA_ERR) 606 607 call moints_gblk( basis, ish, jsh, 608 $ kshlo, kshhi, lshlo, lshhi, 609 $ shmap, rbfmap, 610 $ schw_ij, tol2e, osym, oblk, 611 $ max2e, dbl_mb(k_eri), 612 $ mem2, dbl_mb(k_iscr), 613 $ ibflo, ibfhi, jbflo, jbfhi, 614 $ kbflo, kbfhi, lbflo, lbfhi, 615 $ dbl_mb(k_ssbb) ) 616 617 618 if (.not. ma_pop_stack(l_iscr)) 619 $ call errquit('moints_semi: pop failed ',14, 620 & MA_ERR) 621 if (.not. ma_pop_stack(l_eri)) 622 $ call errquit('moints_semi: pop failed ',15, 623 & MA_ERR) 624 625 mi_tint = mi_tint + util_cpusec() - t0 626c 627 t0 = util_cpusec() 628 mi_flop1 = mi_flop1 + 629 $ 4.d-6*ilen*jlen*kblen*lblen*(oseghi-oseglo+1) 630 631 n_hlpx = (oseghi-oseglo+1)*max(kbfhi-kbflo+1,lbfhi-lbflo+1) 632 if (.not. ma_push_get(MT_DBL,n_hlpx,'hlp block', 633 $ l_hlp,k_hlp)) 634 $ call errquit('moints_semi: hlp', n_hlp, 635 & MA_ERR) 636 637 call moints_trf1_new(nbf, qlo, qhi, oseglo, oseghi, 638 $ ilen, jlen, kbflo, kbfhi, 639 $ lbflo, lbfhi, 640 $ dbl_mb(k_ssbb), 641 $ dbl_mb(k_xmo_t), 642 $ dbl_mb(k_ssni), dbl_mb(k_hlp)) 643 644 if (.not. ma_pop_stack(l_hlp)) 645 $ call errquit('moints_semi: pop failed ',16, 646 & MA_ERR) 647 648 if (.not. ma_pop_stack(l_ssbb)) 649 $ call errquit('moints_semi: pop failed ',17, 650 & MA_ERR) 651 652 mi_t1 = mi_t1 + util_cpusec() - t0 653 enddo 654 enddo 655c 656c Transform 2nd index and put into buffer 657c 658 t0 = util_cpusec() 659 660 if (.not. ma_push_get(MT_DBL,n_hlp2,'hlp2 block', 661 $ l_hlp2,k_hlp2)) 662 $ call errquit('moints_semi: hlp2', n_hlp2, MA_ERR) 663 664#ifdef NEW_SPARSE 665 if (.not. ma_push_get(MT_DBL,n_hlp,'hlp block', 666 $ l_hlp,k_hlp)) 667 $ call errquit('moints_semi: hlp', n_hlp, MA_ERR) 668 669 call moints_trf2Kynew(nbf, qlo, qhi, oseglo, oseghi, 670 $ vlo, vhi, ibflo, ibfhi, 671 $ jbflo, jbfhi, rlen(iiofl), 672 $ gloc, dbl_mb(k_ssni), 673 $ dbl_mb(k_hlp), dbl_mb(k_hlp2), 674 $ dbl_mb(k_xmo_t), g_kbuf_trans) 675 676 if (.not. ma_pop_stack(l_hlp)) 677 $ call errquit('moints_semi: pop failed ',18, 678 & MA_ERR) 679#else 680 if (.not. ma_push_get(MT_DBL,n_hlp3,'hlp3 block', 681 $ l_hlp3,k_hlp3)) 682 $ call errquit('moints_semi: hlp3', n_hlp3, 683 & MA_ERR) 684 if (.not. ma_push_get(MT_DBL,(nbf*vhi),'r movecs', 685 $ l_xmo,k_xmo)) then 686 write(6,*) ' avail dbles',ma_inquire_avail(MT_DBL) 687 call util_flush(6) 688 call errquit 689 $ ('moints_semi: failed to alloc mos3', nbf*vhi, 690 & MA_ERR) 691 endif 692c 693c Generate the untranposed mos by transposing the transpose 694c 695 call moints_xmot(qhi-qlo+1,nbf,dbl_mb(k_xmo_t), 696 $ dbl_mb(k_xmo)) 697c 698 call moints_trf2Ky(nbf, qlo, qhi, oseglo, oseghi, 699 $ vlo, vhi, ibflo, ibfhi, 700 $ jbflo, jbfhi, rlen(iiofl), gloc, 701 $ dbl_mb(k_ssni), 702 $ dbl_mb(k_hlp3), dbl_mb(k_hlp2), 703 $ dbl_mb(k_xmo), g_kbuf_trans) 704 705 if (.not. ma_pop_stack(l_xmo)) 706 $ call errquit('moints_semi: pop failed ',180, 707 & MA_ERR) 708 if (.not. ma_pop_stack(l_hlp3)) 709 $ call errquit('moints_semi: pop failed ',18, 710 & MA_ERR) 711#endif 712 if (.not. ma_pop_stack(l_hlp2)) 713 $ call errquit('moints_semi: pop failed ',19, 714 & MA_ERR) 715 716 717 mi_t2k = mi_t2k + util_cpusec() - t0 718 719 ttask = util_cpusec() - ttask 720 mi_mintask = min(mi_mintask,ttask) 721 mi_maxtask = max(mi_maxtask,ttask) 722 mi_aggtask = mi_aggtask + ttask 723c 724 next = nxtask(num_nodes, 1) 725c 726c End parallel task 727c ----------------- 728c 729 endif 730 ploop = ploop + 1 731c 732c Checkpoint for I/O 733c 734 if (ploop.eq.ionext(iiofl)) then 735 t0 = util_cpusec() 736 call ga_mask_sync(.true.,.false.) 737 call ga_sync() 738 t1 = util_cpusec() - t0 739 mi_nsynchs = mi_nsynchs + 1 740 call summaxmin(t1, mi_aggsynch, mi_maxsynch, mi_minsynch) 741 t0 = util_cpusec() 742#ifdef GAADD 743 if (mp2cpybck) then 744 call mp2_copyback(g_kbuf,g_kbuf_trans) 745 else 746 call ga_copy(g_kbuf_trans,g_kbuf) 747 endif 748C call ga_mask_sync(.true.,.false.) 749 call ga_sync() 750c call ga_add(1d0,g_kbuf_trans,1d0,g_kbuf,g_kbuf) 751 752#endif 753 call moints_wrbuf( kunit, oseglo, oseghi, 754 $ vlo, pvlo, pvhi, nnbf, iocnt, 755 $ rlen(iiofl), g_kbuf) 756 mi_tio = mi_tio + util_cpusec() - t0 757 nwrbytes = nwrbytes + nmo1*(pvhi-pvlo+1)*rlen(iiofl) 758 call ga_mask_sync(.false.,.false.) 759 call ga_zero(g_kbuf) 760 call ga_mask_sync(.false.,.true.) 761 call ga_zero(g_kbuf_trans) 762 iocnt = iocnt + ma_sizeof(MT_DBL,rlen(iiofl),MT_BYTE) 763 iiofl = iiofl + 1 764 endif 765 endif 766 enddo 767 enddo 768 t0 = util_cpusec() 769cedo call ga_sync 770 next = nxtask(-num_nodes, 1) 771 t1 = util_cpusec() - t0 772 mi_nsynchs = mi_nsynchs + 1 773 call summaxmin(t1, mi_aggsynch, mi_maxsynch, mi_minsynch) 774 tpass = util_cpusec() - tpass 775 776c 777c I/O stats 778c 779 nwrbytes = nwrbytes*8.e-6 780 if(mi_tio.eq.0) then 781 iorate=0d0 782 else 783 iorate = nwrbytes/mi_tio 784 endif 785 call ga_dgop(MSG_SEMI_IO_RATE,iorate,1,'+') 786 if ((ga_nodeid().eq.0).and. 787 $ (util_print('io_stats',print_default))) then 788 write(6,886) nwrbytes, mi_tio, iorate 789 886 format('Node 0 wrote ',f8.1,' Mb in ',f8.1,' s',5x, 790 $ 'Agg I/O rate:', f8.1, ' Mb/s') 791 call util_flush(6) 792 endif 793c 794c Clean-up 795c 796 if (.not. ma_pop_stack(l_ssni)) 797 $ call errquit('moints_semi: pop failed ',20, MA_ERR) 798c 799c Complete in-core section 800c 801 if (.not. ma_pop_stack(l_xmo_t)) 802 $ call errquit('moints_semi: pop failed ',22, MA_ERR) 803c 804 end 805 806 807 808 subroutine moints_trf2Ky( nbf, qlo, qhi, oseglo, oseghi, 809 $ vlo, vhi, ilo, ihi, jlo, jhi, rlen, 810 $ gloc, ssni, h, h2, c, g_buf ) 811 implicit none 812#include "global.fh" 813 integer nbf, qlo, qhi, oseglo, oseghi, vlo, vhi 814 integer ilo, ihi, jlo, jhi 815 integer rlen 816 integer gloc(nbf,nbf) 817 double precision ssni(nbf,jlo:jhi,ilo:ihi,oseglo:oseghi) 818 double precision h(jlo:jhi,ilo:ihi,vlo:vhi),h2(*) 819 double precision c(nbf,qlo:qhi) 820 integer g_buf 821 integer ilen, jlen, ijlen, nvir 822 integer glo, ghi, i, j, ij, ijolo, ijohi 823 integer o, v 824 integer nij 825c 826 ilen = ihi - ilo + 1 827 jlen = jhi - jlo + 1 828 nij = (ilen*(ilen+1))/2 829 ijlen = ilen*jlen 830 glo = gloc(ilo,jlo) 831 ghi = gloc(ihi,jhi) 832 nvir = vhi - vlo + 1 833 do o=oseglo,oseghi 834 call dgemm('t', 'n', ijlen, nvir, nbf, 1.d0, ssni(1,jlo,ilo,o), 835 $ nbf, c(1,vlo), nbf, 0.d0, h(jlo,ilo,vlo), ijlen) 836 ijolo = (o - oseglo)*rlen + glo 837 ijohi = (o - oseglo)*rlen + ghi 838 if (ihi.eq.jhi) then 839 ij = 0 840 do v=vlo,vhi 841 do i=ilo,ihi 842 do j=ilo,i 843 ij = ij + 1 844 h2(ij) = h(j,i,v) 845 enddo 846 enddo 847 enddo 848#ifndef NOCOMMS 849 call ga_put( g_buf, ijolo, ijohi, 1, nvir, h2, nij ) 850#endif 851 else 852#ifndef NOCOMMS 853 call ga_put( g_buf, ijolo, ijohi, 1, nvir, h, ijlen ) 854#endif 855 endif 856 enddo 857 return 858 end 859 860c 861c *** For completely in-core 4-index *** 862c 863 864 subroutine moints_trf34Ky( nbf, ostart, olo, ohi, vlo, vhi, 865 $ nnbf, rloc, h1, h2, c, g_buf ) 866 implicit none 867#include "global.fh" 868#include "mafdecls.fh" 869 integer nbf, ostart, ohi, olo, vlo, vhi 870 integer nnbf, rloc(nnbf) 871 double precision h1(*), h2(*) 872 double precision c(nbf,nbf) 873 integer g_buf 874c 875 integer nocc, nvir, v, o, vo 876 integer k_local, ld, my_id, rlo, rhi, clo, chi 877 878 nocc = ohi - olo + 1 879 nvir = vhi - vlo + 1 880 my_id = ga_nodeid() 881 call ga_distribution(g_buf, my_id, rlo, rhi, clo, chi ) 882 do v=vlo,vhi 883 do o=olo,ohi 884 vo = (v-1)*(ohi-olo+1) + o 885 if ((vo.ge.clo).and.(vo.le.chi)) then 886 call dfill( (nbf*nbf), 0.d0, h1, 1 ) 887 call ga_access(g_buf, rlo, rhi, vo, vo, k_local, ld ) 888 call scatter(nnbf, h1, rloc, dbl_mb(k_local) ) 889 call ga_release(g_buf, rlo, rhi, vo, vo ) 890 call upper2square(nbf,h1,h1) 891 892 call dgemm( 'n', 'n', nbf, nocc, nbf, 1.d0, h1, nbf, 893 $ c(1,olo), nbf, 0.d0, h2, nbf ) 894 call dgemm( 't', 'n', nvir, nocc, nbf, 1.d0, c(1,vlo), nbf, 895 $ h2, nbf, 0.d0, h1, nvir ) 896 897C$$$ WRITE(6,911) V,O 898C$$$ 911 FORMAT(//,5X,'V :',I5,5X,'O :',I5) 899C$$$ CALL MOINTS_MATPRINT(NVIR,NOCC,H1) 900 901 endif 902 enddo 903 enddo 904 return 905 end 906 907 908 909 910 911 912 subroutine moints_wrbuf( munit, oseglo, oseghi, vlo, pvlo, pvhi, 913 $ nnbf, iocnt, rlen, g_buf ) 914 implicit none 915#include "errquit.fh" 916#include "mafdecls.fh" 917#include "global.fh" 918#include "inp.fh" 919#include "eaf.fh" 920 integer munit ! I/O unit 921 integer oseglo, oseghi ! Segment range 922 integer vlo ! Lowest virtual index 923 integer pvlo, pvhi ! Virtual index range for this processor 924 integer nnbf, rlen 925 double precision iocnt ! file header offset 926 integer g_buf 927c 928 double precision faddr 929 integer o, vv, pv, nocc, npv, ooffset 930 integer myid, rlo, rhi, clo, chi, k_local, ld 931#ifdef BAD_GACCESS 932 integer l_local,howmy 933#endif 934 integer ierr 935 character*80 errmsg 936c 937 nocc = oseghi - oseglo + 1 938 npv = pvhi - pvlo + 1 939 myid = ga_nodeid() 940 call ga_distribution(g_buf, myid, rlo, rhi, clo, chi ) 941c 942 do o=1,nocc 943 ooffset = (o-1)*rlen 944 do pv=pvlo,pvhi 945 vv = pv - vlo + 1 946 faddr = iocnt + 947 $ ma_sizeof(MT_DBL,((o-1)*npv+pv-pvlo)*nnbf,MT_BYTE) 948 if ((vv.ge.clo).and.(vv.le.chi)) then 949#ifdef BAD_GACCESS 950 ld=rhi-rlo+1 951 howmy=max(ld,ooffset+rlen) 952 if(.not.ma_push_get(MT_DBL,howmy, 953 $ 'scratch buff', l_local, k_local)) call 954 $ errquit('mointsemi: pushget failed',howmy,0) 955 call ga_get(g_buf,rlo,rhi,vv,vv,dbl_mb(k_local),ld) 956#else 957 call ga_access(g_buf, rlo, rhi, vv, vv, k_local, ld ) 958#endif 959 ierr=eaf_write(munit, faddr, dbl_mb(k_local+ooffset), 960 $ ma_sizeof(MT_DBL,rlen,MT_BYTE)) 961 if(ierr.ne.0) then 962 call eaf_errmsg(ierr, errmsg) 963 write(6,*) ' IO offset ', faddr 964 write(6,*) ' IO error message ', 965 , errmsg(1:inp_strlen(errmsg)) 966 call errquit('moints_wrbuf: failed on write',0, DISK_ERR) 967 endif 968#ifdef BAD_GACCESS 969 if(.not.ma_pop_stack(l_local)) call 970 $ errquit('mointsemi: popstack failed',0,0) 971#else 972 call ga_release(g_buf, rlo, rhi, vv, vv ) 973#endif 974 else 975 call errquit('moints_semi: wrong distrib for GA buffer',0, 976 & GA_ERR) 977 endif 978 enddo 979 enddo 980 return 981 end 982 983 984 985 986 987 988 989 990 991 subroutine moints_readintK( nbf, oseglo, oseghi, olo, ohi, 992 $ vlo, vhi, c, orbe, g_epair ) 993 implicit none 994#include "errquit.fh" 995#include "mafdecls.fh" 996#include "global.fh" 997#include "eaf.fh" 998#include "util.fh" 999 integer MSG_SEMIMP2_SUM 1000 parameter(MSG_SEMIMP2_SUM=10241) 1001 integer nbf, oseglo, oseghi, olo, ohi, vlo, vhi 1002 double precision c(nbf,nbf) 1003 double precision orbe(nbf) 1004 integer g_epair 1005c 1006 integer nseg, nocc, nvir, nnbf, ioff, v, o, oo 1007 integer pvlo, pvhi, itmp 1008 integer l_v, k_v, l_t, k_t, l_x, k_x 1009 integer j, oj, jj, vvlo, vvhi, k_local, ld, myid 1010 integer rlo, rhi, clo, chi 1011 integer gtype, dim1, oomax 1012 integer g_exch 1013 double precision tmp2 1014 character*256 fname 1015 integer kunit 1016 double precision xx, denom, e2 1017 logical status 1018 double precision moints_epair_eval 1019 external moints_epair_eval 1020 double precision c1,c2 ! constants for moints_epair_eval (because of SCS-MP2) 1021 1022 tmp2 = util_cpusec() 1023c 1024 c1=4.0d0 1025 c2=2.0d0 1026c 1027 call util_file_name('kh',.true.,.true.,fname) 1028#ifdef NOIO 1029 if (eaf_open(fname, 200, kunit).ne.0) 1030#else 1031 if (eaf_open(fname, EAF_RW, kunit).ne.0) 1032#endif 1033 $ call errquit('moints_readintK: cannot open half int file',0, 1034 & DISK_ERR) 1035 myid = ga_nodeid() 1036 e2 = 0.d0 1037 nocc = ohi - olo + 1 1038 nvir = vhi - vlo + 1 1039 nseg = oseghi - oseglo + 1 1040 call moints_vrange( kunit, pvlo, pvhi, nnbf, ioff ) 1041 status = ma_push_get(MT_INT,(nnbf+mod(nnbf,2)),'scatter',l_v,k_v) 1042 call moints_getscattv( kunit, nnbf, int_mb(k_v) ) 1043c$$$ do i=1,nnbf 1044c$$$ write(6,*) 'k_v',int_mb(k_v+i-1) 1045c$$$ enddo 1046*ga:1:0 1047 if (.not.ga_create( MT_DBL, (nvir*nvir), nocc, 'exch', 1048 $ (nvir*nvir), 0, g_exch)) 1049 $ call errquit('moints_semimp2: cannot allocate exch',0, GA_ERR) 1050 call ga_distribution(g_exch, myid, rlo, rhi, clo, chi ) 1051 call ga_inquire(g_epair,gtype,dim1,oomax) 1052c 1053c 1054c$$$ WRITE(6,934) GA_NODEID(),PVLO,PVHI,CLO,CHI 1055c$$$ 934 FORMAT('ME:',I5,5x,'VRANGE:',2I5,5x,'OCC RANGE FOR K:',2I5) 1056c$$$ CALL UTIL_FLUSH(6) 1057c 1058c Loop over occupied/virtual pairs 1059c 1060 itmp = max(nnbf,(nbf*nocc)) 1061 status = ma_push_get(MT_DBL,itmp,'read buff',l_t,k_t) 1062 status = ma_push_get(MT_DBL,(nbf*nbf),'U half',l_x,k_x) 1063 1064 do o=oseglo,oseghi 1065 oo = o - oseglo + 1 1066 call ga_zero(g_exch) 1067 do v=pvlo,pvhi 1068 call dfill((nbf*nbf), 0.d0, dbl_mb(k_x), 1 ) 1069 call moints_rdhfint( kunit, pvlo, pvhi, oo, v, nnbf, 1070 $ ioff, dbl_mb(k_t)) 1071 call scatter( nnbf, dbl_mb(k_x), int_mb(k_v), dbl_mb(k_t) ) 1072 call upper2square( nbf, dbl_mb(k_x), dbl_mb(k_x) ) 1073 call dgemm( 'n', 'n', nbf, nocc, nbf, 1.d0, dbl_mb(k_x), 1074 $ nbf, c(1,olo), nbf, 0.d0, dbl_mb(k_t), nbf ) 1075 call dgemm( 't', 'n', nvir, nocc, nbf, 1.d0, c(1,vlo), 1076 $ nbf, dbl_mb(k_t), nbf, 0.d0, dbl_mb(k_x), nvir ) 1077 vvlo = (v-vlo)*nvir + 1 1078 vvhi = (v-vlo+1)*nvir 1079 do j=olo,o 1080 oj = j-olo+1 1081 jj = (j-olo)*nvir 1082 call ga_put( g_exch, vvlo, vvhi, oj, oj, 1083 $ dbl_mb(k_x+jj), nvir) 1084 enddo 1085 enddo 1086 call ga_sync() 1087 do j=olo,o 1088 jj = j-olo+1 1089 if ((jj.ge.clo).and.(jj.le.chi)) then 1090 denom = orbe(o) + orbe(j) 1091 call ga_access(g_exch, rlo, rhi, jj, jj, k_local, ld ) 1092 1093c$$$ WRITE(6,920) O,J 1094c$$$ 920 FORMAT(/,' Operator: [',i4,',',i4,']') 1095c$$$ CALL MOINTS_MATPRINT(NVIR,NVIR,DBL_MB(K_LOCAL)) 1096c$$$ CALL UTIL_FLUSH(6) 1097c$$$ 1098 1099c 1100c The following call is suspect since the definition of this 1101c function has 8 arguments (4 integers at the start, not 3. 1102c Added dummy arg to get it to link under WIN32, but someone 1103c should fix this properly. 1104c BGJ (9/99) 1105c 1106c xx = moints_epair_eval( nvir, 0, nvir, dbl_mb(k_local), 1107c $ orbe(ohi+1), denom ) 1108 call errquit('suspect call to moints_epair_eval',0, 1109 & UNKNOWN_ERR) 1110 xx = moints_epair_eval( nvir, 0, nvir, 0, dbl_mb(k_local), 1111 $ orbe(ohi+1), denom, c1, c2 ) 1112 call ga_release(g_exch, rlo, rhi, jj, jj ) 1113 if (o.eq.j) xx = xx*0.5d0 1114 e2 = e2 + xx 1115 oj = ((o-olo+1)*(o-olo))/2 + jj 1116 call ga_put(g_epair,1,1,oj,oj,xx,1) 1117 1118c$$$ WRITE(6,922) GA_NODEID(),O,J,XX 1119c$$$ 922 FORMAT(I3,' %%%%%% ',2I5,5X,F16.10) 1120c$$$ CALL UTIL_FLUSH(6) 1121 1122 endif 1123c$$$ CALL GA_SYNC() 1124 enddo 1125 enddo 1126c 1127c 1128c$$$ CALL GA_SYNC() 1129c$$$ WRITE(6,967) GA_NODEID(), E2 1130c$$$ 967 FORMAT('ME:',I5,' MP2 Contribution:',f16.10) 1131c$$$ CALL UTIL_FLUSH(6) 1132c$$$ CALL GA_PRINT(G_PAIR) 1133c$$$ call ga_sync() 1134c$$$ call ga_dgop(MSG_SEMIMP2_SUM,e2,1,'+') 1135c$$$ if (ga_nodeid().eq.0) then 1136c$$$ write(6,681) e2 1137c$$$ 681 format(/,10x,'MP2 correction:',5x,f16.10) 1138c$$$ call util_flush(6) 1139c$$$ endif 1140c 1141c Clean up 1142c 1143 if (.not.ga_destroy(g_exch)) 1144 $ call errquit('moints_semimp2: failed to destroy',g_exch, GA_ERR) 1145 if (.not. ma_pop_stack(l_x)) 1146 $ call errquit('moints_readint: failed to pop', l_x, MA_ERR) 1147 if (.not. ma_pop_stack(l_t)) 1148 $ call errquit('moints_readint: failed to pop', l_t, MA_ERR) 1149 if (.not. ma_pop_stack(l_v)) 1150 $ call errquit('moints_readint: failed to pop', l_v, MA_ERR) 1151c 1152 if (eaf_close(kunit).ne.0) 1153 $ call errquit('moints_readintK: failed to close file',kunit, 1154 & DISK_ERR) 1155c 1156 call ga_sync() 1157 1158 return 1159 end 1160 1161 1162 1163 1164 1165c 1166c Read half-integrals from local file 1167c 1168 subroutine moints_rdhfint( munit, plo, phi, o, p, nnbf, ioff, t ) 1169 implicit none 1170#include "errquit.fh" 1171#include "mafdecls.fh" 1172#include "eaf.fh" 1173 integer munit 1174 integer plo, phi, o, p, nnbf 1175 integer ioff ! header offset in bytes 1176 double precision t(nnbf) 1177 integer np, rlen, recnum 1178 double precision faddr 1179 1180 np = phi - plo + 1 1181 recnum = (o-1)*np + p - plo 1182 rlen = ma_sizeof(MT_DBL,nnbf,MT_BYTE) 1183 faddr = ioff + recnum*rlen 1184 if (eaf_read(munit, faddr, t, rlen).ne.0) 1185 $ call errquit('moints_rdhfint:cannot read integral record',0, 1186 & DISK_ERR) 1187 1188c$$$ WRITE(6,771) RECNUM, FADDR 1189c$$$ 771 FORMAT(' READ RECORD#:',I8,' @ ',F10.0) 1190 1191 return 1192 end 1193 1194 1195 1196 1197c 1198c Read header on the local file and 1199c return index ranges, offsets and 1200c number of AO indices 1201c 1202 1203 subroutine moints_vrange( munit, plo, phi, nnbf, ioff ) 1204 implicit none 1205#include "errquit.fh" 1206#include "global.fh" 1207#include "mafdecls.fh" 1208#include "eaf.fh" 1209 integer munit 1210 integer plo, phi, nnbf, ioff 1211 integer vtmp(10) 1212c$$$ INTEGER I 1213 1214 if (eaf_read(munit, 0.d0, vtmp, 1215 $ ma_sizeof(MT_INT,10,MT_BYTE)).ne.0) 1216 $ call errquit('moints_vrange: cannot read header info',0, 1217 & DISK_ERR) 1218 ioff = vtmp(1) 1219 nnbf = vtmp(2) 1220 plo = vtmp(3) 1221 phi = vtmp(4) 1222 1223c$$$ IF (GA_NODEID().EQ.0) THEN 1224c$$$ PRINT*,'READ BACK INITIAL PARAMETERS' 1225c$$$ WRITE(6,771) (VTMP(I),I=1,10) 1226c$$$ 771 FORMAT(16I4) 1227c$$$ CALL UTIL_FLUSH(6) 1228c$$$ ENDIF 1229c$$$ CALL GA_SYNC() 1230 1231 return 1232 end 1233 1234 1235c 1236c Recover the scatter array from local file header. 1237c This scatter array maps the dense compound 1238c AO index (mu nu) to square array (mu,nu). 1239c Includes sparsity by Schwarz screening and petite list. 1240c 1241 subroutine moints_getscattv( munit, nnbf, v ) 1242 implicit none 1243#include "errquit.fh" 1244#include "mafdecls.fh" 1245#include "eaf.fh" 1246 integer munit 1247 integer nnbf, v(*) 1248 integer itmp 1249 double precision ioptr 1250c$$$ INTEGER I 1251 1252 itmp = nnbf+mod(nnbf,2) 1253 1254 ioptr = ma_sizeof(MT_INT,10,MT_BYTE) 1255 if (eaf_read(munit, ioptr, v, 1256 $ ma_sizeof(MT_INT,nnbf,MT_BYTE)).ne.0) 1257 $ call errquit('moints_getscattv: cannot read scatter v',0, 1258 & DISK_ERR) 1259 1260c$$$ PRINT*,'READ BACK SCATTER ARRAY' 1261c$$$ WRITE(6,771) (V(I),I=1,NNBF) 1262c$$$ 771 FORMAT(16I4) 1263 1264 return 1265 end 1266 1267 1268 1269 1270 1271 1272 1273c 1274c Units in bytes 1275c 1276 integer function moints_lmem( basis, nocc, nvir, blen ) 1277 implicit none 1278#include "mafdecls.fh" 1279#include "global.fh" 1280#include "bas.fh" 1281#include "util.fh" 1282 integer basis, nocc, nvir, blen 1283c 1284 integer nsh, nbf, maxbfsh 1285 integer bsize, ngrp, imax2e, imem2, memi, memd 1286 logical status, oprint_mem 1287 integer moints_numgr 1288 external moints_numgr 1289c 1290 oprint_mem = util_print('memory',print_high) .and. 1291 $ ga_nodeid().eq.0 1292c 1293 status = bas_numbf(basis,nbf) 1294 status = status.and.bas_numcont(basis,nsh) 1295 status = status.and.bas_nbf_cn_max(basis,maxbfsh) 1296 bsize = max(blen,maxbfsh) 1297 ngrp = moints_numgr( basis, blen ) 1298 1299 call intb_mem_2e4c(imax2e, imem2) 1300 imax2e = max(imax2e,min(50*maxbfsh**4,21**4)) ! Enuf room for 1 cartesian H shell 1301 1302* call int_mem_2e4c(imax2e, imem2) For non-blocking integrals 1303 memi = nsh + 4*ngrp + nbf*nbf + nsh*(nsh+1) + 1304 $ 2*ga_nnodes() 1305 memd = nbf**2 + blen*nbf*maxbfsh**2 + 1306 $ max(maxbfsh**2*blen**2+3*imax2e+imem2, 1307 $ 2*nvir*maxbfsh**2+nbf**2) 1308c 1309c Add on the ubiquitous 10% 1310c 1311 moints_lmem = (memd + ma_sizeof(MT_INT, memi, MT_DBL))*1.1 1312c 1313 if (oprint_mem) then 1314 write(6,*) 1315 write(6,1) ' Integer Lmemory ', 1316 $ nsh , 4*ngrp , nbf*nbf , nsh*(nsh+1) , 1317 $ 2*ga_nnodes() 1318 1 format(a,1x,10i8) 1319 write(6,1) ' Real Lmemory ', 1320 $ nbf**2, blen*nbf*maxbfsh**2, 1321 $ max(maxbfsh**2*blen**2+3*imax2e+imem2, 1322 $ 2*nvir*maxbfsh**2+nbf**2) 1323 write(6,1) ' Total Lmemory ', 1324 $ int((memd + ma_sizeof(MT_INT, memi, MT_DBL))*1.1) 1325 call util_flush(6) 1326 endif 1327c 1328 end 1329 1330 subroutine moints_xmot(nbf,nq,xmo, xmo_t) 1331 implicit none 1332 integer nbf, nq 1333 double precision xmo(nbf,nq), xmo_t(nq,nbf) 1334 integer i,j 1335 do i = 1, nbf 1336 do j = 1, nq 1337 xmo_t(j,i) = xmo(i,j) 1338 enddo 1339 enddo 1340 end 1341 1342#ifdef NEW_SPARSE 1343 subroutine moints_trf2Kynew( nbf, qlo, qhi, oseglo, oseghi, 1344 $ vlo, vhi, ilo, ihi, jlo, jhi, 1345 $ rlen, gloc, ssni, h, h2, ct, 1346 $ g_buf ) 1347 implicit none 1348#include "global.fh" 1349 integer nbf, qlo, qhi, oseglo, oseghi, vlo, vhi 1350 integer ilo, ihi, jlo, jhi 1351 integer rlen 1352 integer gloc(nbf,nbf) 1353 double precision ssni(nbf,jlo:jhi,ilo:ihi,oseglo:oseghi) 1354 double precision h(vlo:vhi,jlo:jhi,ilo:ihi),h2(*) 1355 double precision ct(qlo:qhi,nbf) 1356 integer g_buf 1357c 1358 double precision s 1359 integer ijolo, ijohi, nij 1360 integer ilen, jlen, vlen 1361 integer glo, ghi, i, j, ij, k, jtop, klo, khi 1362 integer o, v, vtop, vbot 1363 integer kchunk, vchunk 1364 parameter (kchunk=32, vchunk=96) ! For cache reuse 1365c 1366 ilen = ihi - ilo + 1 1367 jlen = jhi - jlo + 1 1368 vlen = vhi - vlo + 1 1369 glo = gloc(ilo,jlo) 1370 ghi = gloc(ihi,jhi) 1371 nij = ilen*jlen 1372 if (ihi.eq.jhi) nij = (ilen*(ilen+1))/2 1373 1374 do o = oseglo, oseghi 1375 ijolo = (o - oseglo)*rlen + glo 1376 ijohi = (o - oseglo)*rlen + ghi 1377 call dfill((vhi-vlo+1)*ilen*jlen, 0.0d0, h, 1 ) 1378 do klo = 1, nbf, kchunk ! Blocking for cache 1379 khi = min(nbf,klo+kchunk-1) 1380 do vbot = vlo, vhi, vchunk ! Blocking for cache 1381 vtop = min(vhi,vbot+vchunk-1) 1382 do i = ilo, ihi 1383 jtop = jhi 1384 if (ilo .eq. jlo) jtop = i 1385 do j = jlo, jtop 1386 do k = klo, khi 1387 s = ssni(k,j,i,o) 1388 if (abs(s) .gt. 1d-16) then 1389 do v = vbot,vtop 1390 h(v,j,i) = h(v,j,i) + s*ct(v,k) 1391 enddo 1392 endif 1393 enddo 1394 enddo 1395 enddo 1396 enddo 1397 enddo 1398 ij = 0 1399 s = 0.d0 1400 do v=vlo,vhi 1401 do i=ilo,ihi 1402 if (ihi.eq.jhi) jtop = i 1403 do j=jlo,jtop 1404 ij = ij + 1 1405 h2(ij) = h(v,j,i) 1406 s = s + h(v,j,i)*h(v,j,i) 1407 enddo 1408 enddo 1409 enddo 1410 if (s.gt.1.d-20) call ga_put(g_buf,ijolo,ijohi,1,vlen,h2,nij) 1411 enddo 1412c 1413 return 1414 end 1415#endif 1416