1 SUBROUTINE tce_mo2e_zones_4a_disk_2s_new(rtdb,d_v2, 2 1 kax_v2_alpha_offset, 3 1 size_2e) 4C $Id$ 5C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0 6C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002) 7C t ( p1 p2 h3 h4 )_t 8 IMPLICIT NONE 9#include "rtdb.fh" 10#include "global.fh" 11#include "mafdecls.fh" 12#include "stdio.fh" 13#include "util.fh" 14#include "bas.fh" 15#include "schwarz.fh" 16#include "sym.fh" 17#include "sf.fh" 18#include "errquit.fh" 19#include "tce.fh" 20#include "tce_main.fh" 21c 22c 23 integer rtdb ! Run-time database 24 integer d_v2 ! MO integrals 25 integer kax_v2_alpha_offset ! MO integrals offset 26 integer size_2e ! 2e file size 27c 28 INTEGER size_2g2a,l_2g2a,k_2g2a 29 INTEGER azone1,azone2,azone3,azone4 30 INTEGER g1b,g2b,g3b,g4b 31 INTEGER igi1,igi2,igi3,igi4 32 INTEGER ii,i,j,k,l,N,ipos1,ipos2,ipos3,ipos4 33 INTEGER del1,del2,p1rel,p2rel 34 INTEGER size_4a,l_4a,k_4a 35c 36 integer mu,nu,rho,sigma 37 integer mu_lo,mu_hi 38 integer nu_lo,nu_hi 39 integer rho_lo,rho_hi 40 integer sigma_lo,sigma_hi 41 integer mu_range 42 integer nu_range 43 integer rho_range 44 integer sigma_range 45 integer mu1,nu1,rho1,sigma1 46 integer shift_mu,shift_nu 47 integer shift_rho,shift_sigma 48 integer work1,work2 ! Work array sizes 49 integer l_work1,k_work1 ! Work array 1 50 integer l_work2,k_work2 ! Work array 2 51 integer imu1,inu1,irho1,isigma1 52c 53 integer l_movecs_orb,k_movecs_orb 54 integer l_gpair,k_gpair 55 integer len_pair,g12_shift 56c ATTENTION,ACHTUNG,UWAGA 2000 - max # of CPU 57c 58 integer size_2g2z,l_2g2z,k_2g2z 59 integer tot_azone1_sh,tot_azone2_sh 60 integer tot_azone3_sh,tot_azone4_sh 61 integer ixi,jxi,point_pair 62 integer size_stripe,l_p34,k_p34 63 integer addr,xoffset_34 64 integer size_g3g4,xoffset_p34 65 integer size_g4321,k_g4321,l_g4321 66c 67 integer iha,ihb !number of corr. alpha, beta holes 68 integer ipa,ipb !number of corr. alpha, beta particles 69 integer INDEX_PAIR,icol,irow 70c compression 71 integer xoffset_g12(1000),pointer_g12(1000) 72 integer xoffset_size_p1p2(1000) 73 integer size_temp,size_temp_4g,xoffset_temp_4g,size_p1p2 74 integer max_size_temp,xoffset_temp,iclose,iopen,offset_2g2z 75 integer imaxp12,istart,ibuba,sumx,slice_offset 76 double precision wall,cpu,wall1,cpu1,wall2,cpu2,wall3,cpu3 77c *** debug *** 78c double precision xtot1,xtot2,xtot3 79c ************* 80 double precision tot_zone(1000) 81 integer l_g12piece,k_g12piece,size_g12p,iguru,ilogu,ihigu 82c 83 integer l_4af_offset,k_4af_offset,d_4af 84 integer k_2a2m_offset,l_2a2m_offset 85 integer sf_chunk,request 86 integer key_4af,offset_4af,size_4af 87 integer sf2a2m_chunk,key_2a2m,offset_2a2m 88 integer d_2a2m,size_2a2mf 89 character*255 filename 90c 91 logical parallel 92c 93 INTEGER length 94 INTEGER NXTASK 95 INTEGER next 96 INTEGER nprocs 97 INTEGER count 98 EXTERNAL NXTASK 99 logical nodezero 100 logical idiskl 101c 102c 103c 104ccx idisk=.true. 105 if(idisk.eq.0) then 106 idiskl=.false. 107 else 108 idiskl=.true. 109 end if 110c 111 parallel = .true. 112c 113c 114 max_size_temp=imaxsize**4 115c 116c 117 do ii=1,1000 118 tot_zone(ii)=0.0d0 119 enddo 120 if(atpart.gt.1000) 121 & call errquit('tce_zones: atpart too big',1,MA_ERR) 122 sumx=0 123 do ii=1,atpart 124 tot_zone(ii)=sumx 125 sumx=sumx+nalength(ii) 126 enddo 127c 128 nodezero=(ga_nodeid().eq.0) 129c 130c 131c this module is called only if intorb = .true. 132c N is the number of correlated orbitals 133 N = nmo(1) - nfc(1) - nfv(1) 134 iha = nocc(1)-nfc(1) 135 ihb = nocc(ipol)-nfc(ipol) 136 ipa = nmo(1)-nocc(1)-nfv(1) 137 ipb = nmo(ipol)-nocc(ipol)-nfv(ipol) 138c 139 sf_chunk=(imaxsize+10)**4 140 sf2a2m_chunk=((tile_dim)**2)*((imaxsize+10)**2) 141c 142 call tce_4a_offset(l_4af_offset,k_4af_offset,size_4af) 143 call tce_2a2m_offset(l_2a2m_offset,k_2a2m_offset,size_2a2mf) 144c 145c 146c 147 if(nodezero) then 148 write(6,*)'INTERMEDIATE FILES' 149 write(6,*)'size_4af',size_4af 150 write(6,*)'size_2a2mf',size_2a2mf 151c write(6,*)'bytes=',bytes 152c write(6,*)'dfloat(bytes)=',dfloat(bytes) 153c write(6,*)'dfloat(bytes)*dfloat(size_4af)', 154c & dfloat(bytes)*dfloat(size_4af) 155c write(6,*)'dfloat(bytes)*dfloat(size_2a2mf)', 156c & dfloat(bytes)*dfloat(size_2a2mf) 157c write(6,*)' ' 158 end if 159c 160c 161c 162 if(idiskl) then 163 if(.not.parallel) 164 1 call errquit('sf only for parallel runs',1,DISK_ERR) 165 if(parallel) call ga_sync() 166 call util_file_name('4aintx',.false.,.false.,filename) 167 if (sf_create(filename,dfloat(bytes)*dfloat(size_4af), 168 1 dfloat(bytes)*dfloat(size_4af),sf_chunk,d_4af).ne.0) 169 2 call errquit('4-index: sf problem',0,DISK_ERR) 170 call util_file_name('2a2m',.false.,.false.,filename) 171 if (sf_create(filename,dfloat(bytes)*dfloat(size_2a2mf), 172 1 dfloat(bytes)*dfloat(size_2a2mf),sf2a2m_chunk,d_2a2m).ne.0) 173 2 call errquit('4-index: sf problem',0,DISK_ERR) 174 else 175 call createfile(filename,d_4af,size_4af) 176 call reconcilefile(d_4af,size_4af) 177 call ga_zero(d_4af) 178 call createfile(filename,d_2a2m,size_2a2mf) 179 call reconcilefile(d_2a2m,size_2a2mf) 180 call ga_zero(d_2a2m) 181 end if 182c 183c 184c Pair's structure of the integral file 185 call tce_mo2e_pairs(l_gpair,k_gpair,len_pair) 186 len_pair = int_mb(k_gpair) 187c 188c 189c alpha orbitals only 190c 191 if (.not.ma_push_get(mt_dbl,nbf*(iha+ipa) 192 1 ,"sorted MO coeffs", 193 2 l_movecs_orb,k_movecs_orb)) 194 3 call errquit('tce_mo2e_zone: MA problem 1',0, 195 2 BASIS_ERR) 196 call dfill(nbf*(iha+ipa),0.0d0, dbl_mb(k_movecs_orb), 1) 197 do i=1,iha 198 do isigma1=1,nbf 199 dbl_mb(k_movecs_orb+(i-1)*nbf+isigma1-1)= 200 & dbl_mb(k_movecs_sorted+(i-1)*nbf+isigma1-1) 201 enddo 202 enddo 203 do i=iha+1,iha+ipa 204 do isigma1=1,nbf 205 dbl_mb(k_movecs_orb+(i-1)*nbf+isigma1-1)= 206 & dbl_mb(k_movecs_sorted+(i+ihb-1)*nbf+isigma1-1) 207 enddo 208 enddo 209c 210c 211 call int_mem_2e4c(work1,work2) 212 if (.not.ma_push_get(mt_dbl,work1,'work1',l_work1,k_work1)) 213 1 call errquit('tce_ao2e: MA problem work1',0,MA_ERR) 214 if (.not.ma_push_get(mt_dbl,work2,'work2',l_work2,k_work2)) 215 1 call errquit('tce_ao2e: MA problem work2',1,MA_ERR) 216c 217c 218c 219c 4af file formed here 220c 221c 222 cpu1 = - util_cpusec() 223 wall1 = - util_wallsec() 224c 225 nprocs = GA_NNODES() 226 count = 0 227 next = NXTASK(nprocs, 1) 228 DO azone1 = 1,atpart !nu 229 DO azone2 = azone1,atpart !mu 230 DO azone3 = 1,atpart !sigma 231 DO azone4 = azone3,atpart !rho 232 IF (next.eq.count) THEN 233c --------------------------- 234 size_4a = nalength(azone1)*nalength(azone2)* 235 1 nalength(azone3)*nalength(azone4) 236c *** debug *** 237c write(6,555) ga_nodeid(),azone1,azone2,azone3,azone4,size_4a 238c call util_flush(6) 239c cpu1 = - util_cpusec() 240c wall1 = - util_wallsec() 241c ************* 242 if(.not.ma_push_get(mt_dbl,size_4a,'4a',l_4a,k_4a)) 243 1 call errquit('tce_4af_zones1: MA problem',0,MA_ERR) 244 call dfill(size_4a, 0.0d0, dbl_mb(k_4a), 1) 245 shift_mu = 0 246 do mu = a2length(azone2)+1,a2length(azone2+1) 247 if (.not.bas_cn2bfr(ao_bas_han,mu,mu_lo,mu_hi)) 248 1 call errquit('tce_ao2e: basis fn range problem 1',0, 249 2 BASIS_ERR) 250 mu_range = mu_hi - mu_lo + 1 251 shift_nu = 0 252 do nu = a2length(azone1)+1,a2length(azone1+1) 253 if (.not.bas_cn2bfr(ao_bas_han,nu,nu_lo,nu_hi)) 254 1 call errquit('tce_ao2e: basis fn range problem 1',0, 255 2 BASIS_ERR) 256 nu_range = nu_hi - nu_lo + 1 257 shift_rho = 0 258 do rho = a2length(azone4)+1,a2length(azone4+1) 259 if (.not.bas_cn2bfr(ao_bas_han,rho,rho_lo,rho_hi)) 260 1 call errquit('tce_ao2e: basis fn range problem 1',0, 261 2 BASIS_ERR) 262 rho_range = rho_hi - rho_lo + 1 263 shift_sigma = 0 264 do sigma = a2length(azone3)+1,a2length(azone3+1) 265 if (.not.bas_cn2bfr(ao_bas_han,sigma,sigma_lo,sigma_hi)) 266 1 call errquit('tce_ao2e: basis fn range problem 1',0, 267 2 BASIS_ERR) 268 sigma_range = sigma_hi - sigma_lo + 1 269 if (schwarz_shell(rho,sigma)*schwarz_shell(mu,nu) 270 1 .ge. tol2e) then 271cccx call dfill(work1, 0.0d0, dbl_mb(k_work1), 1) 272cccx call dfill(work2, 0.0d0, dbl_mb(k_work2), 1) 273c *** debug *** 274c cpu2 = - util_cpusec() 275c wall2 = - util_wallsec() 276c ************************************** 277 call int_2e4c(ao_bas_han,mu,nu,ao_bas_han,rho,sigma, 278 1 work2,dbl_mb(k_work2),work1,dbl_mb(k_work1)) 279 i=0 280 do mu1 = 1,mu_range 281 do nu1 = 1,nu_range 282 do rho1 = 1,rho_range 283 do sigma1 = 1,sigma_range 284 i=i+1 285 inu1=nu1+shift_nu 286 isigma1=sigma1+shift_sigma 287 imu1=mu1+shift_mu 288 irho1=rho1+shift_rho 289c (isigma1,irho1|inu1, imu1) 290 ipos1=(((imu1-1)*nalength(azone1)+inu1-1)* 291 1 nalength(azone4)+irho1-1)*nalength(azone3) 292 2 +isigma1 293 dbl_mb(k_4a+ipos1-1)=dbl_mb(k_work1+i-1) 294 enddo 295 enddo 296 enddo 297 enddo 298c **** debug **** 299c cpu3 = cpu3 + util_cpusec() 300c wall3 = wall3 + util_wallsec() 301c xtot2=xtot2+cpu3 302c *************************** 303 end if !schwarz screening 304 shift_sigma = shift_sigma + sigma_range 305 enddo !sigma 306 shift_rho = shift_rho + rho_range 307 enddo !rho 308 shift_nu = shift_nu + nu_range 309 enddo !nu 310 shift_mu = shift_mu + mu_range 311 enddo !mu 312c *** debug **** 313c cpu1 = cpu1 + util_cpusec() 314c wall1 = wall1 + util_wallsec() 315c write(6,556) ga_nodeid(),azone1,azone2,azone3,azone4,cpu1,wall1 316c call util_flush(6) 317c ************** 318c 319c fixing offsets and sf_writing 320 key_4af=azone4 - 1 + atpart * (azone3 - 1 + 321 & atpart * (azone2 - 1 + atpart * (azone1 - 1))) 322 call tce_hash(int_mb(k_4af_offset),key_4af,offset_4af) 323 if(idiskl) then 324 if (sf_write(d_4af,dfloat(bytes)*dfloat(offset_4af), 325 1 dfloat(bytes)*dfloat(size_4a),dbl_mb(k_4a),request).ne.0) 326 1 THEN 327 write(6,8155)ga_nodeid(),key_4af,offset_4af,size_4a 328 call errquit('zones put: sf problem2',1,DISK_ERR) 329 end if 330 if (sf_wait(request).ne.0) 331 1 call errquit('zones put: sf problem3',2,DISK_ERR) 332 else 333 call ga_put(d_4af,offset_4af+1,offset_4af+size_4a,1,1, 334 1 dbl_mb(k_4a),size_4a) 335 end if 336c closing l_4a file 337 if (.not.ma_pop_stack(l_4a)) 338 1 call errquit('tce_mo2e_4af2: l_4a',15,MA_ERR) 339c --------------------------- 340 next = NXTASK(nprocs, 1) 341 END IF 342 count = count + 1 343 ENDDO !azone4 344 ENDDO !azone3 345 ENDDO !azone2 346 ENDDO !azone1 347c 348c 349c 350c 351c 352c 353c 354c 355 call ga_sync() 356 next = nxtask(-nprocs,1) 357c 358c 359c 360c 361c *** debug ********** 362c write(6,*)'first part OK',ga_nodeid() 363c call util_flush(6) 364c ******************** 365c 366c 367c 368c 369 nprocs = GA_NNODES() 370 count = 0 371 next = NXTASK(nprocs, 1) 372 DO g3b = 1,noa+nva !k 373 DO g4b = g3b,noa+nva !l 374 DO azone1 = 1,atpart !nu 375 DO azone2 = azone1,atpart !mu 376 IF (next.eq.count) THEN 377c 378c *** debug *** 379c write(6,777)ga_nodeid(),g3b,g4b,azone1,azone2 380c call util_flush(6) 381c ************* 382 cpu1 = - util_cpusec() 383 wall1 = - util_wallsec() 384c 385 size_2g2a=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1) 386 1 *nalength(azone1)*nalength(azone2) 387 if (.not.ma_push_get(mt_dbl,size_2g2a,'2g2a',l_2g2a,k_2g2a)) 388 1 call errquit('tce_r2_divide1: MA problem',0,MA_ERR) 389 call dfill(size_2g2a, 0.0d0, dbl_mb(k_2g2a), 1) 390c 391 DO azone3 = 1,atpart !sigma 392 DO azone4 = azone3,atpart !rho 393 size_4a = nalength(azone1)*nalength(azone2)* 394 1 nalength(azone3)*nalength(azone4) 395 if(.not.ma_push_get(mt_dbl,size_4a,'4a',l_4a,k_4a)) 396 1 call errquit('tce_r2_divide2: MA problem',0,MA_ERR) 397cccx call dfill(size_4a, 0.0d0, dbl_mb(k_4a), 1) 398c 399c 400 key_4af=azone4 - 1 + atpart * (azone3 - 1 + 401 & atpart * (azone2 - 1 + atpart * (azone1 - 1))) 402 call tce_hash(int_mb(k_4af_offset),key_4af,offset_4af) 403 if(idiskl) then 404 if (sf_read(d_4af,dfloat(bytes)*dfloat(offset_4af), 405 1 dfloat(bytes)*dfloat(size_4a),dbl_mb(k_4a),request).ne.0) 406 1 call errquit('zones get2: sf problem',1,DISK_ERR) 407 if (sf_wait(request).ne.0) 408 1 call errquit('zones get3: sf problem',2,DISK_ERR) 409 else 410 call ga_get(d_4af,offset_4af+1,offset_4af+size_4a,1,1, 411 1 dbl_mb(k_4a),size_4a) 412 end if 413c 414c 415c 416c 417c now forming auxiliary matrix of orbital coeff. 418c (g4 g3 | sigma rho) = C(g4 simga)*C(g3 rho) 419 size_2g2z=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1) 420 1 *nalength(azone3)*nalength(azone4) 421 if (.not.ma_push_get(mt_dbl,size_2g2z,'2g2z',l_2g2z,k_2g2z)) 422 1 call errquit('tce_r2_divide3: MA problem',0,MA_ERR) 423ccxkk call dfill(size_2g2z, 0.0d0, dbl_mb(k_2g2z), 1) 424 425 if(azone3.ne.azone4) then 426 i = 0 427 tot_azone3_sh=tot_zone(azone3) 428 tot_azone4_sh=tot_zone(azone4) 429 do irho1 = 1,nalength(azone4) 430 do isigma1 = 1,nalength(azone3) 431 do igi3 = 1,int_mb(k_range_alpha+g3b-1) 432 do igi4 = 1,int_mb(k_range_alpha+g4b-1) 433 i = i+1 434 ipos1=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone3_sh 435 & +isigma1 436 ipos2=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone4_sh 437 & +irho1 438 ipos3=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone4_sh 439 & +irho1 440 ipos4=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone3_sh 441 & +isigma1 442 dbl_mb(k_2g2z+i-1)=dbl_mb(k_movecs_orb+ipos1-1)* 443 & dbl_mb(k_movecs_orb+ipos2-1) 444 & +dbl_mb(k_movecs_orb+ipos3-1)* 445 & dbl_mb(k_movecs_orb+ipos4-1) 446 enddo 447 enddo 448 enddo 449 enddo 450 end if !azone3.ne.azone4 451 if(azone3.eq.azone4) then 452 i = 0 453 tot_azone3_sh=tot_zone(azone3) 454 tot_azone4_sh=tot_zone(azone4) 455 do irho1 = 1,nalength(azone4) 456 do isigma1 = 1,nalength(azone3) 457 do igi3 = 1,int_mb(k_range_alpha+g3b-1) 458 do igi4 = 1,int_mb(k_range_alpha+g4b-1) 459 i = i+1 460 ipos1=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone3_sh 461 & +isigma1 462 ipos2=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone4_sh 463 & +irho1 464 dbl_mb(k_2g2z+i-1)=dbl_mb(k_movecs_orb+ipos1-1)* 465 & dbl_mb(k_movecs_orb+ipos2-1) 466 enddo 467 enddo 468 enddo 469 enddo 470 end if !azone3.eq.azone4 471c now: dgemming 472 call dgemm('N','N', 473 1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1), !m 474 1 nalength(azone1)*nalength(azone2), !n 475 1 nalength(azone3)*nalength(azone4), !k 476 1 1.0d0, 477 1 dbl_mb(k_2g2z), !A 478 1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1), !lda 479 1 dbl_mb(k_4a),nalength(azone3)*nalength(azone4), ! B,ldb 480 1 1.0d0,dbl_mb(k_2g2a), 481 1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)) 482c 483c 484 if (.not.ma_pop_stack(l_2g2z)) 485 1 call errquit('tce_mo2e_trans_zones: l_2g2z',15,MA_ERR) 486c 487 if (.not.ma_pop_stack(l_4a)) 488 1 call errquit('tce_mo2e_trans_zones: l_4a',15,MA_ERR) 489 ENDDO !azone4 490c one step ahead 491 ENDDO !azone3 492c 493 key_2a2m=azone2 - 1 + atpart * (azone1 - 1 + 494 & atpart * (g4b - 1 + (noa+nva) * (g3b - 1))) 495 call tce_hash(int_mb(k_2a2m_offset),key_2a2m,offset_2a2m) 496 if(idiskl) then 497 if (sf_write(d_2a2m,dfloat(bytes)*dfloat(offset_2a2m), 498 1 dfloat(bytes)*dfloat(size_2g2a),dbl_mb(k_2g2a),request).ne.0) 499 2 call errquit('zones put: sf problem21',1,DISK_ERR) 500 if (sf_wait(request).ne.0) 501 1 call errquit('zones put: sf problem31',2,DISK_ERR) 502 else 503 call ga_put(d_2a2m,offset_2a2m+1,offset_2a2m+size_2g2a,1,1, 504 1 dbl_mb(k_2g2a),size_2g2a) 505 end if 506c 507 if (.not.ma_pop_stack(l_2g2a)) 508 1 call errquit('tce_mo2e_trans_zones: MA problem',15,MA_ERR) 509c 510 next = NXTASK(nprocs, 1) 511 END IF 512 count = count + 1 513 ENDDO !azone2 514 ENDDO !azone1 515 ENDDO !g4b 516 ENDDO !g3b 517c 518c 519c 520c 521 call ga_sync() 522 next = nxtask(-nprocs,1) 523c 524c 525c 526c 527c 528c 529c 530 nprocs = GA_NNODES() 531 count = 0 532 next = NXTASK(nprocs, 1) 533 DO g3b = 1,noa+nva !k 534 DO g4b = g3b,noa+nva !l 535c 536c write(6,*)'g3b-g4b',g3b,g4b,ga_nodeid() 537c call util_flush(6) 538c 539c g2 g1 do loops 540c open (g4 g3 | all symmetry allowed g2 g1) - equally good we can split it into 541c pieces 542c (k,l|i,j) pieces 543c 544c calculate point_pair index here 545c 546 ixi=noa+nva-g3b+1 547 jxi=noa+nva-g4b+1 548 point_pair=((noa+nva)*(noa+nva+1))/2-((ixi-1)*ixi)/2-jxi+1 549 size_stripe=int_mb(k_gpair+point_pair) 550 xoffset_p34 = int_mb(k_gpair+len_pair+point_pair) 551c 552c offset for blocking the (nu mu | g2 g1 ) = C*C matrix 553c 554 do i=1,1000 555 xoffset_g12(i) = 0 556 pointer_g12(i) = 0 557 xoffset_size_p1p2(i) = 0 558 enddo 559 imaxp12=0 560cccx size_temp=0 561 size_temp_4g=0 562 xoffset_temp=0 563 xoffset_temp_4g=0 564 size_p1p2=0 565 i=1 566 iguru=0 567 DO g1b = 1,noa+nva !l 568 DO g2b = g1b,noa+nva !k 569 iguru=iguru+1 570 IF (int_mb(k_spin_alpha+g3b-1)+int_mb(k_spin_alpha+g4b-1).eq. 571 & int_mb(k_spin_alpha+g1b-1)+int_mb(k_spin_alpha+g2b-1)) THEN 572 IF (ieor(int_mb(k_sym_alpha+g3b-1),ieor(int_mb(k_sym_alpha+g4b-1) 573 & ,ieor(int_mb(k_sym_alpha+g1b-1),int_mb(k_sym_alpha+g2b-1)))) .eq. 574 & irrep_v) THEN 575 IROW=INDEX_PAIR(g4b,g3b) 576 ICOL=INDEX_PAIR(g2b,g1b) 577 IF(IROW.GE.ICOL) THEN 578 size_temp_4g=size_temp_4g+int_mb(k_range_alpha+g2b-1) 579 1 *int_mb(k_range_alpha+g1b-1) 580 2 *int_mb(k_range_alpha+g3b-1) 581 3 *int_mb(k_range_alpha+g4b-1) 582 size_p1p2=size_p1p2+int_mb(k_range_alpha+g2b-1) 583 1 *int_mb(k_range_alpha+g1b-1) 584 if(size_temp_4g.gt.max_size_temp) then 585 xoffset_g12(i)=xoffset_temp_4g 586 xoffset_size_p1p2(i)=size_p1p2 587 pointer_g12(i)=iguru 588 xoffset_temp_4g=xoffset_temp_4g+size_temp_4g 589 size_temp_4g=0 590 size_p1p2=0 591 imaxp12=i 592 i=i+1 593 end if 594 END IF 595 END IF 596 END IF 597 ENDDO 598 ENDDO 599c 600 if(size_temp_4g.ne.0) then 601 xoffset_g12(i)=xoffset_temp_4g 602 xoffset_size_p1p2(i)=size_p1p2 603 pointer_g12(i)=iguru 604 imaxp12=i 605 end if 606c 607 if(i.gt.1000) 608 1 call errquit('tce_zone: xoffset-size-problem',0,MA_ERR) 609c 610c 611 do i =1, imaxp12 612c 613 IF (next.eq.count) THEN ! parallel job here 614c 615 if(i.eq.1) then 616 ilogu=1 617 else 618 ilogu=pointer_g12(i-1)+1 619 end if 620 ihigu=pointer_g12(i) 621 size_g12p=int_mb(k_range_alpha+g4b-1)* 622 1 int_mb(k_range_alpha+g3b-1)* 623 1 xoffset_size_p1p2(i) 624 if (.not.ma_push_get(mt_dbl,size_g12p,'g12piece', 625 1 l_g12piece,k_g12piece)) 626 1 call errquit('tce_zone: MA g12-piece ',0,MA_ERR) 627 call dfill(size_g12p, 0.0d0, dbl_mb(k_g12piece), 1) 628c 629c ------------------------- 630c azone1 azone2 loop -starts here 631c 632 DO azone1 = 1,atpart !nu 633 DO azone2 = azone1,atpart !mu 634 size_2g2a=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1) 635 1 *nalength(azone1)*nalength(azone2) 636 if (.not.ma_push_get(mt_dbl,size_2g2a,'2g2a',l_2g2a,k_2g2a)) 637 1 call errquit('tce_r2_divide1: MA problem',0,MA_ERR) 638c fixing offsets and sf_writing 639 key_2a2m=azone2 - 1 + atpart * (azone1 - 1 + 640 & atpart * (g4b - 1 + (noa+nva) * (g3b - 1))) 641 call tce_hash(int_mb(k_2a2m_offset),key_2a2m,offset_2a2m) 642 if(idiskl) then 643 if (sf_read(d_2a2m,dfloat(bytes)*dfloat(offset_2a2m), 644 1 dfloat(bytes)*dfloat(size_2g2a),dbl_mb(k_2g2a),request).ne.0) 645 2 call errquit('zones put: sf problem22',1,DISK_ERR) 646 if (sf_wait(request).ne.0) 647 1 call errquit('zones put: sf problem32',2,DISK_ERR) 648 else 649 call ga_get(d_2a2m,offset_2a2m+1,offset_2a2m+size_2g2a,1,1, 650 1 dbl_mb(k_2g2a),size_2g2a) 651 end if 652c -------------------------- 653 istart=0 654 iguru=0 655 slice_offset=0 656 DO g1b = 1,noa+nva !l 657 DO g2b = g1b,noa+nva !k 658 iguru=iguru+1 659 IF (int_mb(k_spin_alpha+g3b-1)+int_mb(k_spin_alpha+g4b-1).eq. 660 & int_mb(k_spin_alpha+g1b-1)+int_mb(k_spin_alpha+g2b-1)) THEN 661 IF (ieor(int_mb(k_sym_alpha+g3b-1),ieor(int_mb(k_sym_alpha+g4b-1) 662 & ,ieor(int_mb(k_sym_alpha+g1b-1),int_mb(k_sym_alpha+g2b-1)))) .eq. 663 & irrep_v) THEN 664 IROW=INDEX_PAIR(g4b,g3b) 665 ICOL=INDEX_PAIR(g2b,g1b) 666 IF(IROW.GE.ICOL) THEN 667c l_g4321 ==> ([g4][g3]|[g2][g1]) 668c now forming auxiliary matrix of orbital coeff. 669c ( nu mu | g2 g1 ) = C(nu g2) C(mu g1) 670c 671c !--------------------------------------------------------------------- 672 IF((iguru.ge.ilogu).and.(iguru.le.ihigu)) THEN 673c otworz tutaj C(nu,mu,g2,g1) 674 if (.not.ma_push_get(mt_dbl,nalength(azone1)*nalength(azone2)* 675 1 int_mb(k_range_alpha+g2b-1)*int_mb(k_range_alpha+g1b-1), 676 1 '2g2z',l_2g2z,k_2g2z)) 677 1 call errquit('tce_zone: MA problem 2g2z-b',0,MA_ERR) 678 if(azone1.ne.azone2) then 679 ii = 0 680 tot_azone1_sh=tot_zone(azone1) 681 tot_azone2_sh=tot_zone(azone2) 682 do igi1 = 1,int_mb(k_range_alpha+g1b-1) 683 do igi2 = 1,int_mb(k_range_alpha+g2b-1) 684 do imu1 = 1,nalength(azone2) 685 do inu1 = 1,nalength(azone1) 686 ii = ii+1 687 ipos1=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone1_sh 688 & +inu1 689 ipos2=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone2_sh 690 & +imu1 691 ipos3=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone2_sh 692 & +imu1 693 ipos4=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone1_sh 694 & +inu1 695 dbl_mb(k_2g2z+ii-1)=dbl_mb(k_movecs_orb+ipos1-1)* 696 & dbl_mb(k_movecs_orb+ipos2-1) 697 & +dbl_mb(k_movecs_orb+ipos3-1)* 698 & dbl_mb(k_movecs_orb+ipos4-1) 699 enddo 700 enddo 701 enddo 702 enddo 703c offset_2g2z=offset_2g2z+int_mb(k_range_alpha+g2b-1) 704c 1 *int_mb(k_range_alpha+g1b-1) 705c 1 *nalength(azone1)*nalength(azone2) 706 end if !azone1.ne.azone2 707 if(azone1.eq.azone2) then 708 ii = 0 709 tot_azone1_sh=tot_zone(azone1) 710 tot_azone2_sh=tot_zone(azone2) 711 do igi1 = 1,int_mb(k_range_alpha+g1b-1) 712 do igi2 = 1,int_mb(k_range_alpha+g2b-1) 713 do imu1 = 1,nalength(azone2) 714 do inu1 = 1,nalength(azone1) 715 ii = ii+1 716 ipos1=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone1_sh 717 & +inu1 718 ipos2=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone2_sh 719 & +imu1 720 dbl_mb(k_2g2z+ii-1)=dbl_mb(k_movecs_orb+ipos1-1)* 721 & dbl_mb(k_movecs_orb+ipos2-1) 722 enddo 723 enddo 724 enddo 725 enddo 726c offset_2g2z=offset_2g2z+int_mb(k_range_alpha+g2b-1) 727c 1 *int_mb(k_range_alpha+g1b-1) 728c 1 *nalength(azone1)*nalength(azone2) 729 end if !azone1.eq.azone2 730c !--------------------------------------------------------------------- 731c 732c 733 call dgemm('N','N', 734 1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1), !m 735 1 int_mb(k_range_alpha+g2b-1)*int_mb(k_range_alpha+g1b-1), !n 736 3 nalength(azone1)*nalength(azone2), !k 737 4 1.0d0,dbl_mb(k_2g2a), 738 5 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1), 739 6 dbl_mb(k_2g2z),nalength(azone1)*nalength(azone2), 740 7 1.0d0,dbl_mb(k_g12piece+slice_offset), 741 8 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)) 742c 743 slice_offset=slice_offset+ 744 1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 745 1 int_mb(k_range_alpha+g1b-1)*int_mb(k_range_alpha+g2b-1) 746c 747 if (.not.ma_pop_stack(l_2g2z)) 748 1 call errquit('tce_mo2e_trans_zones: l_2g2z',15,MA_ERR) 749c 750 END IF ! iguru 751 END IF ! irow.gt.icol 752 END IF ! symmetry 753 END IF ! spin 754 ENDDO ! g2b-loop ends up here 755 ENDDO ! g1b-loop ends up here 756c 757c 758c azone1 azone2 loop ends up here 759c 760 if (.not.ma_pop_stack(l_2g2a)) 761 1 call errquit('tce_mo2e_trans_zones: MA problem',15,MA_ERR) 762c 763 ENDDO !azone2 764 ENDDO !azone1 765c now put here put_block 766 call put_block(d_v2,dbl_mb(k_g12piece),size_g12p, 767 & xoffset_p34+xoffset_g12(i)) 768 istart=0 769c 770cccx if (.not.ma_pop_stack(l_2g2z)) 771cccx 1 call errquit('tce_mo2e_trans_zones: l_2g2z',15,MA_ERR) 772 if (.not.ma_pop_stack(l_g12piece)) 773 1 call errquit('tce_mo2e_trans_zones: l_g12piece',15,MA_ERR) 774 775 next = NXTASK(nprocs, 1) 776 END IF 777 count = count + 1 778c 779 ENDDO ! i = 1,imaxp12 780c 781c 782c 783cccx next = NXTASK(nprocs, 1) 784cccx END IF 785cccx count = count + 1 786 ENDDO !g4b 787 ENDDO !g3b 788c 789c 790c 791c 792c 793 call ga_sync() 794 next = nxtask(-nprocs,1) 795c 796c 797c if(nodezero) then 798c write(6,*)'DONE --- DONE ---- DONE ---- DONE' 799c end if 800c 801 if (.not.ma_pop_stack(l_work2)) 802 1 call errquit('tce_ao2e: MA problem',14,MA_ERR) 803 if (.not.ma_pop_stack(l_work1)) 804 1 call errquit('tce_ao2e: MA problem',15,MA_ERR) 805c 806 if (.not.ma_pop_stack(l_movecs_orb)) 807 1 call errquit('tce_ao2e: MA problem',15,MA_ERR) 808c 809 if (.not.ma_pop_stack(l_gpair)) 810 1 call errquit('tce_ao2e: MA problem',15,MA_ERR) 811c 812c 813c 814 if(idiskl) then 815c if (.not.sf_destroy(d_2a2m)) 816c 1 call errquit('tce_sf_destroy22: sf problem',15,MA_ERR) 817c if (parallel) call ga_sync() 818c if (.not.sf_destroy(d_4af)) 819c 1 call errquit('tce_sf_destroy33: sf problem',15,MA_ERR) 820c if (parallel) call ga_sync() 821 else 822 call deletefile(d_2a2m) 823 call ga_sync() 824 call deletefile(d_4af) 825 call ga_sync() 826 end if 827c 828c 829 if (.not.ma_pop_stack(l_2a2m_offset)) 830 1 call errquit('tce_ao2e_x: MA problem',15,MA_ERR) 831c 832 if (.not.ma_pop_stack(l_4af_offset)) 833 1 call errquit('tce_ao2e_y: MA problem',15,MA_ERR) 834c 835 call ga_sync() 836c *** debug *** 837c 800 format('DGEMM1 MAX',i5,2x,3f15.5) 838c 801 format('DGEMM2 ',i5,2x,3f15.5) 839 8155 format('FU',i6,2x,3i20) 840 9000 format('PART1',i4,1x,'Cpu wall ',2(f17.12,1x),3x,'g4b g3b',2i5) 841c 9001 format('PART2',i4,1x,'Cpu wall ',2(f17.12,1x),3x,'g4b g3b',2i5) 842c 9003 format('PART1-4a',i4,1x,'Cpu wall ',2(f17.12,1x)) 843c 9004 format('PART1-2g2z',i4,1x,'Cpu wall ',2(f17.12,1x)) 844c 9005 format('PART1-dgemm',i4,1x,'Cpu wall ',2(f17.12,1x)) 845c 9010 format(' P1-mnrs',i3,1x,2i5,1x,2i5,1x,'Cpu wall ',2(f17.12,1x)) 846 555 format('atom loop ',2x,i5,3x,2i5,3x,2i5,i12) 847 556 format('atom time',2x,i5,3x,2i5,3x,2i5,'Cpu wall ',2(f12.7,1x)) 848 777 format('main do loop ',2x,i5,3x,2i5,3x,2i5) 849 775 format('main loop step1 ',2x,i5,3x,2i5,3x,2i5) 850 776 format('main loop step2 ',2x,i5,3x,2i5,3x,2i5) 851 778 format('PART1',2x,i5,3x,2i5,3x,2i5,2x,'Cpu wall ',2(f17.12,1x)) 852 779 format('PART2',2x,i5,3x,2i5,3x,2i5,2x,'Cpu wall ',2(f17.12,1x)) 853 780 format('ADD BLOCK',2x,i5,3x,2i5,3x,2i5,2x,'Cpu wall ', 854 & 2(f17.12,1x)) 855c ************* 856c 857 RETURN 858 END 859c 860c 861c 862 SUBROUTINE tce_4a_offset(l_a_offset,k_a_offset,size) 863C $Id$ 864C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0 865C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002) 866 IMPLICIT NONE 867#include "global.fh" 868#include "mafdecls.fh" 869#include "sym.fh" 870#include "errquit.fh" 871#include "tce.fh" 872#include "tce_main.fh" 873 INTEGER l_a_offset 874 INTEGER k_a_offset 875 INTEGER size 876 INTEGER length 877 INTEGER addr 878 INTEGER mu 879 INTEGER nu 880 INTEGER rho 881 INTEGER sigma 882 INTEGER azone1,azone2,azone3,azone4 883 length = 0 884 DO azone1 = 1,atpart !nu 885 DO azone2 = azone1,atpart !mu 886 DO azone3 = 1,atpart !sigma 887 DO azone4 = azone3,atpart !rho 888 length = length + 1 889 END DO 890 END DO 891 END DO 892 END DO 893 IF (.not.MA_PUSH_GET(mt_int,2*length+1,'noname',l_a_offset,k_a_off 894 &set)) CALL ERRQUIT('tce_t2_offset',0,MA_ERR) 895 int_mb(k_a_offset) = length 896 addr = 0 897 size = 0 898 DO azone1 = 1,atpart !nu 899 DO azone2 = azone1,atpart !mu 900 DO azone3 = 1,atpart !sigma 901 DO azone4 = azone3,atpart !rho 902 addr = addr + 1 903 int_mb(k_a_offset+addr) = azone4 - 1 + atpart * (azone3 - 1 + 904 & atpart * (azone2 - 1 + atpart * (azone1 - 1))) 905 int_mb(k_a_offset+length+addr) = size 906 size = size + nalength(azone1) * nalength(azone2) * 907 & nalength(azone3) * nalength(azone4) 908 END DO 909 END DO 910 END DO 911 END DO 912 RETURN 913 END 914c 915c 916c 917c 918 SUBROUTINE tce_2a2m_offset(l_a_offset,k_a_offset,size) 919C $Id$ 920C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0 921C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002) 922 IMPLICIT NONE 923#include "global.fh" 924#include "mafdecls.fh" 925#include "sym.fh" 926#include "errquit.fh" 927#include "tce.fh" 928#include "tce_main.fh" 929 INTEGER l_a_offset 930 INTEGER k_a_offset 931 INTEGER size 932 INTEGER length 933 INTEGER addr 934 INTEGER g3b,g4b 935 INTEGER azone1,azone2 936 length = 0 937 DO g3b = 1,noa+nva !k 938 DO g4b = g3b,noa+nva !l 939 DO azone1 = 1,atpart !nu 940 DO azone2 = azone1,atpart !mu 941 length = length + 1 942 END DO 943 END DO 944 END DO 945 END DO 946 IF (.not.MA_PUSH_GET(mt_int,2*length+1,'noname',l_a_offset,k_a_off 947 &set)) CALL ERRQUIT('tce_t2_offset',0,MA_ERR) 948 int_mb(k_a_offset) = length 949 addr = 0 950 size = 0 951 DO g3b = 1,noa+nva !k 952 DO g4b = g3b,noa+nva !l 953 DO azone1 = 1,atpart !nu 954 DO azone2 = azone1,atpart !mu 955 addr = addr + 1 956 int_mb(k_a_offset+addr) = azone2 - 1 + atpart * (azone1 - 1 + 957 & atpart * (g4b - 1 + (noa+nva) * (g3b - 1))) 958 int_mb(k_a_offset+length+addr) = size 959 size = size + 960 & int_mb(k_range_alpha+g3b-1) * int_mb(k_range_alpha+g4b-1) 961 & * nalength(azone1) * nalength(azone2) 962 END DO 963 END DO 964 END DO 965 END DO 966 RETURN 967 END 968 969