1 SUBROUTINE tce_mo2e_zones_4a_disk_ga_act(rtdb,d_v2, 2 1 kax_v2_alpha_offset, 3 1 size_2e) 4C $Id$ 5C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0 6C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002) 7C t ( p1 p2 h3 h4 )_t 8 IMPLICIT NONE 9#include "rtdb.fh" 10#include "global.fh" 11#include "mafdecls.fh" 12#include "stdio.fh" 13#include "util.fh" 14#include "bas.fh" 15#include "schwarz.fh" 16#include "sym.fh" 17#include "sf.fh" 18#include "errquit.fh" 19#include "tce.fh" 20#include "tce_main.fh" 21c 22c written by K. Kowalski 23c 24c 25c max. number of p2 groups =200 26c 27c 28 integer rtdb ! Run-time database 29 integer d_v2 ! MO integrals 30 integer kax_v2_alpha_offset ! MO integrals offset 31 integer size_2e ! 2e file size 32c 33 INTEGER size_2g2a,l_2g2a,k_2g2a 34 INTEGER azone1,azone2,azone3,azone4 35 INTEGER g1b,g2b,g3b,g4b 36 INTEGER igi1,igi2,igi3,igi4 37 INTEGER ii,i,j,k,l,N,ipos1,ipos2,ipos3,ipos4 38 INTEGER del1,del2,p1rel,p2rel 39 INTEGER size_4a,l_4a,k_4a 40c 41 integer mu,nu,rho,sigma 42 integer mu_lo,mu_hi 43 integer nu_lo,nu_hi 44 integer rho_lo,rho_hi 45 integer sigma_lo,sigma_hi 46 integer mu_range 47 integer nu_range 48 integer rho_range 49 integer sigma_range 50 integer mu1,nu1,rho1,sigma1 51 integer shift_mu,shift_nu 52 integer shift_rho,shift_sigma 53 integer work1,work2 ! Work array sizes 54 integer l_work1,k_work1 ! Work array 1 55 integer l_work2,k_work2 ! Work array 2 56 integer imu1,inu1,irho1,isigma1 57c 58 integer l_movecs_orb,k_movecs_orb 59 integer l_gpair,k_gpair 60 integer len_pair,g12_shift 61c ATTENTION,ACHTUNG,UWAGA 2000 - max # of CPU 62c 63 integer size_2g2z,l_2g2z,k_2g2z 64 integer tot_azone1_sh,tot_azone2_sh 65 integer tot_azone3_sh,tot_azone4_sh 66 integer ixi,jxi,point_pair 67 integer size_stripe,l_p34,k_p34 68 integer addr,xoffset_34 69 integer size_g3g4,xoffset_p34 70 integer size_g4321,k_g4321,l_g4321 71c 72 integer l_integral,l_coeff 73 integer k_integral,k_coeff 74 integer size_ic,size_icc,size_integral,size_coeff,max_na 75 integer l_aux,k_aux,size_aux,iend 76c 77 integer iha,ihb !number of corr. alpha, beta holes 78 integer ipa,ipb !number of corr. alpha, beta particles 79 integer INDEX_PAIR,icol,irow 80c compression 81 integer xoffset_g12(20000),xoffset_size(20000) 82 integer xoffset_size_p1p2(20000) 83 integer key_start(20000),key_end(20000) 84 integer size_temp,size_temp_4g,xoffset_temp_4g,size_p1p2 85 integer max_size_temp,xoffset_temp,iclose,iopen,offset_2g2z 86 integer imaxp12,istart,ibuba,sumx 87 double precision wall,cpu,wall1,cpu1,wall2,cpu2,wall3,cpu3 88c *** debug *** 89 double precision xtot1,xtot2,xtot3 90c ************* 91 double precision tot_zone(20000) 92 integer l_g12piece,k_g12piece,size_g12p 93c - exascale --- 94 LOGICAL is_active_4_o 95c -------------- 96 integer l_4af_offset,k_4af_offset,d_4af 97 integer sf_chunk,request 98 integer key_4af,offset_4af,size_4af 99 character*255 filename 100c 101 logical parallel 102c 103 INTEGER length 104 INTEGER next 105 INTEGER nprocs 106 INTEGER count 107 integer nxtask 108 external nxtask 109 logical nodezero 110 logical idiskl 111c 112c 113c 114c 115cccx parallel = (ga_nnodes().gt.1) 116 parallel = .true. 117c 118 if(idisk.eq.0) then 119 idiskl=.false. 120 else 121 idiskl=.true. 122 end if 123c 124c 125cccx max_size_temp=100000000 126 max_size_temp=imaxsize**4 127c 128c 129c 130 do ii=1,20000 131 tot_zone(ii)=0.0d0 132 enddo 133 if(atpart.gt.20000) 134 & call errquit('tce_zones: atpart too big',1,MA_ERR) 135 sumx=0 136 do ii=1,atpart 137 tot_zone(ii)=sumx 138 sumx=sumx+nalength(ii) 139 enddo 140c 141 nodezero=(ga_nodeid().eq.0) 142c 143c 144c this module is called only if intorb = .true. 145c N is the number of correlated orbitals 146 N = nmo(1) - nfc(1) - nfv(1) 147 iha = nocc(1)-nfc(1) 148 ihb = nocc(ipol)-nfc(ipol) 149 ipa = nmo(1)-nocc(1)-nfv(1) 150 ipb = nmo(ipol)-nocc(ipol)-nfv(ipol) 151c 152c Offset for 4a file 153c 154cc sf_chunk=(imaxsize+10)**4 155 sf_chunk=(imaxsize)**4 156 if(.not.idiskl) then 157c GA is assumed 158 call tce_4a_offset(l_4af_offset,k_4af_offset,size_4af) 159 else 160cccx call tce_filename('4aint',filename) 161 call tce_4a_offset(l_4af_offset,k_4af_offset,size_4af) 162cccx call util_file_name('4aintx',.true.,.false.,filename) 163 call util_file_name('4aintx',.false.,.false.,filename) 164 end if 165 if(idiskl) then 166 if(.not.parallel) 167 1 call errquit('sf only for parallel runs',1,DISK_ERR) 168 if(parallel) call ga_sync() 169 if (sf_create(filename,dble(bytes)*dble(size_4af), 170 1 dble(bytes)*dble(size_4af),sf_chunk,d_4af).ne.0) 171 2 call errquit('4-index: sf problem',0,DISK_ERR) 172ccx if (parallel) then 173ccx if (sf_close(d_4af).ne.0) 174ccx 1 call errquit('createfile: sf problem',1,DISK_ERR) 175ccx call ga_sync() 176ccx end if 177 else 178 call createfile(filename,d_4af,size_4af) 179 call reconcilefile(d_4af,size_4af) 180 call ga_zero(d_4af) 181 end if 182c 183c 184c *** debug *** 185c write(6,*)'step-1' 186c call util_flush(6) 187c ************* 188c 189c Pair's structure of the integral file 190 call tce_mo2e_pairs_act(l_gpair,k_gpair,len_pair) 191 len_pair = int_mb(k_gpair) 192c 193c *** debug *** 194c write(6,*)'step-0' 195c call util_flush(6) 196c ************* 197c 198c alpha orbitals only 199c 200 if (.not.ma_push_get(mt_dbl,nbf*(iha+ipa) 201 1 ,"sorted MO coeffs", 202 2 l_movecs_orb,k_movecs_orb)) 203 3 call errquit('tce_mo2e_zone: MA problem 1',0, 204 2 BASIS_ERR) 205 call dfill(nbf*(iha+ipa),0.0d0, dbl_mb(k_movecs_orb), 1) 206 do i=1,iha 207 do isigma1=1,nbf 208 dbl_mb(k_movecs_orb+(i-1)*nbf+isigma1-1)= 209 & dbl_mb(k_movecs_sorted+(i-1)*nbf+isigma1-1) 210 enddo 211 enddo 212 do i=iha+1,iha+ipa 213 do isigma1=1,nbf 214 dbl_mb(k_movecs_orb+(i-1)*nbf+isigma1-1)= 215 & dbl_mb(k_movecs_sorted+(i+ihb-1)*nbf+isigma1-1) 216 enddo 217 enddo 218c 219c 220 call int_mem_2e4c(work1,work2) 221 if (.not.ma_push_get(mt_dbl,work1,'work1',l_work1,k_work1)) 222 1 call errquit('tce_ao2e: MA problem work1',0,MA_ERR) 223 if (.not.ma_push_get(mt_dbl,work2,'work2',l_work2,k_work2)) 224 1 call errquit('tce_ao2e: MA problem work2',1,MA_ERR) 225c 226c 227c *** debug *** 228c write(6,*)'step1' 229c call util_flush(6) 230c ************* 231c 232c 4af file formed here 233c 234c 235 cpu1 = - util_cpusec() 236 wall1 = - util_wallsec() 237c 238 nprocs = GA_NNODES() 239 count = 0 240 next = NXTASK(nprocs, 1) 241 DO azone1 = 1,atpart !nu 242 DO azone2 = azone1,atpart !mu 243 DO azone3 = 1,atpart !sigma 244 DO azone4 = azone3,atpart !rho 245 IF (next.eq.count) THEN 246c --------------------------- 247 size_4a = nalength(azone1)*nalength(azone2)* 248 1 nalength(azone3)*nalength(azone4) 249 if(.not.ma_push_get(mt_dbl,size_4a,'4a',l_4a,k_4a)) 250 1 call errquit('tce_4af_zones1: MA problem',0,MA_ERR) 251 call dfill(size_4a, 0.0d0, dbl_mb(k_4a), 1) 252 shift_mu = 0 253 do mu = a2length(azone2)+1,a2length(azone2+1) 254 if (.not.bas_cn2bfr(ao_bas_han,mu,mu_lo,mu_hi)) 255 1 call errquit('tce_ao2e: basis fn range problem 1',0, 256 2 BASIS_ERR) 257 mu_range = mu_hi - mu_lo + 1 258 shift_nu = 0 259 do nu = a2length(azone1)+1,a2length(azone1+1) 260 if (.not.bas_cn2bfr(ao_bas_han,nu,nu_lo,nu_hi)) 261 1 call errquit('tce_ao2e: basis fn range problem 1',0, 262 2 BASIS_ERR) 263 nu_range = nu_hi - nu_lo + 1 264 shift_rho = 0 265 do rho = a2length(azone4)+1,a2length(azone4+1) 266 if (.not.bas_cn2bfr(ao_bas_han,rho,rho_lo,rho_hi)) 267 1 call errquit('tce_ao2e: basis fn range problem 1',0, 268 2 BASIS_ERR) 269 rho_range = rho_hi - rho_lo + 1 270 shift_sigma = 0 271 do sigma = a2length(azone3)+1,a2length(azone3+1) 272 if (.not.bas_cn2bfr(ao_bas_han,sigma,sigma_lo,sigma_hi)) 273 1 call errquit('tce_ao2e: basis fn range problem 1',0, 274 2 BASIS_ERR) 275 sigma_range = sigma_hi - sigma_lo + 1 276 if (schwarz_shell(rho,sigma)*schwarz_shell(mu,nu) 277 1 .ge. tol2e) then 278cccx call dfill(work1, 0.0d0, dbl_mb(k_work1), 1) 279cccx call dfill(work2, 0.0d0, dbl_mb(k_work2), 1) 280 call int_2e4c(ao_bas_han,mu,nu,ao_bas_han,rho,sigma, 281 1 work2,dbl_mb(k_work2),work1,dbl_mb(k_work1)) 282c 283 i=0 284 do mu1 = 1,mu_range 285 do nu1 = 1,nu_range 286 do rho1 = 1,rho_range 287 do sigma1 = 1,sigma_range 288 i=i+1 289 inu1=nu1+shift_nu 290 isigma1=sigma1+shift_sigma 291 imu1=mu1+shift_mu 292 irho1=rho1+shift_rho 293c (isigma1,irho1|inu1, imu1) 294 ipos1=(((imu1-1)*nalength(azone1)+inu1-1)* 295 1 nalength(azone4)+irho1-1)*nalength(azone3) 296 2 +isigma1 297 dbl_mb(k_4a+ipos1-1)=dbl_mb(k_work1+i-1) 298 enddo 299 enddo 300 enddo 301 enddo 302 end if !schwarz screening 303 shift_sigma = shift_sigma + sigma_range 304 enddo !sigma 305 shift_rho = shift_rho + rho_range 306 enddo !rho 307 shift_nu = shift_nu + nu_range 308 enddo !nu 309 shift_mu = shift_mu + mu_range 310 enddo !mu 311c 312c fixing offsets and sf_writing 313 key_4af=azone4 - 1 + atpart * (azone3 - 1 + 314 & atpart * (azone2 - 1 + atpart * (azone1 - 1))) 315 call tce_hash(int_mb(k_4af_offset),key_4af,offset_4af) 316 if(idiskl) then 317 if (sf_write(d_4af,dble(bytes)*dble(offset_4af), 318 1 dble(bytes)*dble(size_4a),dbl_mb(k_4a),request).ne.0) 319 2 call errquit('zones put: sf problem2',1,DISK_ERR) 320 if (sf_wait(request).ne.0) 321 1 call errquit('zones put: sf problem3',2,DISK_ERR) 322 else 323 call ga_put(d_4af,offset_4af+1,offset_4af+size_4a,1,1, 324 1 dbl_mb(k_4a),size_4a) 325 end if 326c closing l_4a file 327 if (.not.ma_pop_stack(l_4a)) 328 1 call errquit('tcc_mo2e_4af2: l_4a',15,MA_ERR) 329c --------------------------- 330 next = NXTASK(nprocs, 1) 331 END IF 332 count = count + 1 333 ENDDO !azone4 334 ENDDO !azone3 335 ENDDO !azone2 336 ENDDO !azone1 337c 338c 339c 340 call ga_sync() 341c 342c 343c 344 call ga_sync() 345c 346c 347c *** debug *** 348c write(6,*)'step2' 349c call util_flush(6) 350c ************* 351c 352c 353 next = nxtask(-nprocs,1) 354c 355c 356 if(.not.idiskl) then 357 call reconcilefile(d_4af,size_4af) 358 end if 359c 360c 361c 362 max_na=0 363 do ixi=1,atpart 364 if(nalength(ixi).gt.max_na) max_na=nalength(ixi) 365 enddo 366 size_ic=2*((max_na)**4)+1 367 size_icc=tile_dim*max_na 368c 369c 370c 371 if (.not.ma_push_get(mt_dbl,size_ic,'l_int', 372 1 l_integral,k_integral)) 373 1 call errquit('tce_4s: MA problem l_int',0,MA_ERR) 374c 375 if (.not.ma_push_get(mt_dbl,size_icc,'l_coeff', 376 1 l_coeff,k_coeff)) 377 1 call errquit('tce_4s: MA problem l_coeff',0,MA_ERR) 378c 379c 380c 381c 382 nprocs = GA_NNODES() 383 count = 0 384 next = NXTASK(nprocs, 1) 385 DO g3b = 1,noa+nva !k 386 DO g4b = g3b,noa+nva !l 387 DO azone1 = 1,atpart !nu 388 DO azone2 = azone1,atpart !mu 389 IF (next.eq.count) THEN 390c 391 cpu1 = - util_cpusec() 392 wall1 = - util_wallsec() 393c 394 size_2g2a=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1) 395 1 *nalength(azone1)*nalength(azone2) 396 if (.not.ma_push_get(mt_dbl,size_2g2a,'2g2a',l_2g2a,k_2g2a)) 397 1 call errquit('tce_r2_divide1: MA problem',0,MA_ERR) 398 call dfill(size_2g2a, 0.0d0, dbl_mb(k_2g2a), 1) 399c *** debug *** 400c write(6,*)'step3' 401c call util_flush(6) 402c ************* 403c 404 DO azone3 = 1,atpart !sigma 405 DO azone4 = azone3,atpart !rho 406 size_4a = nalength(azone1)*nalength(azone2)* 407 1 nalength(azone3)*nalength(azone4) 408 if(.not.ma_push_get(mt_dbl,size_4a,'4a',l_4a,k_4a)) 409 1 call errquit('tce_r2_divide2: MA problem',0,MA_ERR) 410cccx call dfill(size_4a, 0.0d0, dbl_mb(k_4a), 1) 411c 412c 413c 414c 415 key_4af=azone4 - 1 + atpart * (azone3 - 1 + 416 & atpart * (azone2 - 1 + atpart * (azone1 - 1))) 417 call tce_hash(int_mb(k_4af_offset),key_4af,offset_4af) 418 if(idiskl) then 419 if (sf_read(d_4af,dble(bytes)*dble(offset_4af), 420 1 dble(bytes)*dble(size_4a),dbl_mb(k_4a),request).ne.0) 421 1 call errquit('zones get2: sf problem',1,DISK_ERR) 422 if (sf_wait(request).ne.0) 423 1 call errquit('zones get3: sf problem',2,DISK_ERR) 424 else 425 call ga_get(d_4af,offset_4af+1,offset_4af+size_4a,1,1, 426 1 dbl_mb(k_4a),size_4a) 427 end if 428c 429c 430 tot_azone3_sh=tot_zone(azone3) 431 tot_azone4_sh=tot_zone(azone4) 432c 433 if(azone3.ne.azone4) then !!============azone3.ne.azone4 434c --- C_[g3][sigma] 435 i = 0 436 do isigma1 = 1,nalength(azone3) 437 do igi3 = 1,int_mb(k_range_alpha+g3b-1) 438 i = i+1 439 ipos1=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone3_sh 440 & +isigma1 441 dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1) 442 enddo 443 enddo 444c --- C_[g3][sigma]([sigma]<[rho]|[nu] [mu]) 445c zone3 zone3 zone4 446 call dgemm('N','N', 447 1 int_mb(k_range_alpha+g3b-1), !m 448 1 nalength(azone1)*nalength(azone2)*nalength(azone4), !n 449 1 nalength(azone3), !k 450 1 1.0d0, 451 1 dbl_mb(k_coeff), !A 452 1 int_mb(k_range_alpha+g3b-1), !lda 453 1 dbl_mb(k_4a),nalength(azone3), ! B,ldb 454 1 0.0d0,dbl_mb(k_integral), 455 1 int_mb(k_range_alpha+g3b-1)) 456c transposition ([g3][rho]|[nu][mu]) => ([rho][g3]|[nu][mu]) 457c 458 size_aux=int_mb(k_range_alpha+g3b-1)*nalength(azone4)* 459 1 nalength(azone1)*nalength(azone2) 460 if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux)) 461 1 call errquit('tce_aux1: MA problem',0,MA_ERR) 462 CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux), 463 & int_mb(k_range_alpha+g3b-1),nalength(azone4), 464 & nalength(azone1),nalength(azone2), 465 &2,1,3,4,1.0d0) 466 do i=1,size_aux 467 dbl_mb(k_integral+i-1)=dbl_mb(k_aux+i-1) 468 enddo 469 if (.not.ma_pop_stack(l_aux)) 470 1 call errquit('l_aux1',15,MA_ERR) 471c C([g4][azone4=rho]) 472 i = 0 473 do irho1 = 1,nalength(azone4) 474 do igi4 = 1,int_mb(k_range_alpha+g4b-1) 475 i = i+1 476 ipos1=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone4_sh 477 & +irho1 478 dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1) 479 enddo 480 enddo 481c C([g4][azone4=rho])([rho][g3]|[azone1][azone2]) 482 call dgemm('N','N', 483 1 int_mb(k_range_alpha+g4b-1), !m 484 1 nalength(azone1)*nalength(azone2)*int_mb(k_range_alpha+g3b-1), !n 485 1 nalength(azone4), !k 486 1 1.0d0, 487 1 dbl_mb(k_coeff), !A 488 1 int_mb(k_range_alpha+g4b-1), !lda 489 1 dbl_mb(k_integral),nalength(azone4), ! B,ldb 490 1 1.0d0,dbl_mb(k_2g2a), 491 1 int_mb(k_range_alpha+g4b-1)) 492c done with first part of azone3.ne.azone4 case ---------------- 493c transposition of atomic integrals 494c ([sigma=azone3][rho=azone4]|[nu][mu]) ==> 495c ([rho][sigma]|[nu][mu]) 496 size_aux=size_4a 497 if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux)) 498 1 call errquit('tce_aux2: MA problem',0,MA_ERR) 499 CALL TCE_SORT_4KG_(dbl_mb(k_4a),dbl_mb(k_aux), 500 & nalength(azone3),nalength(azone4), 501 & nalength(azone1),nalength(azone2), 502 &2,1,3,4,1.0d0) 503ccx if (.not.ma_pop_stack(l_aux)) 504ccx 1 call errquit('l_aux2',15,MA_ERR) ! see after dgemm 505c C([g3][azone4=rho]) 506 i = 0 507 do irho1 = 1,nalength(azone4) 508 do igi3 = 1,int_mb(k_range_alpha+g3b-1) 509 i = i+1 510 ipos1=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone4_sh 511 & +irho1 512 dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1) 513 enddo 514 enddo 515c C([g3][azone4=rho])([rho=azone4][sigma=azone3]|[nu][mu]) 516 call dgemm('N','N', 517 1 int_mb(k_range_alpha+g3b-1), !m 518 1 nalength(azone1)*nalength(azone2)*nalength(azone3), !n 519 1 nalength(azone4), !k 520 1 1.0d0, 521 1 dbl_mb(k_coeff), !A 522 1 int_mb(k_range_alpha+g3b-1), !lda 523 1 dbl_mb(k_aux),nalength(azone4), ! B,ldb 524 1 0.0d0,dbl_mb(k_integral), 525 1 int_mb(k_range_alpha+g3b-1)) 526c 527 if (.not.ma_pop_stack(l_aux)) 528 1 call errquit('l_aux2',15,MA_ERR) 529c transposition ([g3][sigma]|[nu][mu]) => ([sigma][g3]|[nu][mu]) 530c 531 size_aux=int_mb(k_range_alpha+g3b-1)*nalength(azone3)* 532 1 nalength(azone1)*nalength(azone2) 533 if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux)) 534 1 call errquit('tce_aux1: MA problem',0,MA_ERR) 535 CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux), 536 & int_mb(k_range_alpha+g3b-1),nalength(azone3), 537 & nalength(azone1),nalength(azone2), 538 &2,1,3,4,1.0d0) 539 do i=1,size_aux 540 dbl_mb(k_integral+i-1)=dbl_mb(k_aux+i-1) 541 enddo 542 if (.not.ma_pop_stack(l_aux)) 543 1 call errquit('l_aux1',15,MA_ERR) 544c C([g4][azone3=sigma]) 545 i = 0 546 do isigma1 = 1,nalength(azone3) 547 do igi4 = 1,int_mb(k_range_alpha+g4b-1) 548 i = i+1 549 ipos1=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone3_sh 550 & +isigma1 551 dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1) 552 enddo 553 enddo 554c C([g4][azone3=sigma])([sigma][g3]|[azone1][azone2]) 555 call dgemm('N','N', 556 1 int_mb(k_range_alpha+g4b-1), !m 557 1 nalength(azone1)*nalength(azone2)*int_mb(k_range_alpha+g3b-1), !n 558 1 nalength(azone3), !k 559 1 1.0d0, 560 1 dbl_mb(k_coeff), !A 561 1 int_mb(k_range_alpha+g4b-1), !lda 562 1 dbl_mb(k_integral),nalength(azone3), ! B,ldb 563 1 1.0d0,dbl_mb(k_2g2a), 564 1 int_mb(k_range_alpha+g4b-1)) 565 end if !azone3.ne.azone4 566c 567c 568c 569 if(azone3.eq.azone4) then 570c --- C_[g3][sigma] 571 i = 0 572 do isigma1 = 1,nalength(azone3) 573 do igi3 = 1,int_mb(k_range_alpha+g3b-1) 574 i = i+1 575 ipos1=(int_mb(k_offset_alpha+g3b-1)+igi3-1)*nbf+tot_azone3_sh 576 & +isigma1 577 dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1) 578 enddo 579 enddo 580c --- C_[g3][sigma]([sigma]=[rho]|[nu] [mu]) 581c zone3 zone3 zone4 582 call dgemm('N','N', 583 1 int_mb(k_range_alpha+g3b-1), !m 584 1 nalength(azone1)*nalength(azone2)*nalength(azone4), !n 585 1 nalength(azone3), !k 586 1 1.0d0, 587 1 dbl_mb(k_coeff), !A 588 1 int_mb(k_range_alpha+g3b-1), !lda 589 1 dbl_mb(k_4a),nalength(azone3), ! B,ldb 590 1 0.0d0,dbl_mb(k_integral), 591 1 int_mb(k_range_alpha+g3b-1)) 592c transposition ([g3][rho]|[nu][mu]) => ([rho][g3]|[nu][mu]) 593c 594 size_aux=int_mb(k_range_alpha+g3b-1)*nalength(azone4)* 595 1 nalength(azone1)*nalength(azone2) 596 if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux)) 597 1 call errquit('tce_aux1: MA problem',0,MA_ERR) 598 CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux), 599 & int_mb(k_range_alpha+g3b-1),nalength(azone4), 600 & nalength(azone1),nalength(azone2), 601 &2,1,3,4,1.0d0) 602 do i=1,size_aux 603 dbl_mb(k_integral+i-1)=dbl_mb(k_aux+i-1) 604 enddo 605 if (.not.ma_pop_stack(l_aux)) 606 1 call errquit('l_aux1',15,MA_ERR) 607c C([g4][azone4=rho]) 608 i = 0 609 do irho1 = 1,nalength(azone4) 610 do igi4 = 1,int_mb(k_range_alpha+g4b-1) 611 i = i+1 612 ipos1=(int_mb(k_offset_alpha+g4b-1)+igi4-1)*nbf+tot_azone4_sh 613 & +irho1 614 dbl_mb(k_coeff+i-1)=dbl_mb(k_movecs_orb+ipos1-1) 615 enddo 616 enddo 617c C([g4][azone4=rho])([rho][g3]|[azone1][azone2]) 618 call dgemm('N','N', 619 1 int_mb(k_range_alpha+g4b-1), !m 620 1 nalength(azone1)*nalength(azone2)*int_mb(k_range_alpha+g3b-1), !n 621 1 nalength(azone4), !k 622 1 1.0d0, 623 1 dbl_mb(k_coeff), !A 624 1 int_mb(k_range_alpha+g4b-1), !lda 625 1 dbl_mb(k_integral),nalength(azone4), ! B,ldb 626 1 1.0d0,dbl_mb(k_2g2a), 627 1 int_mb(k_range_alpha+g4b-1)) 628 end if !azone3.eq.azone4 629c 630c 631c 632 if (.not.ma_pop_stack(l_4a)) 633 1 call errquit('tcc_mo2e_trans_zones: l_4a',15,MA_ERR) 634 ENDDO !azone4 635c one step ahead 636 ENDDO !azone3 637c 638c 639c 640c 641c *** debug *** 642c write(6,*)'step4' 643c call util_flush(6) 644c ************* 645c 646c 647c g2 g1 do loops 648c open (g4 g3 | all symmetry allowed g2 g1) - equally good we can split it into 649c pieces 650c (k,l|i,j) pieces 651c 652c calculate point_pair index here 653 ixi=noa+nva-g3b+1 654 jxi=noa+nva-g4b+1 655 point_pair=((noa+nva)*(noa+nva+1))/2-((ixi-1)*ixi)/2-jxi+1 656 size_stripe=int_mb(k_gpair+point_pair) 657 xoffset_p34 = int_mb(k_gpair+len_pair+point_pair) 658 addr=int_mb(k_gpair+2*len_pair+point_pair) 659c 660c - exascale --- 661 if((size_stripe.eq.0).and.(xoffset_p34.eq.0)) go to 300 662c --- 663c 664c 665c offset for blocking the (nu mu | g2 g1 ) = C*C matrix 666c 667 do i=1,20000 668 key_start(i) = 0 669 key_end(i) = 0 670 xoffset_g12(i) = 0 671 xoffset_size(i) = 0 672 xoffset_size_p1p2(i) = 0 673 enddo 674 imaxp12=0 675 size_temp=0 676 size_temp_4g=0 677 xoffset_temp=0 678 xoffset_temp_4g=0 679 size_p1p2=0 680 i=1 681 DO g1b = 1,noa+nva !l 682 DO g2b = g1b,noa+nva !k 683c - exascale --- 684 IF(.not.((g3b.gt.noa).and.(g4b.gt.noa).and.(g1b.gt.noa).and. 685 & (g2b.gt.noa).and.(.not.is_active_4_o(g3b,g4b,g1b,g2b)))) THEN 686c --- 687 IF (int_mb(k_spin_alpha+g3b-1)+int_mb(k_spin_alpha+g4b-1).eq. 688 & int_mb(k_spin_alpha+g1b-1)+int_mb(k_spin_alpha+g2b-1)) THEN 689 IF (ieor(int_mb(k_sym_alpha+g3b-1),ieor(int_mb(k_sym_alpha+g4b-1) 690 & ,ieor(int_mb(k_sym_alpha+g1b-1),int_mb(k_sym_alpha+g2b-1)))) .eq. 691 & irrep_v) THEN 692 IROW=INDEX_PAIR(g4b,g3b) 693 ICOL=INDEX_PAIR(g2b,g1b) 694 IF(IROW.GE.ICOL) THEN 695 if(size_temp_4g.eq.0) then 696 key_start(i)=icol 697 end if 698 size_temp=size_temp+int_mb(k_range_alpha+g2b-1) 699 1 *int_mb(k_range_alpha+g1b-1) 700 1 *nalength(azone1)*nalength(azone2) 701 size_temp_4g=size_temp_4g+int_mb(k_range_alpha+g2b-1) 702 1 *int_mb(k_range_alpha+g1b-1) 703 2 *int_mb(k_range_alpha+g3b-1) 704 3 *int_mb(k_range_alpha+g4b-1) 705 size_p1p2=size_p1p2+int_mb(k_range_alpha+g2b-1) 706 1 *int_mb(k_range_alpha+g1b-1) 707 ibuba=icol 708c 709c if(size_temp.gt.max_size_temp) then 710c has to be fully consistent with later part 711c 712 if(size_temp_4g.gt.max_size_temp) then 713 xoffset_g12(i)=xoffset_temp_4g 714 xoffset_size(i)=size_temp 715 xoffset_size_p1p2(i)=size_p1p2 716 xoffset_temp=xoffset_temp+size_temp 717 xoffset_temp_4g=xoffset_temp_4g+size_temp_4g 718 key_end(i)=icol 719 size_temp=0 720 size_temp_4g=0 721 size_p1p2=0 722 imaxp12=i 723 i=i+1 724 end if 725 END IF 726 END IF 727 END IF 728c --- exascale - 729 END IF 730c -------------- 731 ENDDO 732 ENDDO 733c 734 if(size_temp_4g.ne.0) then 735 xoffset_g12(i)=xoffset_temp_4g 736 xoffset_size(i)=size_temp 737 xoffset_size_p1p2(i)=size_p1p2 738 key_end(i)=ibuba 739 imaxp12=i 740 end if 741c 742 if(i.gt.20000) 743 1 call errquit('tce_zone: xoffset-size-problem',0,MA_ERR) 744c 745c *** debug *** 746c write(6,*)'step5' 747c call util_flush(6) 748c ************* 749c 750 do i =1, imaxp12 751 size_g12p=int_mb(k_range_alpha+g4b-1)* 752 1 int_mb(k_range_alpha+g3b-1)* 753 1 xoffset_size_p1p2(i) 754 if (.not.ma_push_get(mt_dbl,size_g12p,'g12piece', 755 1 l_g12piece,k_g12piece)) 756 1 call errquit('tce_zone: MA g12-piece ',0,MA_ERR) 757c TRY IF THIS CAN BE BY-PASSED 758 call dfill(size_g12p, 0.0d0, dbl_mb(k_g12piece), 1) 759c 760c 761c 762c 763 offset_2g2z=0 ! 764c 765c 766c 767 iend=0 768 istart=0 769 DO g1b = 1,noa+nva !l 770 DO g2b = g1b,noa+nva !k 771 ICOL=INDEX_PAIR(g2b,g1b) 772 IF(iend.eq.1) go to 4444 773 if(istart.ne.1) then 774 IF(ICOL.eq.key_start(i)) istart=1 775 end if 776 if(icol.eq.key_end(i)) iend=1 777 IF(istart.eq.1) THEN 778c - exascale --- 779 IF(.not.((g3b.gt.noa).and.(g4b.gt.noa).and.(g1b.gt.noa).and. 780 & (g2b.gt.noa).and.(.not.is_active_4_o(g3b,g4b,g1b,g2b)))) THEN 781c --- 782 IF (int_mb(k_spin_alpha+g3b-1)+int_mb(k_spin_alpha+g4b-1).eq. 783 & int_mb(k_spin_alpha+g1b-1)+int_mb(k_spin_alpha+g2b-1)) THEN 784 IF (ieor(int_mb(k_sym_alpha+g3b-1),ieor(int_mb(k_sym_alpha+g4b-1) 785 & ,ieor(int_mb(k_sym_alpha+g1b-1),int_mb(k_sym_alpha+g2b-1)))) .eq. 786 & irrep_v) THEN 787 IROW=INDEX_PAIR(g4b,g3b) 788 ICOL=INDEX_PAIR(g2b,g1b) 789 IF(IROW.GE.ICOL) THEN 790 tot_azone1_sh=tot_zone(azone1) 791 tot_azone2_sh=tot_zone(azone2) 792 if(azone1.ne.azone2) then !----------------- 793c C_([mu-azone2][g2]) 794 ii=0 795 do igi2 = 1,int_mb(k_range_alpha+g2b-1) 796 do imu1 = 1,nalength(azone2) 797 ii = ii+1 798 ipos1=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone2_sh 799 & +imu1 800 dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1) 801 enddo 802 enddo 803c ([g4][g3]|[nu-azone1][mu-azone2])*C_([mu-azone2][g2]) 804 call dgemm('N','N', 805 1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 806 1 nalength(azone1), !m 807 1 int_mb(k_range_alpha+g2b-1), !n 808 3 nalength(azone2), !k 809 4 1.0d0,dbl_mb(k_2g2a), 810 5 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 811 5 nalength(azone1), 812 6 dbl_mb(k_coeff),nalength(azone2), 813 7 0.0d0,dbl_mb(k_integral), 814 8 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 815 8 nalength(azone1)) 816c transposition ([g4][g3]|[nu-azone1][g2])=>([g4][g3]|[g2][nu-azone1]) 817 size_aux=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1) 818 1 *int_mb(k_range_alpha+g2b-1)*nalength(azone1) 819 if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux)) 820 1 call errquit('tce_aux2: MA problem',0,MA_ERR) 821 CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux), 822 &int_mb(k_range_alpha+g4b-1),int_mb(k_range_alpha+g3b-1), 823 &nalength(azone1),int_mb(k_range_alpha+g2b-1), 824 &1,2,4,3,1.0d0) 825 do ii=1,size_aux 826 dbl_mb(k_integral+ii-1)=dbl_mb(k_aux+ii-1) 827 enddo 828 if (.not.ma_pop_stack(l_aux)) 829 1 call errquit('l_aux2',15,MA_ERR) 830c C_([nu-azone1][g1]) 831 ii=0 832 do igi1 = 1,int_mb(k_range_alpha+g1b-1) 833 do inu1 = 1,nalength(azone1) 834 ii = ii+1 835 ipos1=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone1_sh 836 & +inu1 837 dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1) 838 enddo 839 enddo 840c ([g4][g3]|[g2][nu-azone1])*C_([nu-azone1][g1]) 841 call dgemm('N','N', 842 1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 843 1 int_mb(k_range_alpha+g2b-1), !m 844 1 int_mb(k_range_alpha+g1b-1), !n 845 3 nalength(azone1), !k 846 4 1.0d0,dbl_mb(k_integral), 847 5 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 848 5 int_mb(k_range_alpha+g2b-1), 849 6 dbl_mb(k_coeff),nalength(azone1), 850 7 1.0d0,dbl_mb(k_g12piece+offset_2g2z), 851 8 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 852 8 int_mb(k_range_alpha+g2b-1)) 853c done with first part of azone3.ne.azone4 case ---------------- 854c transposition of atomic integrals 855c ([g4][g3]|[azone1-nu][azone2-mu]) ==> 856c ([g4][g3]|[azone2-mu][azone1-nu]) 857 size_aux=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1) 858 1 *nalength(azone1)*nalength(azone2) 859 if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux)) 860 1 call errquit('tce_aux2: MA problem',0,MA_ERR) 861 CALL TCE_SORT_4KG_(dbl_mb(k_2g2a),dbl_mb(k_aux), 862 & int_mb(k_range_alpha+g4b-1),int_mb(k_range_alpha+g3b-1), 863 & nalength(azone1),nalength(azone2), 864 & 1,2,4,3,1.0d0) 865ccx if (.not.ma_pop_stack(l_aux)) 866ccx 1 call errquit('l_aux2',15,MA_ERR) ! see after dgemm 867c C([azone1-nu][g2]) 868 ii=0 869 do igi2 = 1,int_mb(k_range_alpha+g2b-1) 870 do inu1 = 1,nalength(azone1) 871 ii = ii+1 872 ipos1=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone1_sh 873 & +inu1 874 dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1) 875 enddo 876 enddo 877c ([g4][g3]|[azone2-mu][azone1-nu])*C([azone1-nu][g2]) 878 call dgemm('N','N', 879 1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 880 1 nalength(azone2), !m 881 1 int_mb(k_range_alpha+g2b-1), !n 882 3 nalength(azone1), !k 883 4 1.0d0,dbl_mb(k_aux), 884 5 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 885 5 nalength(azone2), 886 6 dbl_mb(k_coeff),nalength(azone1), 887 7 0.0d0,dbl_mb(k_integral), 888 8 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 889 8 nalength(azone2)) 890c 891 if (.not.ma_pop_stack(l_aux)) 892 1 call errquit('l_aux2',15,MA_ERR) 893c transposition ([g4][g3]|[azone2-mu][g2])=>([g4][g3]|[g2][azone2-mu]) 894 size_aux=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1) 895 1 *int_mb(k_range_alpha+g2b-1)*nalength(azone2) 896 if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux)) 897 1 call errquit('tce_aux2: MA problem',0,MA_ERR) 898 CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux), 899 &int_mb(k_range_alpha+g4b-1),int_mb(k_range_alpha+g3b-1), 900 &nalength(azone2),int_mb(k_range_alpha+g2b-1), 901 &1,2,4,3,1.0d0) 902 do ii=1,size_aux 903 dbl_mb(k_integral+ii-1)=dbl_mb(k_aux+ii-1) 904 enddo 905 if (.not.ma_pop_stack(l_aux)) 906 1 call errquit('l_aux2',15,MA_ERR) 907c C([azone2-mu][g1]) 908 ii=0 909 do igi1 = 1,int_mb(k_range_alpha+g1b-1) 910 do imu1 = 1,nalength(azone2) 911 ii = ii+1 912 ipos1=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone2_sh 913 & +imu1 914 dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1) 915 enddo 916 enddo 917c ([g4][g3]|[g2][azone2-mu])*C([azone2-mu][g1]) 918 call dgemm('N','N', 919 1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 920 1 int_mb(k_range_alpha+g2b-1), !m 921 1 int_mb(k_range_alpha+g1b-1), !n 922 3 nalength(azone2), !k 923 4 1.0d0,dbl_mb(k_integral), 924 5 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 925 5 int_mb(k_range_alpha+g2b-1), 926 6 dbl_mb(k_coeff),nalength(azone2), 927 7 1.0d0,dbl_mb(k_g12piece+offset_2g2z), 928 8 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 929 8 int_mb(k_range_alpha+g2b-1)) 930c 931 offset_2g2z=offset_2g2z+int_mb(k_range_alpha+g4b-1)* 932 1 int_mb(k_range_alpha+g3b-1)*int_mb(k_range_alpha+g2b-1)* 933 1 int_mb(k_range_alpha+g1b-1) 934 end if ! azone1.ne.azone2 !----------------- 935c 936c second part 937c 938 if(azone1.eq.azone2) then 939c C_([mu-azone2][g2]) 940 ii=0 941 do igi2 = 1,int_mb(k_range_alpha+g2b-1) 942 do imu1 = 1,nalength(azone2) 943 ii = ii+1 944 ipos1=(int_mb(k_offset_alpha+g2b-1)+igi2-1)*nbf+tot_azone2_sh 945 & +imu1 946 dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1) 947 enddo 948 enddo 949c ([g4][g3]|[nu-azone1][mu-azone2])*C_([mu-azone2][g2]) 950 call dgemm('N','N', 951 1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 952 1 nalength(azone1), !m 953 1 int_mb(k_range_alpha+g2b-1), !n 954 3 nalength(azone2), !k 955 4 1.0d0,dbl_mb(k_2g2a), 956 5 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 957 5 nalength(azone1), 958 6 dbl_mb(k_coeff),nalength(azone2), 959 7 0.0d0,dbl_mb(k_integral), 960 8 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 961 8 nalength(azone1)) 962c transposition ([g4][g3]|[nu-azone1][g2])=>([g4][g3]|[g2][nu-azone1]) 963 size_aux=int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1) 964 1 *int_mb(k_range_alpha+g2b-1)*nalength(azone1) 965 if(.not.ma_push_get(mt_dbl,size_aux,'aux',l_aux,k_aux)) 966 1 call errquit('tce_aux2: MA problem',0,MA_ERR) 967 CALL TCE_SORT_4KG_(dbl_mb(k_integral),dbl_mb(k_aux), 968 &int_mb(k_range_alpha+g4b-1),int_mb(k_range_alpha+g3b-1), 969 &nalength(azone1),int_mb(k_range_alpha+g2b-1), 970 &1,2,4,3,1.0d0) 971 do ii=1,size_aux 972 dbl_mb(k_integral+ii-1)=dbl_mb(k_aux+ii-1) 973 enddo 974 if (.not.ma_pop_stack(l_aux)) 975 1 call errquit('l_aux2',15,MA_ERR) 976c C_([nu-azone1][g1]) 977 ii=0 978 do igi1 = 1,int_mb(k_range_alpha+g1b-1) 979 do inu1 = 1,nalength(azone1) 980 ii = ii+1 981 ipos1=(int_mb(k_offset_alpha+g1b-1)+igi1-1)*nbf+tot_azone1_sh 982 & +inu1 983 dbl_mb(k_coeff+ii-1)=dbl_mb(k_movecs_orb+ipos1-1) 984 enddo 985 enddo 986c ([g4][g3]|[g2][nu-azone1])*C_([nu-azone1][g1]) 987 call dgemm('N','N', 988 1 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 989 1 int_mb(k_range_alpha+g2b-1), !m 990 1 int_mb(k_range_alpha+g1b-1), !n 991 3 nalength(azone1), !k 992 4 1.0d0,dbl_mb(k_integral), 993 5 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 994 5 int_mb(k_range_alpha+g2b-1), 995 6 dbl_mb(k_coeff),nalength(azone1), 996 7 1.0d0,dbl_mb(k_g12piece+offset_2g2z), 997 8 int_mb(k_range_alpha+g4b-1)*int_mb(k_range_alpha+g3b-1)* 998 8 int_mb(k_range_alpha+g2b-1)) 999c 1000 offset_2g2z=offset_2g2z+int_mb(k_range_alpha+g4b-1)* 1001 1 int_mb(k_range_alpha+g3b-1)*int_mb(k_range_alpha+g2b-1)* 1002 1 int_mb(k_range_alpha+g1b-1) 1003c 1004 end if !azone1.eq.azone2 1005c !--------------------------------------------------------------------- 1006 END IF ! irow.gt.icol 1007 END IF ! symmetry 1008 END IF ! spin 1009c --- exascale - 1010 END IF 1011c -------------- 1012 END IF ! istart 1013 ENDDO ! g2b-loop ends up here 1014 ENDDO ! g1b-loop ends up here 1015c 1016 4444 continue 1017c 1018c now put here add_block 1019c *** debug *** 1020c write(6,*)'step5a' 1021c call util_flush(6) 1022c ************* 1023 call add_block(d_v2,dbl_mb(k_g12piece),size_g12p, 1024 & xoffset_p34+xoffset_g12(i)) 1025c *** debug *** 1026c write(6,*)'step5b' 1027c call util_flush(6) 1028c ************* 1029 istart=0 1030c 1031c 1032cccx if (.not.ma_pop_stack(l_2g2z)) 1033cccx 1 call errquit('tcc_mo2e_trans_zones: l_2g2z',15,MA_ERR) 1034 if (.not.ma_pop_stack(l_g12piece)) 1035 1 call errquit('tcc_mo2e_trans_zones: l_g12piece',15,MA_ERR) 1036 ENDDO ! i = 1,imaxp12 1037c 1038c *** debug *** 1039c write(6,*)'step100' 1040c call util_flush(6) 1041c ************* 1042c 1043c - exascale --- 1044 300 continue 1045c --- 1046c 1047 1048 if (.not.ma_pop_stack(l_2g2a)) 1049 1 call errquit('tcc_mo2e_trans_zones: MA problem',15,MA_ERR) 1050c 1051c 1052 next = NXTASK(nprocs, 1) 1053 END IF 1054 count = count + 1 1055 ENDDO !azone2 1056 ENDDO !azone1 1057 ENDDO !g4b 1058 ENDDO !g3b 1059c 1060 next = nxtask(-nprocs,1) 1061c 1062 call reconcilefile(d_v2,size_2e) 1063c 1064c 1065 if (.not.ma_pop_stack(l_coeff)) 1066 1 call errquit('tcc_off_4a: MA problem',15,MA_ERR) 1067c 1068 if (.not.ma_pop_stack(l_integral)) 1069 1 call errquit('tcc_off_4a: MA problem',15,MA_ERR) 1070c 1071c 1072c *** debug *** 1073c 1074c if(nodezero) then 1075c write(6,*)'DONE --- DONE ---- DONE ---- DONE' 1076c end if 1077c 1078c ************* 1079 if (.not.ma_pop_stack(l_work2)) 1080 1 call errquit('tcc_ao2e: MA problem',14,MA_ERR) 1081 if (.not.ma_pop_stack(l_work1)) 1082 1 call errquit('tcc_ao2e: MA problem',15,MA_ERR) 1083c 1084 if (.not.ma_pop_stack(l_movecs_orb)) 1085 1 call errquit('tcc_ao2e: MA problem',15,MA_ERR) 1086c 1087 if (.not.ma_pop_stack(l_gpair)) 1088 1 call errquit('tcc_ao2e: MA problem',15,MA_ERR) 1089c 1090c 1091c 1092 if(idiskl) then 1093cccx if (parallel) then 1094cccx call ga_sync() 1095cccx if (sf_open(d_4af).ne.0) 1096cccx 1 call errquit('deletefile: sf problem',0,DISK_ERR) 1097cccx endif 1098ccx if (.not.sf_destroy(d_4af)) 1099ccx 1 call errquit('tcc_sf_destroy2: sf problem',15,MA_ERR) 1100 if (parallel) call ga_sync() 1101 else 1102 call deletefile(d_4af) 1103 end if 1104c 1105c 1106c 1107 if (.not.ma_pop_stack(l_4af_offset)) 1108 1 call errquit('tcc_ao2e: MA problem',15,MA_ERR) 1109c 1110 call ga_sync() 1111c *** debug *** 1112c 800 format('DGEMM1 MAX',i5,2x,3f15.5) 1113c 801 format('DGEMM2 ',i5,2x,3f15.5) 1114 9000 format('PART1',i4,1x,'Cpu wall ',2(f17.12,1x),3x,'g4b g3b',2i5) 1115c 9001 format('PART2',i4,1x,'Cpu wall ',2(f17.12,1x),3x,'g4b g3b',2i5) 1116c 9003 format('PART1-4a',i4,1x,'Cpu wall ',2(f17.12,1x)) 1117c 9004 format('PART1-2g2z',i4,1x,'Cpu wall ',2(f17.12,1x)) 1118c 9005 format('PART1-dgemm',i4,1x,'Cpu wall ',2(f17.12,1x)) 1119c 9010 format(' P1-mnrs',i3,1x,2i5,1x,2i5,1x,'Cpu wall ',2(f17.12,1x)) 1120 555 format('atom loop ',2x,i5,3x,2i5,3x,2i5,i12) 1121 556 format('atom time',2x,i5,3x,2i5,3x,2i5,'Cpu wall ',2(f12.7,1x)) 1122 777 format('main do loop ',2x,i5,3x,2i5,3x,2i5) 1123 775 format('main loop step1 ',2x,i5,3x,2i5,3x,2i5) 1124 776 format('main loop step2 ',2x,i5,3x,2i5,3x,2i5) 1125 778 format('PART1',2x,i5,3x,2i5,3x,2i5,2x,'Cpu wall ',2(f17.12,1x)) 1126 779 format('PART2',2x,i5,3x,2i5,3x,2i5,2x,'Cpu wall ',2(f17.12,1x)) 1127 780 format('ADD BLOCK',2x,i5,3x,2i5,3x,2i5,2x,'Cpu wall ', 1128 & 2(f17.12,1x)) 1129c ************* 1130c 1131 RETURN 1132 END 1133c 1134c 1135c 1136c 1137