1 2* **************************** 3* * * 4* * pspw_potential_sHFX0 * 5* * * 6* **************************** 7 subroutine pspw_potential_sHFX0(ispin0,psi_r,Hpsi_r) 8 implicit none 9 integer ispin0 10 real psi_r(*) 11 real Hpsi_r(*) 12 13#include "bafdecls.fh" 14#include "pspw_hfx.fh" 15#include "errquit.fh" 16 17 integer istart,iend,jstart,jend,imodn,imodtask 18 integer ms,l,q,n,indx1,indx2,Levels,neq(2) 19 integer requests(5),reqcnt 20 21 integer Butter_Levels,Dneall_na_ptr 22 external Butter_Levels,Dneall_na_ptr 23 24* ***** now do exchange as normal **** 25!$OMP MASTER 26 ehfx = 0.0d0 27 phfx = 0.0d0 28!$OMP END MASTER 29 if (((norbs(1)+norbs(2)).ne.0).and.relaxed) then 30 31 if (replicated) then 32 33* **** butterfly algorithm **** 34 if (butterfly) then 35 call Dneall_neq(neq) 36 Levels = Butter_Levels(npj) 37 do ms=1,ispin 38 call Parallel_shared_vector_szero(.false.,nrsize, 39 > real_mb(Hpsi_r_replicated(1))) 40 call Parallel_shared_vector_scopy(.true., 41 > neq(ms)*n2ft3d, 42 > psi_r(1+(ms-1)*neq(1)*n2ft3d), 43 > real_mb(psi_r_replicated(1))) 44 45 do l=0,Levels-1 46 call D1dBs_Brdcst_step(l, 47 > int_mb(Dneall_na_ptr(ms)),-1, 48 > n2ft3d, 49 > real_mb(psi_r_replicated(1)), 50 > requests,reqcnt) 51 52 call Butter_indexes(l,taskid_j,npj, 53 > int_mb(Dneall_na_ptr(ms)), 54 > istart,iend,jstart,jend, 55 > imodn,imodtask) 56 call pspw_potential_sHFX_sub2(solver_type, 57 > istart,iend, 58 > jstart,jend, 59 > imodn,imodtask, 60 > n2ft3d, 61 > real_mb(psi_r_replicated(1)), 62 > real_mb(Hpsi_r_replicated(1)), 63 > ehfx) 64 65 call D1dB_WaitAll(requests,reqcnt) 66 end do 67 68 call Butter_indexes_L1(taskid_j,npj, 69 > int_mb(Dneall_na_ptr(ms)), 70 > istart,iend,jstart,jend, 71 > imodn,imodtask) 72 if (jend.ge.jstart) 73 > call pspw_potential_sHFX_sub2(solver_type, 74 > istart,iend, 75 > jstart,jend, 76 > imodn,imodtask, 77 > n2ft3d, 78 > real_mb(psi_r_replicated(1)), 79 > real_mb(Hpsi_r_replicated(1)), 80 > ehfx) 81 call Butter_indexes_L2(taskid_j,npj, 82 > int_mb(Dneall_na_ptr(ms)), 83 > istart,iend,jstart,jend, 84 > imodn,imodtask) 85 call pspw_potential_sHFX_sub2(solver_type, 86 > istart,iend, 87 > jstart,jend, 88 > imodn,imodtask, 89 > n2ft3d, 90 > real_mb(psi_r_replicated(1)), 91 > real_mb(Hpsi_r_replicated(1)), 92 > ehfx) 93 94 do l=Levels-1,0,-1 95 call D1dBs_Reduce_step(l, 96 > int_mb(Dneall_na_ptr(ms)),-1, 97 > n2ft3d, 98 > real_mb(Hpsi_r_replicated(1)), 99 > real_mb(psi_r_replicated(1))) 100 end do 101 call SAXPY_OMP(neq(ms)*n2ft3d,hfx_parameter, 102 > real_mb(Hpsi_r_replicated(1)),1, 103 > Hpsi_r(1+(ms-1)*neq(1)*n2ft3d),1) 104 end do 105 106 107* *** apply hfx_parameter **** 108!$OMP MASTER 109 ehfx = ehfx*hfx_parameter 110 111 if (ispin.eq.1) ehfx = ehfx + ehfx 112!$OMP END MASTER 113 call Parallel_SumAll(ehfx) 114!$OMP MASTER 115 phfx = 2.0d0*ehfx 116!$OMP END MASTER 117 118* **** reduceall algorithm **** 119 else 120 call Parallel_shared_vector_szero(.false.,nrsize, 121 > real_mb(psi_r_replicated(1))) 122 call Parallel_shared_vector_szero(.true.,nrsize, 123 > real_mb(Hpsi_r_replicated(1)),1) 124 do q=1,neqall 125 call Dneall_qton(q,n) 126 indx1 = (q-1)*n2ft3d + 1 127 indx2 = psi_r_replicated(1)+(n-1)*n2ft3d 128 call Parallel_shared_vector_scopy(.true.,n2ft3d, 129 > psi_r(indx1),real_mb(indx2)) 130 end do 131 call D1dBs_Vector_SumAll(nrsize, 132 > real_mb(psi_r_replicated(1))) 133 call pspw_potential_sHFX_sub(ispin0, 134 > real_mb(psi_r_replicated(1)), 135 > real_mb(Hpsi_r_replicated(1))) 136 call D1dBs_Vector_SumAll(nrsize, 137 > real_mb(Hpsi_r_replicated(1))) 138 do q=1,neqall 139 call Dneall_qton(q,n) 140 indx1 = Hpsi_r_replicated(1)+(n-1)*n2ft3d 141 indx2 = (q-1)*n2ft3d + 1 142 call SAXPY_OMP(n2ft3d,1.0d0,real_mb(indx1),1, 143 > Hpsi_r(indx2),1) 144 end do 145 end if 146 147 else 148 call pspw_potential_sHFX_sub(ispin0,psi_r,Hpsi_r) 149 end if 150 151 end if 152 153 return 154 end 155 156 157 158* ************************* 159* * * 160* * pspw_energy_sHFX0 * 161* * * 162* ************************* 163 subroutine pspw_energy_sHFX0(ispin0,psi_r,ehfx_out,phfx_out) 164 implicit none 165 integer ispin0 166 real psi_r(*) 167 real*8 ehfx_out 168 real*8 phfx_out 169 170#include "bafdecls.fh" 171#include "pspw_hfx.fh" 172#include "errquit.fh" 173 174 integer q,n,indx1,indx2 175 176 if (((norbs(1)+norbs(2)).ne.0).and.(.not.relaxed)) then 177 178c **** calculate HFX energy **** 179 if (replicated) then 180 181 call Parallel_shared_vector_szero(.true.,nrsize, 182 > real_mb(psi_r_replicated(1))) 183 do q=1,neqall 184 call Dneall_qton(q,n) 185 indx1 = (q-1)*n2ft3d + 1 186 indx2 = psi_r_replicated(1)+(n-1)*n2ft3d 187 call Parallel_shared_vector_scopy(.true.,n2ft3d, 188 > psi_r(indx1),real_mb(indx2)) 189 end do 190 call D1dBs_Vector_SumAll(nrsize, 191 > real_mb(psi_r_replicated(1))) 192 call pspw_energy_sHFX_sub(ispin0, 193 > real_mb(psi_r_replicated(1)), 194 > ehfx_out,phfx_out) 195 196 else 197 198 call pspw_energy_sHFX_sub(ispin0,psi_r,ehfx_out,phfx_out) 199 200 end if 201 202c **** nothing to do **** 203 else 204 ehfx_out = ehfx 205 phfx_out = phfx 206 end if 207 208 return 209 end 210 211 212 213 214* ******************************** 215* * * 216* * pspw_potential_sHFX_orb * 217* * * 218* ******************************** 219 subroutine pspw_potential_sHFX_orb(ms,psi_r, 220 > orb_r,Horb_r) 221 implicit none 222 integer ms 223 real psi_r(*) 224 real orb_r(*) 225 real Horb_r(*) 226 227#include "bafdecls.fh" 228#include "pspw_hfx.fh" 229#include "errquit.fh" 230 231* **** local variables **** 232 logical value 233 integer i,j,n1,n2,n3 234 integer dn(2),vij(2),tmp1(2),index2 235 real scal1,scal2,dv,seh 236 real*8 eh,ph 237 238* **** external functions **** 239 real*8 lattice_omega 240 external lattice_omega 241 real coulomb_screened_s_e 242 external coulomb_screened_s_e 243 244 245 call nwpw_timing_start(33) 246 if ((norbs(ms).ne.0).and.relaxed) then 247 call D3dB_nx(1,n1) 248 call D3dB_ny(1,n2) 249 call D3dB_nz(1,n3) 250 !call D3dB_n2ft3d(1,n2ft3d) 251 value = BA_push_get(mt_real,(n2ft3d),'dn_hfx',dn(2),dn(1)) 252 value = value.and. 253 > BA_push_get(mt_real,(n2ft3d),'vij_hfx',vij(2),vij(1)) 254 value = value.and. 255 > BA_push_get(mt_real,(n2ft3d),'tmp1_hfx',tmp1(2),tmp1(1)) 256 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 257 call Parallel_shared_vector_szero(.false.,n2ft3d, 258 > real_mb(dn(1))) 259 call Parallel_shared_vector_szero(.false.,n2ft3d, 260 > real_mb(vij(1))) 261 call Parallel_shared_vector_szero(.true.,n2ft3d, 262 > real_mb(tmp1(1))) 263 264 scal1 = real(1.0d0/dble(n1*n2*n3)) 265 scal2 = real(1.0d0/lattice_omega()) 266 dv = scal1/scal2 267 268 do j=1,norbs(ms) 269 index2 = (int_mb(orbital_list(1,ms)+j-1)-1)*n2ft3d + 1 270 271* **** generate dnij for Vij **** 272 call D3dBs_rr_Mul(1,psi_r(index2),orb_r,real_mb(dn(1))) 273 call D3dBs_r_SMul1(1,scal2,real_mb(dn(1))) 274 call D3dBs_r_Zero_Ends(1,real_mb(dn(1))) 275 276* ***** screened coulomb solver **** 277 if (solver_type.eq.1) then 278 call D3dBs_r_SMul1(1,scal1,real_mb(dn(1))) 279 call D3dBs_rc_pfft3f(1,0,real_mb(dn(1))) 280 call Packs_c_pack(0,real_mb(dn(1))) 281 282* **** get Ecoul energy **** 283 eh = dble(coulomb_screened_s_e(real_mb(dn(1)))) 284 285* **** generate Vcoul **** 286 call coulomb_screened_s_v(real_mb(dn(1)),real_mb(vij(1))) 287 call Packs_c_unpack(0,real_mb(vij(1))) 288 call D3dBs_cr_pfft3b(1,0,real_mb(vij(1))) 289 290* ***** free-space coulomb solver **** 291 else 292 call coulomb2_s_v(real_mb(dn(1)),real_mb(vij(1))) 293 call D3dBs_rr_dot(1,real_mb(dn(1)),real_mb(vij(1)),seh) 294 eh = dble(0.5*seh*dv) 295 end if 296 297* **** apply hfx_parameter, double eh for restricted, and calculcate ph **** 298 eh = eh*hfx_parameter 299 call D3dBs_r_SMul1(1,real(hfx_parameter),real_mb(vij(1))) 300 if (ispin.eq.1) eh = eh + eh 301 ph = 2.0d0*eh 302 303* **** generate (Vij)*psi_r *** 304 call D3dBs_rr_Mul(1,real_mb(vij(1)), 305 > psi_r(index2), 306 > real_mb(tmp1(1))) 307 call D3dBs_r_Zero_Ends(1,real_mb(tmp1(1))) 308 309* **** add -(Vij)*psi_r to Hpsi_r *** 310 call D3dBs_rr_Sub(1,Horb_r, 311 > real_mb(tmp1(1)), 312 > Horb_r) 313 end do 314 315 value = value.and.BA_pop_stack(tmp1(2)) 316 value = value.and.BA_pop_stack(vij(2)) 317 value = value.and.BA_pop_stack(dn(2)) 318 if (.not. value) 319 > call errquit('pspw_potential_sHFX_orb:popping stack memory',0, 320 & MA_ERR) 321 end if 322 call nwpw_timing_end(33) 323 return 324 end 325 326 327c***************** sub/replicated routines ***************************** 328 329* ******************************** 330* * * 331* * pspw_potential_sHFX_sub * 332* * * 333* ******************************** 334 subroutine pspw_potential_sHFX_sub(ispin0,psi_r,Hpsi_r) 335 implicit none 336 integer ispin0 337 real psi_r(*) 338 real Hpsi_r(*) 339 340#include "bafdecls.fh" 341#include "pspw_hfx.fh" 342#include "errquit.fh" 343 344* **** local variables **** 345 logical value,done 346 integer i,j,n1,n2,n3,ms 347 integer dn(2),vij(2),tmp1(2),tmp2(2),index1,index2 348 integer i1,j1,k1,NN 349 integer i2,j2,k2 350 integer i3,j3,k3 351 real scal1,scal2,dv,seh 352 real*8 eh,ph,ss,teh 353 integer center(3) 354 real*8 rcenter(3) 355 integer taskid,icount 356 real*8 cpu0,cpu1 357 integer ktaskjid,kcompute(2) 358 359* **** external functions **** 360 real*8 lattice_omega 361 external lattice_omega 362 363 logical D3dBs_rc_pfft3_queue_filled,D3dBs_cr_pfft3_queue_filled 364 external D3dBs_rc_pfft3_queue_filled,D3dBs_cr_pfft3_queue_filled 365 logical pspw_hfx_localize_closeenough 366 external pspw_hfx_localize_closeenough 367 368 real*8 pspw_hfx_localize_switchr 369 external pspw_hfx_localize_switchr 370 371 real icoulomb_screened_s_e 372 external icoulomb_screened_s_e 373 real icoulomb_screened_small_s_e 374 external icoulomb_screened_small_s_e 375 376 call Parallel2d_taskid_i(taskid) 377 icount = 0 378 call current_second(cpu0) 379 380!$OMP MASTER 381 ehfx = 0.0d0 382 phfx = 0.0d0 383!$OMP END MASTER 384 if (((norbs(1)+norbs(2)).ne.0).and.relaxed) then 385 !call D3dB_nx(1,n1) 386 !call D3dB_ny(1,n2) 387 !call D3dB_nz(1,n3) 388 !call D3dB_n2ft3d(1,n2ft3d) 389 value = BA_push_get(mt_real,(n2ft3d),'dn_hfx',dn(2),dn(1)) 390 value = value.and. 391 > BA_push_get(mt_real,(n2ft3d),'vij_hfx',vij(2),vij(1)) 392 value = value.and. 393 > BA_push_get(mt_real,(n2ft3d),'tmp1_hfx',tmp1(2),tmp1(1)) 394 value = value.and. 395 > BA_push_get(mt_real,(n2ft3d),'tmp2_hfx',tmp2(2),tmp2(1)) 396 NN = norbs(1)*(norbs(1)+1)/2 397 value = value.and. 398 > BA_push_get(mt_int,NN,'kcmpute',kcompute(2),kcompute(1)) 399 if (.not.value) 400 > call errquit('pspw_potential_HFX_sub:out of stack memory', 401 > 0,MA_ERR) 402 call Parallel_shared_vector_szero(.false., 403 > n2ft3d,real_mb(dn(1))) 404 call Parallel_shared_vector_szero(.false., 405 > n2ft3d,real_mb(vij(1))) 406 call Parallel_shared_vector_szero(.false., 407 > n2ft3d,real_mb(tmp1(1))) 408 call Parallel_shared_vector_szero(.true., 409 > n2ft3d,real_mb(tmp2(1))) 410 411 call D3dB_nx(1,n1) 412 call D3dB_ny(1,n2) 413 call D3dB_nz(1,n3) 414 scal1 = real(1.0d0/dble(n1*n2*n3)) 415 scal2 = real(1.0d0/lattice_omega()) 416 dv = scal1/scal2 417 418 if (localize_on.and.has_smallgrid) then 419 call D3dB_nx(3,n1) 420 call D3dB_ny(3,n2) 421 call D3dB_nz(3,n3) 422 scal1 = real(1.0d0/dble(n1*n2*n3)) 423 end if 424 425* **** compute kcompute **** 426 ktaskjid = 0 427 428* ***** screened coulomb solver **** 429 if (solver_type.eq.1) then 430 do ms=1,ispin0 431 if (norbs(ms).eq.0) go to 898 432 call Parallel_shared_vector_zero(.true.,norbs(ms), 433 > dbl_mb(ehfx_orb(1,ms))) 434 NN = norbs(ms)*(norbs(ms)+1)/2 435 436* **** compute kcompute **** 437 i1 = 1 438 j1 = 1 439 do k1=1,NN 440 if (pspw_hfx_localize_closeenough( 441 > int_mb(orbital_list(1,ms)+i1-1), 442 > int_mb(orbital_list(1,ms)+j1-1))) then 443 int_mb(kcompute(1)+k1-1) = ktaskjid 444 ktaskjid = mod(ktaskjid+1,npj) 445 else 446 int_mb(kcompute(1)+k1-1) = npj+1 447 end if 448 j1 = j1 + 1 449 if (j1.gt.i1) then 450 j1 = 1 451 i1 = i1 + 1 452 end if 453 end do 454 455 i1 = 1 456 j1 = 1 457 k1 = 1 458 i2 = 1 459 j2 = 1 460 k2 = 1 461 i3 = 1 462 j3 = 1 463 k3 = 1 464 done = .false. 465 do while (.not.done) 466 467 if ((k1.le.NN).and. 468 > (.not.D3dBs_rc_pfft3_queue_filled())) then 469 470 !if (mod(k1,npj).eq.taskid_j) then 471 if (int_mb(kcompute(1)+k1-1).eq.taskid_j) then 472 if (pspw_hfx_localize_closeenough( 473 > int_mb(orbital_list(1,ms)+i1-1), 474 > int_mb(orbital_list(1,ms)+j1-1))) then 475 476 index1 =(int_mb(orbital_list(1,ms)+i1-1)-1)*n2ft3d+1 477 index2 =(int_mb(orbital_list(1,ms)+j1-1)-1)*n2ft3d+1 478 479* **** generate dnij for Vij **** 480 call D3dBs_rr_Mul(1,psi_r(index2), 481 > psi_r(index1),real_mb(dn(1))) 482 483 call D3dBs_r_SMul1(1,scal2*scal1,real_mb(dn(1))) 484 call D3dBs_r_Zero_Ends(1,real_mb(dn(1))) 485 486 if (localize_on.and.has_smallgrid) then 487 call pspw_hfx_localize_center_ovlp( 488 > int_mb(orbital_list(1,ms)+i1-1), 489 > int_mb(orbital_list(1,ms)+j1-1),center) 490 call D3dBs_rc_pfft3f_queuein_center(2,center, 491 > real_mb(dn(1))) 492 else 493 call D3dBs_rc_pfft3f_queuein(0,real_mb(dn(1))) 494 end if 495c 496 497 498 end if 499 end if 500 501 k1 = k1 + 1 502 j1 = j1 + 1 503 if (j1.gt.i1) then 504 j1 = 1 505 i1 = i1 + 1 506 end if 507 end if 508 509 if (( ((D3dBs_rc_pfft3_queue_filled()).or.(k1.gt.NN)) 510 > .and.(k2.le.NN)).and. 511 > (.not.D3dBs_cr_pfft3_queue_filled())) then 512 513 !if (mod(k2,npj).eq.taskid_j) then 514 if (int_mb(kcompute(1)+k2-1).eq.taskid_j) then 515 if (pspw_hfx_localize_closeenough( 516 > int_mb(orbital_list(1,ms)+i2-1), 517 > int_mb(orbital_list(1,ms)+j2-1))) then 518 519 ss = pspw_hfx_localize_switchr( 520 > int_mb(orbital_list(1,ms)+i2-1), 521 > int_mb(orbital_list(1,ms)+j2-1)) 522 523 if (localize_on.and.has_smallgrid) then 524 call D3dBs_rc_pfft3f_queueout_center(2, 525 > real_mb(dn(1))) 526 eh = dble(icoulomb_screened_small_s_e( 527 > real_mb(dn(1)))) 528 call coulomb_screened_small_s_v(real_mb(dn(1)), 529 > real_mb(vij(1))) 530 else 531 call D3dBs_rc_pfft3f_queueout(0,real_mb(dn(1))) 532 eh = dble(icoulomb_screened_s_e(real_mb(dn(1)))) 533 call coulomb_screened_s_v(real_mb(dn(1)), 534 > real_mb(vij(1))) 535 end if 536 537* **** apply hfx_parameter, double eh for restricted, and calculcate ph **** 538 eh = eh*hfx_parameter*ss 539 if (ispin0.eq.1) eh = eh + eh 540 ph = 2.0d0*eh 541!$OMP MASTER 542 ehfx = ehfx - eh 543 phfx = phfx - ph 544 dbl_mb(ehfx_orb(1,ms)+i2-1) 545 > = dbl_mb(ehfx_orb(1,ms)+i2-1) - eh 546!$OMP END MASTER 547 if (i2.ne.j2) then 548!$OMP MASTER 549 ehfx = ehfx - eh 550 phfx = phfx - ph 551 dbl_mb(ehfx_orb(1,ms)+i2-1) 552 > = dbl_mb(ehfx_orb(1,ms)+i2-1) - eh 553!$OMP END MASTER 554 end if 555 556 if (localize_on.and.has_smallgrid) then 557 call pspw_hfx_localize_center_ovlp( 558 > int_mb(orbital_list(1,ms)+i2-1), 559 > int_mb(orbital_list(1,ms)+j2-1),center) 560 call D3dBs_cr_pfft3b_queuein_center(2,center, 561 > real_mb(vij(1))) 562 else 563 call D3dBs_cr_pfft3b_queuein(0,real_mb(vij(1))) 564 end if 565 566 end if 567 end if 568 569 k2 = k2 + 1 570 j2 = j2 + 1 571 if (j2.gt.i2) then 572 j2 = 1 573 i2 = i2 + 1 574 end if 575 end if 576 577 if ((D3dBs_cr_pfft3_queue_filled()).or.(k2.gt.NN)) then 578 579 !if (mod(k3,npj).eq.taskid_j) then 580 if (int_mb(kcompute(1)+k3-1).eq.taskid_j) then 581 if (pspw_hfx_localize_closeenough( 582 > int_mb(orbital_list(1,ms)+i3-1), 583 > int_mb(orbital_list(1,ms)+j3-1))) then 584 585 index1 =(int_mb(orbital_list(1,ms)+i3-1)-1)*n2ft3d+1 586 index2 =(int_mb(orbital_list(1,ms)+j3-1)-1)*n2ft3d+1 587 588 ss = pspw_hfx_localize_switchr( 589 > int_mb(orbital_list(1,ms)+i3-1), 590 > int_mb(orbital_list(1,ms)+j3-1)) 591 592 if (localize_on.and.has_smallgrid) then 593 call D3dBs_cr_pfft3b_queueout_center(2, 594 > real_mb(vij(1))) 595 else 596 call D3dBs_cr_pfft3b_queueout(0,real_mb(vij(1))) 597 end if 598 599 600* **** apply hfx_parameter **** 601 call D3dBs_r_SMul1(1,real(hfx_parameter*ss), 602 > real_mb(vij(1))) 603 604* **** generate (Vij)*psi_r *** 605 call D3dBs_rr_Mul(1,real_mb(vij(1)), 606 > psi_r(index2), 607 > real_mb(tmp1(1))) 608 call D3dBs_r_Zero_Ends(1,real_mb(tmp1(1))) 609 610* **** add -(Vij)*psi_r to Hpsi_r *** 611 call D3dBs_rr_Sub2(1,real_mb(tmp1(1)), 612 > Hpsi_r(index1)) 613 614 !**** include off diagonal terms **** 615 if (i3.ne.j3) then 616* **** generate (Vij)*psi_r *** 617 call D3dBs_rr_Mul(1,real_mb(vij(1)), 618 > psi_r(index1), 619 > real_mb(tmp2(1))) 620 call D3dBs_r_Zero_Ends(1,real_mb(tmp2(1))) 621 622* **** add -(Vij)*psi_r to Hpsi_r *** 623 call D3dBs_rr_Sub2(1,real_mb(tmp2(1)), 624 > Hpsi_r(index2)) 625 end if 626 end if 627 end if 628 629 k3 = k3 + 1 630 j3 = j3 + 1 631 if (j3.gt.i3) then 632 j3 = 1 633 i3 = i3 + 1 634 end if 635 end if 636 done = (k1.gt.NN).and.(k2.gt.NN).and.(k3.gt.NN) 637 end do !**** while **** 638 call Parallel_Vector_SumAll(norbs(ms), 639 > dbl_mb(ehfx_orb(1,ms))) 640 641 898 continue 642 end do !**** ms ***** 643 644* ***** free-space coulomb solver **** 645 else 646 k1 = 1 647 do ms=1,ispin0 648 do i=1,norbs(ms) 649!$OMP MASTER 650 dbl_mb(ehfx_orb(1,ms)+i-1) = 0.0d0 651!$OMP END MASTER 652 do j=1,i 653 if (mod(k1,npj).eq.taskid_j) then 654 if (pspw_hfx_localize_closeenough( 655 > int_mb(orbital_list(1,ms)+i-1), 656 > int_mb(orbital_list(1,ms)+j-1))) then 657 658 index1 = (int_mb(orbital_list(1,ms)+i-1)-1)*n2ft3d + 1 659 index2 = (int_mb(orbital_list(1,ms)+j-1)-1)*n2ft3d + 1 660 661 ss = pspw_hfx_localize_switchr( 662 > int_mb(orbital_list(1,ms)+i-1), 663 > int_mb(orbital_list(1,ms)+j-1)) 664 665* **** generate dnij for Vij **** 666 call D3dBs_rr_Mul(1,psi_r(index2),psi_r(index1), 667 > real_mb(dn(1))) 668 call D3dBs_r_SMul1(1,scal2,real_mb(dn(1))) 669 call D3dBs_r_Zero_Ends(1,real_mb(dn(1))) 670 671 call coulomb2_s_v(real_mb(dn(1)),real_mb(vij(1))) 672 call D3dBs_rr_idot(1,real_mb(dn(1)),real_mb(vij(1)),seh) 673 eh = dble(0.5*seh*dv) 674 675* **** apply hfx_parameter, double eh for restricted, and calculcate ph **** 676 eh = eh*hfx_parameter*ss 677 call D3dBs_r_SMul1(1,real(hfx_parameter*ss), 678 > real_mb(vij(1))) 679 if (ispin0.eq.1) eh = eh + eh 680 ph = 2.0d0*eh 681 682 683!$OMP MASTER 684 ehfx = ehfx - eh 685 phfx = phfx - ph 686 dbl_mb(ehfx_orb(1,ms)+i-1) =dbl_mb(ehfx_orb(1,ms)+i-1)-eh 687!$OMP END MASTER 688 689* **** generate (Vij)*psi_r *** 690 call D3dBs_rr_Mul(1,real_mb(vij(1)), 691 > psi_r(index2), 692 > real_mb(tmp1(1))) 693 call D3dBs_r_Zero_Ends(1,real_mb(tmp1(1))) 694 695 696* **** add -(Vij)*psi_r to Hpsi_r *** 697 call D3dBs_rr_Sub2(1,real_mb(tmp1(1)),Hpsi_r(index1)) 698 699 !**** include off diagonal terms **** 700 if (i.ne.j) then 701!$OMP MASTER 702 ehfx = ehfx - eh 703 phfx = phfx - ph 704 dbl_mb(ehfx_orb(1,ms)+i-1) = dbl_mb(ehfx_orb(1,ms)+i-1) 705 > - eh 706!$OMP END MASTER 707* **** generate (Vij)*psi_r *** 708 call D3dBs_rr_Mul(1,real_mb(vij(1)), 709 > psi_r(index1), 710 > real_mb(tmp2(1))) 711 call D3dBs_r_Zero_Ends(1,real_mb(tmp2(1))) 712 713* **** add -(Vij)*psi_r to Hpsi_r *** 714 call D3dBs_rr_Sub2(1,real_mb(tmp2(1)), 715 > Hpsi_r(index2)) 716 end if 717 end if 718 end if 719 k1 = k1 + 1 720 end do 721 end do 722 call Parallel_Vector_SumAll(norbs(ms), 723 > dbl_mb(ehfx_orb(1,ms))) 724 end do 725 726 end if 727 728 value = BA_pop_stack(kcompute(2)) 729 value = value.and.BA_pop_stack(tmp2(2)) 730 value = value.and.BA_pop_stack(tmp1(2)) 731 value = value.and.BA_pop_stack(vij(2)) 732 value = value.and.BA_pop_stack(dn(2)) 733 if (.not. value) 734 > call errquit('pspw_potential_HFX:popping stack memory',0, 735 & MA_ERR) 736 737 call Parallel_SumAll(ehfx) 738 call Parallel_SumAll(phfx) 739 740 end if 741 742 return 743 end 744 745 746* ***************************** 747* * * 748* * pspw_energy_sHFX_sub * 749* * * 750* ***************************** 751 subroutine pspw_energy_sHFX_sub(ispin0,psi_r,ehfx_out,phfx_out) 752 implicit none 753 integer ispin0 754 real psi_r(*) 755 real*8 ehfx_out 756 real*8 phfx_out 757 758#include "bafdecls.fh" 759#include "pspw_hfx.fh" 760#include "errquit.fh" 761 762* **** local variables **** 763 logical value 764 integer i,j,n1,n2,n3,ms,k1 765 integer dn(2),tmp1(2),index1,index2 766 real scal1,scal2,dv,seh 767 real*8 eh,ph,ss 768 769* **** external functions **** 770 real*8 lattice_omega 771 external lattice_omega 772 logical pspw_hfx_localize_closeenough 773 external pspw_hfx_localize_closeenough 774 real*8 pspw_hfx_localize_switchr 775 external pspw_hfx_localize_switchr 776 real coulomb_screened_s_e 777 external coulomb_screened_s_e 778 779 780 if (((norbs(1)+norbs(2)).ne.0).and.(.not.relaxed)) then 781!$OMP MASTER 782 ehfx = 0.0d0 783 phfx = 0.0d0 784!$OMP END MASTER 785 786 call D3dB_nx(1,n1) 787 call D3dB_ny(1,n2) 788 call D3dB_nz(1,n3) 789 !call D3dB_n2ft3d(1,n2ft3d) 790 value = BA_push_get(mt_real,(2*n2ft3d),'dn_hfx',dn(2),dn(1)) 791 value = value.and. 792 > BA_push_get(mt_real,(n2ft3d),'tmp1_hfx',tmp1(2),tmp1(1)) 793 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 794 call Parallel_shared_vector_szero(.false., 795 > 2*n2ft3d,real_mb(dn(1))) 796 call Parallel_shared_vector_szero(.true., 797 > n2ft3d,real_mb(tmp1(1))) 798 799 scal1 = real(1.0d0/dble(n1*n2*n3)) 800 scal2 = real(1.0d0/lattice_omega()) 801 dv = scal1/scal2 802 803 k1 = 1 804 do ms=1,ispin 805 do i=1,norbs(ms) 806!$OMP MASTER 807 dbl_mb(ehfx_orb(1,ms)+i-1) = 0.0d0 808!$OMP END MASTER 809 do j=1,i 810 811 if (mod(k1,npj).eq.taskid_j) then 812 if (pspw_hfx_localize_closeenough( 813 > int_mb(orbital_list(1,ms)+i-1), 814 > int_mb(orbital_list(1,ms)+j-1))) then 815 816 index1 = (int_mb(orbital_list(1,ms)+i-1)-1)*n2ft3d + 1 817 index2 = (int_mb(orbital_list(1,ms)+j-1)-1)*n2ft3d + 1 818 819 ss = pspw_hfx_localize_switchr( 820 > int_mb(orbital_list(1,ms)+i-1), 821 > int_mb(orbital_list(1,ms)+j-1)) 822 823 824* **** generate dnij **** 825 call D3dBs_rr_Mul(1,psi_r(index1),psi_r(index2), 826 > real_mb(dn(1))) 827 call D3dBs_r_SMul1(1,scal2,real_mb(dn(1))) 828 call D3dBs_r_Zero_Ends(1,real_mb(dn(1))) 829 830* ***** screened coulomb solver **** 831 if (solver_type.eq.1) then 832 833* **** generate dng **** 834 call D3dBs_r_SMul1(1,scal1,real_mb(dn(1))) 835 call D3dBs_rc_pfft3f(1,0,real_mb(dn(1))) 836 call Packs_c_pack(0,real_mb(dn(1))) 837 838* **** get Ecoul energy **** 839 eh = dble(coulomb_screened_s_e(real_mb(dn(1)))) 840 841* ***** free-space coulomb solver **** 842 else 843 call coulomb2_s_v(real_mb(dn(1)),real_mb(tmp1(1))) 844 call D3dBs_rr_dot(1,real_mb(dn(1)), 845 > real_mb(tmp1(1)),seh) 846 eh = dble(0.5*seh*dv) 847 end if 848 849* **** apply hfx_parameter, double eh for restricted, and calculcate ph **** 850 eh = eh*hfx_parameter*ss 851 if (ispin0.eq.1) eh = eh + eh 852 ph = 2.0d0*eh 853 854!$OMP MASTER 855 ehfx = ehfx - eh 856 phfx = phfx - ph 857 dbl_mb(ehfx_orb(1,ms)+i-1) = dbl_mb(ehfx_orb(1,ms)+i-1)-eh 858!$OMP END MASTER 859 860 !**** include off diagonal terms **** 861 if (i.ne.j) then 862!$OMP MASTER 863 ehfx = ehfx - eh 864 phfx = phfx - ph 865 dbl_mb(ehfx_orb(1,ms)+i-1) = dbl_mb(ehfx_orb(1,ms)+i-1) 866 > - eh 867!$OMP END MASTER 868 end if 869 870 end if 871 end if 872 k1 = k1 + 1 873 874 end do 875 end do 876 call D1dB_Vector_SumAll(norbs(ms),dbl_mb(ehfx_orb(1,ms))) 877 end do 878 879 value = BA_pop_stack(tmp1(2)) 880 value = value.and.BA_pop_stack(dn(2)) 881 if (.not. value) 882 > call errquit('pspw_energy_HFX_sub:popping stack memory',0, 883 & MA_ERR) 884 885 call D1dB_SumAll(ehfx) 886 call D1dB_SumAll(phfx) 887 end if 888 ehfx_out = ehfx 889 phfx_out = phfx 890 891 return 892 end 893 894 895 896 897* ************************************ 898* * * 899* * pspw_potential_HFX_orb_sub * 900* * * 901* ************************************ 902* 903* Note that orb_r and Horb_r are assumed to be replicated rather than psi_r 904* orb_r is not replicated in this routine 905* Horb_r is not reduced in this routine 906* 907 subroutine pspw_potential_sHFX_orb_sub(ms,psi_r, 908 > orb_r,Horb_r) 909 implicit none 910 integer ms 911 real psi_r(*) 912 real orb_r(*) 913 real Horb_r(*) 914 915#include "bafdecls.fh" 916#include "pspw_hfx.fh" 917#include "errquit.fh" 918 919* **** local variables **** 920 logical value 921 integer j,n1,n2,n3,q2,p2 922 integer dn(2),vij(2),tmp1(2),tmp2(2),index2 923 real scal1,scal2,dv,seh 924 real*8 eh,ph 925 926* **** external functions **** 927 real*8 lattice_omega 928 external lattice_omega 929 real coulomb_screened_s_e 930 external coulomb_screened_s_e 931 932 if (((norbs(1)+norbs(2)).ne.0).and.relaxed) then 933 call D3dB_nx(1,n1) 934 call D3dB_ny(1,n2) 935 call D3dB_nz(1,n3) 936 !call D3dB_n2ft3d(1,n2ft3d) 937 value = BA_push_get(mt_real,(n2ft3d),'dn_hfx',dn(2),dn(1)) 938 value = value.and. 939 > BA_push_get(mt_real,(n2ft3d),'vij_hfx',vij(2),vij(1)) 940 value = value.and. 941 > BA_push_get(mt_real,(n2ft3d),'tmp1_hfx',tmp1(2),tmp1(1)) 942 value = value.and. 943 > BA_push_get(mt_real,(n2ft3d),'tmp2_hfx',tmp2(2),tmp2(1)) 944 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 945 call Parallel_shared_vector_szero(.false.,n2ft3d, 946 > real_mb(dn(1))) 947 call Parallel_shared_vector_szero(.false.,n2ft3d, 948 > real_mb(vij(1))) 949 call Parallel_shared_vector_szero(.false.,n2ft3d, 950 > real_mb(tmp1(1))) 951 call Parallel_shared_vector_szero(.true.,n2ft3d, 952 > real_mb(tmp2(1))) 953 954 scal1 = real(1.0d0/dble(n1*n2*n3)) 955 scal2 = real(1.0d0/lattice_omega()) 956 dv = scal1/scal2 957 958 do j=1,norbs(ms) 959 call Dneall_ntoqp(int_mb(orbital_list(1,ms)+j-1),q2,p2) 960 index2 = (q2-1)*n2ft3d + 1 961 962 if (p2.eq.taskid_j) then 963* **** generate dnij for Vij **** 964 call D3dBs_rr_Mul(1,psi_r(index2),orb_r,real_mb(dn(1))) 965 call D3dBs_r_SMul1(1,scal2,real_mb(dn(1))) 966 call D3dBs_r_Zero_Ends(1,real_mb(dn(1))) 967 968* ***** screened coulomb solver **** 969 if (solver_type.eq.1) then 970 call D3dBs_r_SMul1(1,scal1,real_mb(dn(1))) 971 call D3dBs_rc_pfft3f(1,0,real_mb(dn(1))) 972 call Packs_c_pack(0,real_mb(dn(1))) 973 974* **** get Ecoul energy **** 975 eh = dble(coulomb_screened_s_e(real_mb(dn(1)))) 976 977* **** generate Vcoul **** 978 call coulomb_screened_s_v(real_mb(dn(1)), 979 > real_mb(vij(1))) 980 call Packs_c_unpack(0,real_mb(vij(1))) 981 call D3dBs_cr_pfft3b(1,0,real_mb(vij(1))) 982 983* ***** free-space coulomb solver **** 984 else 985 call coulomb2_s_v(real_mb(dn(1)),real_mb(vij(1))) 986 call D3dBs_rr_dot(1,real_mb(dn(1)), 987 > real_mb(vij(1)),seh) 988 eh = dble(0.5*seh*dv) 989 end if 990 991* **** apply hfx_parameter, double eh for restricted, and calculcate ph **** 992 eh = eh*hfx_parameter 993 call D3dBs_r_SMul1(1,hfx_parameter,real_mb(vij(1))) 994 if (ispin.eq.1) eh = eh + eh 995 ph = 2.0d0*eh 996 997 998* **** generate (Vij)*psi_r *** 999 call D3dBs_rr_Mul(1,real_mb(vij(1)), 1000 > psi_r(index2), 1001 > real_mb(tmp1(1))) 1002 call D3dBs_r_Zero_Ends(1,real_mb(tmp1(1))) 1003 1004* **** add -(Vij)*psi_r to Hpsi_r *** 1005 call D3dBs_rr_Sub2(1,real_mb(tmp1(1)),Horb_r) 1006 end if 1007 end do 1008 value = BA_pop_stack(tmp2(2)) 1009 value = value.and.BA_pop_stack(tmp1(2)) 1010 value = value.and.BA_pop_stack(vij(2)) 1011 value = value.and.BA_pop_stack(dn(2)) 1012 if (.not. value) 1013 > call errquit('pspw_potential_sHFX_orb_sub:popping stack memory', 1014 > 0,MA_ERR) 1015 1016c **** eh and ph not used yet **** 1017c call D1dB_SumAll(eh) 1018c call D1dB_SumAll(ph) 1019 end if 1020 return 1021 end 1022 1023 1024 1025* *********************************** 1026* * * 1027* * pspw_potential_HFX_sub2 * 1028* * * 1029* *********************************** 1030* 1031* This routine is a kernel for computing exact exchange. 1032* 1033* for i=istart:iend 1034* for j=jstart:jend 1035* dnij(*) = psi_r(*,j) .* psi_r(*,i) 1036* Vij(*) = Coulomb operator(dnij(*)) 1037* Hpsi_r(*,i) = Vij(*) .* psi_r(*,j) 1038* Hpsi_r(*,j) = Vij(*) .* psi_r(*,i) 1039* ehfx += 0.5*<psi_r(*,i)|Hpsi(*,i)> 1040* + 0.5*<psi_r(*,j)|Hpsi(*,j)> 1041* 1042* Entry - solver_type: if solver_type==1 then periodic solver, else aperiodic solver 1043* istart,iend: indexes 1044* jstart,jend: indexes 1045* imodn,imodtask: used to define which (i,j) combinations are computed. 1046* n2ft3d: size of realspace grid 1047* psi_r: wavenfucntions in realspace. 1048* ehfx: running sum of exchange energy, not initialized in this routine 1049* 1050* Exit - Hpsi_r: wavefunction gradients in realspace. 1051* ehfx: running sum of exchange energy. 1052 1053 subroutine pspw_potential_sHFX_sub2(solver_type, 1054 > istart,iend, 1055 > jstart,jend, 1056 > imodn,imodtask, 1057 > n2ft3d,psi_r,Hpsi_r, 1058 > ehfx) 1059 implicit none 1060 integer solver_type 1061 integer istart,iend,jstart,jend 1062 integer imodn,imodtask 1063 integer n2ft3d 1064 real psi_r(n2ft3d,*) 1065 real Hpsi_r(n2ft3d,*) 1066 real*8 ehfx 1067 1068#include "bafdecls.fh" 1069#include "errquit.fh" 1070 1071 integer taskid_j 1072 1073* **** local variables **** 1074 logical value,done,special 1075 integer n1,n2,n3 1076 integer dn(2),vij(2),tmp1(2) 1077 integer i1,j1,k1,NN 1078 integer i2,j2,k2 1079 integer i3,j3,k3 1080 real scal1,scal2,dv,seh 1081 real*8 eh,ph 1082 1083* **** external functions **** 1084 real*8 lattice_omega 1085 external lattice_omega 1086 logical D3dBs_rc_pfft3_queue_filled,D3dBs_cr_pfft3_queue_filled 1087 external D3dBs_rc_pfft3_queue_filled,D3dBs_cr_pfft3_queue_filled 1088 real icoulomb_screened_s_e 1089 external icoulomb_screened_s_e 1090 1091 call Parallel2d_taskid_j(taskid_j) 1092 1093 special = ((istart.eq.jstart).and.(iend.eq.jend)) 1094 1095 call D3dB_nx(1,n1) 1096 call D3dB_ny(1,n2) 1097 call D3dB_nz(1,n3) 1098 value = BA_push_get(mt_real,(n2ft3d),'dn_hfx',dn(2),dn(1)) 1099 value = value.and. 1100 > BA_push_get(mt_real,(n2ft3d),'vij_hfx',vij(2),vij(1)) 1101 value = value.and. 1102 > BA_push_get(mt_real,(n2ft3d),'tmp1_hfx',tmp1(2),tmp1(1)) 1103 if (.not. value) 1104 > call errquit('pspw_potential_sHFX_sub:out of stack',0,MA_ERR) 1105 call Parallel_shared_vector_szero(.false., 1106 > n2ft3d,real_mb(dn(1))) 1107 call Parallel_shared_vector_szero(.false., 1108 > n2ft3d,real_mb(vij(1))) 1109 call Parallel_shared_vector_szero(.true., 1110 > n2ft3d,real_mb(tmp1(1))) 1111 1112 scal1 = real(1.0d0/dble(n1*n2*n3)) 1113 scal2 = real(1.0d0/lattice_omega()) 1114 dv = scal1/scal2 1115 1116* *** special if i and j span the same indexes *** 1117 if (special) then 1118 NN = (iend-istart+1)*(jend-jstart+2)/2 1119 else 1120 NN = (iend-istart+1)*(jend-jstart+1) 1121 end if 1122 1123* ***** screened coulomb solver **** 1124 if (solver_type.eq.1) then 1125 i1 = istart 1126 j1 = jstart 1127 k1 = 1 1128 1129 i2 = istart 1130 j2 = jstart 1131 k2 = 1 1132 1133 i3 = istart 1134 j3 = jstart 1135 k3 = 1 1136 done = .false. 1137 do while (.not.done) 1138 1139* *** pipeline step 1 *** 1140 if (k1.le.NN) then 1141 1142 if (mod(k1,imodn).eq.imodtask) then 1143 1144* **** generate dnij for Vij **** 1145 call D3dBs_rr_Mul(1,psi_r(1,j1), 1146 > psi_r(1,i1),real_mb(dn(1))) 1147 call D3dBs_r_SMul1(1,scal2*scal1,real_mb(dn(1))) 1148 call D3dBs_r_Zero_Ends(1,real_mb(dn(1))) 1149 call D3dBs_rc_pfft3f_queuein(0,real_mb(dn(1))) 1150 1151 end if 1152 1153 k1 = k1 + 1 1154 j1 = j1 + 1 1155 if (special) then 1156 if (j1.gt.i1) then 1157 j1 = jstart 1158 i1 = i1 + 1 1159 end if 1160 else 1161 if (j1.gt.jend) then 1162 j1 = jstart 1163 i1 = i1 + 1 1164 end if 1165 end if 1166 end if 1167 1168* *** pipeline step 2 *** 1169 if ( ((D3dBs_rc_pfft3_queue_filled()).or.(k1.gt.NN)) 1170 > .and.(k2.le.NN)) then 1171 1172 if (mod(k2,imodn).eq.imodtask) then 1173 call D3dBs_rc_pfft3f_queueout(0,real_mb(dn(1))) 1174 1175* **** generate Vcoul **** 1176 eh = dble(icoulomb_screened_s_e(real_mb(dn(1)))) 1177 call coulomb_screened_s_v(real_mb(dn(1)), 1178 > real_mb(vij(1))) 1179 1180 1181* **** calculcate ph **** 1182!OMP MASTER 1183 ehfx = ehfx - eh 1184!OMP END MASTER 1185 1186* **** include transpose *** 1187 if ((i2.ne.j2).or.(.not.special)) then 1188!OMP MASTER 1189 ehfx = ehfx - eh 1190!OMP END MASTER 1191 end if 1192 1193 call D3dBs_cr_pfft3b_queuein(0,real_mb(vij(1))) 1194 end if 1195 1196 k2 = k2 + 1 1197 j2 = j2 + 1 1198 if (special) then 1199 if (j2.gt.i2) then 1200 j2 = jstart 1201 i2 = i2 + 1 1202 end if 1203 else 1204 if (j2.gt.jend) then 1205 j2 = jstart 1206 i2 = i2 + 1 1207 end if 1208 end if 1209 1210 end if 1211 1212* *** pipeline step 3 *** 1213 if ((D3dBs_cr_pfft3_queue_filled()).or.(k2.gt.NN)) then 1214 1215 if (mod(k3,imodn).eq.imodtask) then 1216 call D3dBs_cr_pfft3b_queueout(0,real_mb(vij(1))) 1217 1218* **** generate (Vij)*psi_r *** 1219 call D3dBs_rr_Mul(1,real_mb(vij(1)), 1220 > psi_r(1,j3), 1221 > real_mb(tmp1(1))) 1222 call D3dBs_r_Zero_Ends(1,real_mb(tmp1(1))) 1223 1224* **** add -(Vij)*psi_r to Hpsi_r *** 1225 call D3dBs_rr_Sub2(1,real_mb(tmp1(1)),Hpsi_r(1,i3)) 1226 1227 !**** include transpose **** 1228 if ((i3.ne.j3).or.(.not.special)) then 1229 1230* **** generate (Vij)*psi_r *** 1231 call D3dBs_rr_Mul(1,real_mb(vij(1)), 1232 > psi_r(1,i3), 1233 > real_mb(tmp1(1))) 1234 call D3dBs_r_Zero_Ends(1,real_mb(tmp1(1))) 1235 1236* **** add -(Vij)*psi_r to Hpsi_r *** 1237 call D3dBs_rr_Sub2(1,real_mb(tmp1(1)), 1238 > Hpsi_r(1,j3)) 1239 end if 1240 endif 1241 1242 k3 = k3 + 1 1243 j3 = j3 + 1 1244 if (special) then 1245 if (j3.gt.i3) then 1246 j3 = jstart 1247 i3 = i3 + 1 1248 end if 1249 else 1250 if (j3.gt.jend) then 1251 j3 = jstart 1252 i3 = i3 + 1 1253 end if 1254 end if 1255 1256 end if 1257 done = (k1.gt.NN).and.(k2.gt.NN).and.(k3.gt.NN) 1258 end do !**** while **** 1259 1260 1261* ***** free-space coulomb solver -- not pipelined **** 1262 else 1263 k1 = 1 1264 i1 = istart 1265 j1 = jstart 1266 done = .false. 1267 do while (.not.done) 1268 1269 if (mod(k1,imodn).eq.imodtask) then 1270 1271* **** generate dnij for Vij **** 1272 call D3dBs_rr_Mul(1,psi_r(1,j1),psi_r(1,i1), 1273 > real_mb(dn(1))) 1274 call D3dBs_r_SMul1(1,scal2,real_mb(dn(1))) 1275 call D3dBs_r_Zero_Ends(1,real_mb(dn(1))) 1276 1277 call coulomb2_s_v(real_mb(dn(1)),real_mb(vij(1))) 1278 call D3dBs_rr_idot(1,real_mb(dn(1)), 1279 > real_mb(vij(1)),seh) 1280 eh = dble(0.5*seh*dv) 1281 1282* **** calculcate ph **** 1283!OMP MASTER 1284 ehfx = ehfx - eh 1285!OMP END MASTER 1286 1287* **** generate (Vij)*psi_r *** 1288 call D3dBs_rr_Mul(1,real_mb(vij(1)), 1289 > psi_r(1,j1), 1290 > real_mb(tmp1(1))) 1291 call D3dBs_r_Zero_Ends(1,real_mb(tmp1(1))) 1292 1293* **** add -(Vij)*psi_r to Hpsi_r *** 1294 call D3dBs_rr_Sub2(1,real_mb(tmp1(1)),Hpsi_r(1,i1)) 1295 1296 !**** include transpose terms **** 1297 if ((i1.ne.j1).or.(.not.special)) then 1298!OMP MASTER 1299 ehfx = ehfx - eh 1300!OMP END MASTER 1301 1302* **** generate (Vij)*psi_r *** 1303 call D3dBs_rr_Mul(1,real_mb(vij(1)), 1304 > psi_r(1,i1), 1305 > real_mb(tmp1(1))) 1306 call D3dBs_r_Zero_Ends(1,real_mb(tmp1(1))) 1307 1308* **** add -(Vij)*psi_r to Hpsi_r *** 1309 call D3dBs_rr_Sub2(1,real_mb(tmp1(1)), 1310 > Hpsi_r(1,j1)) 1311 end if 1312 1313 end if 1314 1315 k1 = k1 + 1 1316 j1 = j1 + 1 1317 if (special) then 1318 if (j1.gt.i1) then 1319 j1 = jstart 1320 i1 = i1 + 1 1321 end if 1322 else 1323 if (j1.gt.jend) then 1324 j1 = jstart 1325 i1 = i1 + 1 1326 end if 1327 end if 1328 done = (k1.gt.NN) 1329 end do 1330 1331 end if 1332 1333 !**** deallocate memory **** 1334 value = BA_pop_stack(tmp1(2)) 1335 value = value.and.BA_pop_stack(vij(2)) 1336 value = value.and.BA_pop_stack(dn(2)) 1337 if (.not. value) call errquit( 1338 > 'pspw_potential_sHFX_sub2:popping stack memory',0,MA_ERR) 1339 1340 return 1341 end 1342 1343 1344 1345* ******************************************* 1346* * * 1347* * pspw_potential_sHFX_orb_replicated * 1348* * * 1349* ******************************************* 1350 subroutine pspw_potential_sHFX_orb_replicated(ms,psi_r, 1351 > orb_r,Horb_r) 1352 implicit none 1353 integer ms 1354 real*8 psi_r(*) 1355 real*8 orb_r(*) 1356 real*8 Horb_r(*) 1357 1358#include "bafdecls.fh" 1359#include "pspw_hfx.fh" 1360#include "errquit.fh" 1361 1362 1363 call nwpw_timing_start(33) 1364 if ((norbs(ms).ne.0).and.relaxed) then 1365 if (replicated) then 1366 call Parallel_shared_vector_scopy(.true.,n2ft3d, 1367 > real_mb(Hpsi_r_replicated(1))) 1368 call pspw_potential_HFX_orb_sub(ms,psi_r,orb_r, 1369 > real_mb(Hpsi_r_replicated(1))) 1370 call D1dBs_Vector_SumAll(n2ft3d, 1371 > real_mb(Hpsi_r_replicated(1))) 1372 call SAXPY_OMP(n2ft3d,1.0d0,real_mb(Hpsi_r_replicated(1)),1, 1373 > Horb_r,1) 1374 1375 else 1376 call pspw_potential_sHFX_orb_sub(ms,psi_r,orb_r,Horb_r) 1377 end if 1378 1379 end if 1380 call nwpw_timing_end(33) 1381 return 1382 end 1383 1384 1385 1386 1387 1388 1389* ***** routines below used for pspw_et calculations **** 1390 1391 1392* ************************* 1393* * * 1394* * pspw_energy_sHFX2 * 1395* * * 1396* ************************* 1397 subroutine pspw_energy_sHFX2(ispin0,psi1_r,psi2_r, 1398 > ehfx_out,phfx_out) 1399 implicit none 1400 integer ispin0 1401 real psi1_r(*),psi2_r(*) 1402 real*8 ehfx_out 1403 real*8 phfx_out 1404 1405#include "bafdecls.fh" 1406#include "pspw_hfx.fh" 1407#include "errquit.fh" 1408 1409 integer q,n,indx1,indx2 1410 1411 call nwpw_timing_start(33) 1412 1413c **** calculate HFX energy **** 1414 if ((norbs(1)+norbs(2)).ne.0) then 1415 1416 if (replicated) then 1417 call Parallel_shared_vector_szero(.false.,nrsize, 1418 > real_mb(psi_r_replicated(1))) 1419 call Parallel_shared_vector_szero(.true.,nrsize, 1420 > real_mb(Hpsi_r_replicated(1))) 1421 do q=1,neqall 1422 call Dneall_qton(q,n) 1423 indx1 = (q-1)*n2ft3d + 1 1424 indx2 = psi_r_replicated(1)+(n-1)*n2ft3d 1425 call Parallel_shared_vector_scopy(.false.,n2ft3d, 1426 > psi1_r(indx1),real_mb(indx2)) 1427 1428 indx2 = Hpsi_r_replicated(1)+(n-1)*n2ft3d 1429 call Parallel_shared_vector_scopy(.true.,n2ft3d, 1430 > psi2_r(indx1),real_mb(indx2)) 1431 end do 1432 call D1dBs_Vector_SumAll(nrsize, 1433 > real_mb(psi_r_replicated(1))) 1434 call D1dBs_Vector_SumAll(nrsize, 1435 > real_mb(Hpsi_r_replicated(1))) 1436 call pspw_energy_sHFX2_sub(ispin0, 1437 > real_mb(psi_r_replicated(1)), 1438 > real_mb(Hpsi_r_replicated(1)), 1439 > ehfx_out,phfx_out) 1440 1441 else 1442 call pspw_energy_sHFX2_sub(ispin0,psi1_r,psi2_r, 1443 > ehfx_out,phfx_out) 1444 end if 1445 1446c **** nothing to do **** 1447 else 1448 ehfx_out = ehfx 1449 phfx_out = phfx 1450 end if 1451 call nwpw_timing_end(33) 1452 1453 return 1454 end 1455 1456 1457 1458* ***************************** 1459* * * 1460* * pspw_energy_sHFX2_sub * 1461* * * 1462* ***************************** 1463 subroutine pspw_energy_sHFX2_sub(ispin0,psi1_r,psi2_r, 1464 > ehfx_out,phfx_out) 1465 implicit none 1466 integer ispin0 1467 real psi1_r(*) 1468 real psi2_r(*) 1469 real*8 ehfx_out 1470 real*8 phfx_out 1471 1472#include "bafdecls.fh" 1473#include "pspw_hfx.fh" 1474#include "errquit.fh" 1475 1476* **** local variables **** 1477 logical value 1478 integer i,j,n1,n2,n3,ms,k1 1479 integer dn(2),tmp1(2),index1,index2 1480 real scal1,scal2,dv,seh 1481 real*8 eh,ph 1482 1483* **** external functions **** 1484 real*8 lattice_omega,coulomb_screened_s_e 1485 external lattice_omega,coulomb_screened_s_e 1486 1487 integer taskid 1488 1489 call Parallel_taskid(taskid) 1490 1491 1492 if ((norbs(1)+norbs(2)).ne.0) then 1493!OMP MASTER 1494 ehfx = 0.0d0 1495 phfx = 0.0d0 1496!OMP END MASTER 1497 1498 call D3dB_nx(1,n1) 1499 call D3dB_ny(1,n2) 1500 call D3dB_nz(1,n3) 1501 !call D3dB_n2ft3d(1,n2ft3d) 1502 value = BA_push_get(mt_real,(2*n2ft3d),'dn_hfx',dn(2),dn(1)) 1503 value = value.and. 1504 > BA_push_get(mt_real,(n2ft3d),'tmp1_hfx',tmp1(2),tmp1(1)) 1505 if (.not. value) call errquit('out of stack memory',0, MA_ERR) 1506 call Parallel_shared_vector_szero(.false., 1507 > 2*n2ft3d,real_mb(dn(1)),1) 1508 call Parallel_shared_vector_szero(.true., 1509 > n2ft3d,real_mb(tmp1(1)),1) 1510 1511 scal1 = real(1.0d0/dble(n1*n2*n3)) 1512 scal2 = real(1.0d0/lattice_omega()) 1513 dv = scal1/scal2 1514 1515 k1 = 1 1516 do ms=1,ispin 1517 do i=1,norbs(ms) 1518!OMP MASTER 1519 dbl_mb(ehfx_orb(1,ms)+i-1) = 0.0d0 1520!OMP END MASTER 1521 do j=1,i 1522 1523 if (mod(k1,npj).eq.taskid_j) then 1524 index1 = (int_mb(orbital_list(1,ms)+i-1)-1)*n2ft3d + 1 1525 index2 = (int_mb(orbital_list(1,ms)+j-1)-1)*n2ft3d + 1 1526 1527* **** generate dnij **** 1528 call D3dBs_rr_Mul(1,psi1_r(index1),psi2_r(index2), 1529 > real_mb(dn(1))) 1530 call D3dBs_r_SMul1(1,scal2,real_mb(dn(1))) 1531 call D3dBs_r_Zero_Ends(1,real_mb(dn(1))) 1532 1533* **** generate dnji**** 1534 call D3dBs_rr_Mul(1,psi1_r(index2),psi2_r(index1), 1535 > real_mb(dn(1)+n2ft3d)) 1536 call D3dBs_r_SMul1(1,scal2,real_mb(dn(1)+n2ft3d)) 1537 call D3dBs_r_Zero_Ends(1,real_mb(dn(1)+n2ft3d)) 1538 1539* ***** screened coulomb solver **** 1540 if (solver_type.eq.1) then 1541 1542* **** generate dng **** 1543 call D3dBs_r_SMul1(1,scal1,real_mb(dn(1))) 1544 call D3dBs_rc_pfft3f(1,0,real_mb(dn(1))) 1545 call Packs_c_pack(0,real_mb(dn(1))) 1546 1547 call D3dB_r_SMul1(1,scal1,real_mb(dn(1)+n2ft3d)) 1548 call D3dB_rc_pfft3f(1,0,real_mb(dn(1)+n2ft3d)) 1549 call Packs_c_pack(0,real_mb(dn(1)+n2ft3d)) 1550 1551* **** get Ecoul energy **** 1552 call coulomb_screened_s_v(real_mb(dn(1)), 1553 > real_mb(tmp1(1))) 1554 call Packs_cc_dot(0,real_mb(dn(1)+n2ft3d), 1555 > real_mb(tmp1(1)),seh) 1556 eh = dble(0.5*seh*lattice_omega()) 1557 1558* ***** free-space coulomb solver **** 1559 else 1560 call coulomb2_s_v(real_mb(dn(1)),real_mb(tmp1(1))) 1561 call D3dBs_rr_dot(1,real_mb(dn(1)+n2ft3d), 1562 > real_mb(tmp1(1)),seh) 1563 eh = dble(0.5*seh*dv) 1564 1565 !if (taskid.eq.0) write(*,*) " - ms,i,j,ehfx=",ms,i,j,eh 1566 1567 end if 1568 1569* **** apply hfx_parameter, double eh for restricted, and calculcate ph **** 1570 eh = eh*hfx_parameter 1571 if (ispin0.eq.1) eh = eh + eh 1572 ph = 2.0d0*eh 1573 1574!OMP MASTER 1575 ehfx = ehfx - eh 1576 phfx = phfx - ph 1577 dbl_mb(ehfx_orb(1,ms)+i-1) = dbl_mb(ehfx_orb(1,ms)+i-1)-eh 1578!OMP END MASTER 1579 1580 !**** include off diagonal terms **** 1581 if (i.ne.j) then 1582!OMP MASTER 1583 ehfx = ehfx - eh 1584 phfx = phfx - ph 1585 dbl_mb(ehfx_orb(1,ms)+i-1) = dbl_mb(ehfx_orb(1,ms)+i-1) 1586 > - eh 1587!OMP END MASTER 1588 end if 1589 1590 end if 1591 k1 = k1 + 1 1592 1593 end do 1594 end do 1595 call D1dB_Vector_SumAll(norbs(ms),dbl_mb(ehfx_orb(1,ms))) 1596 end do 1597 1598 value = BA_pop_stack(tmp1(2)) 1599 value = value.and.BA_pop_stack(dn(2)) 1600 if (.not. value) 1601 > call errquit('pspw_energy_sHFX2_sub:popping stack memory',0, 1602 > MA_ERR) 1603 1604 call D1dB_SumAll(ehfx) 1605 call D1dB_SumAll(phfx) 1606 end if 1607 ehfx_out = ehfx 1608 phfx_out = phfx 1609 1610 return 1611 end 1612 1613 1614 1615 1616