1SUBROUTINE exx_gs(nfi, c) 2 !======================================================================================= 3 ! Code Version 1.0 (Princeton University, September 2014) 4 !======================================================================================= 5 ! Note: From this code exx_potential is returned after multiplying mixing parameter exxalfa. 6 ! Later the exx_potential is added with GGA potential in forces.f90. 7 ! In the future, full exx_potential should be returned and the mixing parameter exxalfa 8 ! should be multiplied in forces.f90. 9 !======================================================================================= 10 ! 11 USE kinds, ONLY : DP 12 USE constants, ONLY : fpi 13 USE fft_base, ONLY : dffts, dfftp 14 USE mp, ONLY : mp_barrier, mp_sum 15 USE mp_global, ONLY : nproc_image, me_image, root_image, intra_image_comm, intra_bgrp_comm, me_bgrp 16 USE parallel_include 17 USE io_global, ONLY : stdout 18 USE cell_base, ONLY : omega, ainv, h 19 USE cell_base, ONLY : ibrav 20 USE cell_base, ONLY : isotropic !True if volume option is chosen for cell_dofree 21 USE electrons_base, ONLY : nbsp, nbspx, nspin 22 USE gvecw, ONLY : ngw 23 USE wannier_module, ONLY : wfc 24 USE exx_module, ONLY : my_nbspx, my_nbsp, my_nxyz, index_my_nbsp,rk_of_obtl, lindex_of_obtl 25 USE exx_module, ONLY : selfv, pairv, pair_dist 26 USE exx_module, ONLY : pair_label, pair_status, pair_step 27 USE exx_module, ONLY : exx_potential 28 USE exx_module, ONLY : n_exx, sc_fac ! EXX step and the performance scaling factor 29 USE exx_module, ONLY : coe_1st_derv, coeke, nord1, nord2, fornberg 30 USE exx_module, ONLY : thdtood, odtothd_in_sp, thdtood_in_sp 31 USE exx_module, ONLY : np_in_sp_s , np_in_sp_me_s , np_in_sp_p , np_in_sp_me_p 32 USE exx_module, ONLY : xx_in_sp,yy_in_sp,zz_in_sp,sc_xx_in_sp,sc_yy_in_sp,sc_zz_in_sp 33 USE energies, ONLY : exx 34 USE printout_base, ONLY : printout_base_open, printout_base_unit, printout_base_close 35 USE wannier_base, ONLY : neigh, dis_cutoff, texx_cube 36 USE mp_wave, ONLY : redistwfr 37 ! 38 USE time_step, ONLY : tps !md time in picoseconds 39 USE io_global, ONLY : ionode !logical for I/O node 40 USE cp_main_variables, ONLY : iprint_stdout !print control 41 USE printout_base, ONLY : printout_base_close!close print unit 42 USE printout_base, ONLY : printout_base_open !open print unit 43 USE printout_base, ONLY : printout_base_unit !printout_base_unit 44 USE exx_module, ONLY : dexx_dh 45 USE exx_module, ONLY : exx_energy_cell_derivative 46 USE exx_module, ONLY : exxalfa 47 USE fft_helper_subroutines 48 use exx_module, only : psime_pair_recv, psime_pair_send 49 ! cubic domain related "use" variables 50 USE exx_module, ONLY : selfrho, pairrho 51 USE exx_module, ONLY : nrg 52 USE exx_module, ONLY : s_me_r, s_ps_r, p_me_r, p_ps_r 53 USE exx_module, ONLY : n_s_me, n_s_ps, n_p_me, n_p_ps 54 USE exx_module, ONLY : lmax 55 USE exx_module, ONLY : me_cs 56 USE exx_module, ONLY : me_rs 57 USE exx_module, ONLY : me_ri 58 USE exx_module, ONLY : me_rc 59 USE exx_module, ONLY : coemicf !MCA/HK : dirty hack for std CG 60 USE exx_module, ONLY : exx_energy_cell_derivative_cube 61 ! 62 IMPLICIT NONE 63 COMPLEX(DP) c(ngw, nbspx) ! wave functions at time t 64#if defined(__MPI) 65 ! 66 INTEGER :: istatus(MPI_STATUS_SIZE) 67#endif 68 INTEGER ir, ip, i, j,nfi, ierr, nnrtot, nr1s,nr2s,nr3s 69 INTEGER nj_max, iunit 70 REAl(DP) sa1,a(3),ha, hb, hc 71 REAl(DP) hcub, centerx, centery, centerz 72 REAL(DP) middle(3) 73 REAL(DP) d_pair ! pair distance 74 ! 75 REAL(DP), ALLOCATABLE :: vpsil(:,:) 76 REAL(DP), ALLOCATABLE :: rhol(:),rho_in_sp(:),vl(:) 77 ! cubic domain related variables 78 INTEGER l,m 79 INTEGER lid,gid 80 INTEGER s_me_r1,s_me_r2,s_me_r3,s_me_r4,s_me_r5,s_me_r6 81 REAl(DP) inv_omega 82 REAl(DP) dist, dxy, costheta, sintheta 83 COMPLEX(DP) cxy 84 REAl(DP) plm(0:lmax,0:lmax) 85 REAl(DP) ha_proj(3), hb_proj(3), hc_proj(3) 86 REAL(DP) dq1,dq2,dq3 87 REAL(DP) dqs1,dqs2,dqs3 88 REAL(DP) start_timer, stop_timer 89 REAL(DP), ALLOCATABLE :: rhome(:),rhops(:),potme(:) 90 INTEGER :: pos, oldest_step, guess_status 91 REAl(DP), ALLOCATABLE :: psime(:) 92 REAL(DP), ALLOCATABLE :: psi(:,:) 93 ! 94 INTEGER iobtl, gindex_of_iobtl, irank, rk_of_obtl_trcv, rk_of_obtl_tbs 95 INTEGER obtl_tbs, lindex_obtl_tbs, obtl_trcv, lindex_obtl_trcv 96 INTEGER obtl_tbadd 97 REAL(DP) totalenergy, totalenergyg, tot_energy(nbsp) 98 REAL(DP) total_exx_derv(3,3), total_exx_derv_g(3,3) 99 REAL(DP) selfe, paire(neigh/2), & 100 self_dexx_dhab(3,3), pair_dexx_dhab(3,3,neigh/2) 101 ! 102 INTEGER, ALLOCATABLE :: isendreq(:) 103 INTEGER, ALLOCATABLE :: irecvreq(:) 104 INTEGER :: irecv_count 105 INTEGER :: isend_count 106 INTEGER :: itr 107 INTEGER :: jtr 108 INTEGER :: tran(3) 109 INTEGER :: proc 110 INTEGER :: tmp_iobtl 111 INTEGER :: me 112 ! 113 INTEGER :: k,jj,ii,ia,ib,ic,my_var,my_var2,my_var3,i_fac,va,cgstep 114 INTEGER :: ndim,nogrp 115 INTEGER, ALLOCATABLE :: obtl_recv(:,:), obtl_send(:,:), num_recv(:), num_send(:) 116 REAl(DP), ALLOCATABLE :: wannierc(:,:),wannierc_tmp(:,:) 117 REAl(DP), ALLOCATABLE :: psil(:) 118 REAL(DP), ALLOCATABLE :: exx_tmp(:,:),exx_tmp3(:,:) 119 INTEGER, ALLOCATABLE :: sdispls(:), sendcount(:) 120 INTEGER, ALLOCATABLE :: rdispls(:), recvcount(:) 121 INTEGER, ALLOCATABLE :: sdispls1(:), sendcount1(:) 122 INTEGER, ALLOCATABLE :: rdispls1(:), recvcount1(:) 123 ! 124 REAL(DP) :: Jim(3,3) ! jacobian [d/d x] [d/d a] 125 ! |d/d y| = [J] |d/d b| 126 ! [d/d z] [d/d c] 127 INTEGER :: psgsn=3 !MCA: number of steps that arrays are stored. Memory overhead from here? 128 ! 129 !============================================================================================= 130 ! 131 CALL start_clock('exx_gs_setup') 132 CALL exx_gs_setup_common 133 if (texx_cube) then 134 CALL exx_gs_setup_cube 135 else 136 CALL exx_gs_setup_sphere 137 end if 138 ! 139 !======================================================================== 140 ! 141 CALL stop_clock('exx_gs_setup') 142 ! 143 !------------------------------------------------------------------------- 144 ! Get the Wannier center and compute the pair overlap matrix 145 !------------------------------------------------------------------------- 146 ! 147 CALL start_clock('exx_pairs') 148 ! 149 ndim=MAX(nproc_image, nbsp) 150 ALLOCATE (wannierc(3,ndim)); wannierc=0.0_DP 151 ALLOCATE (wannierc_tmp(3,nbsp)); wannierc_tmp=0.0_DP 152 ! 153 ! Adjust Cartesian coordinates of wannier centres according to periodic boundary conditions... 154 ! N.B.: PBC are imposed here in the range [0,1)... 155 ! 156 DO iobtl=1,nbsp 157 ! 158 wannierc_tmp(1,iobtl)=ainv(1,1)*wfc(1,iobtl)+ainv(1,2)*wfc(2,iobtl)+ainv(1,3)*wfc(3,iobtl) ! s = h^-1 r 159 wannierc_tmp(2,iobtl)=ainv(2,1)*wfc(1,iobtl)+ainv(2,2)*wfc(2,iobtl)+ainv(2,3)*wfc(3,iobtl) ! s = h^-1 r 160 wannierc_tmp(3,iobtl)=ainv(3,1)*wfc(1,iobtl)+ainv(3,2)*wfc(2,iobtl)+ainv(3,3)*wfc(3,iobtl) ! s = h^-1 r 161 ! 162 wannierc_tmp(1,iobtl)=wannierc_tmp(1,iobtl)-FLOOR(wannierc_tmp(1,iobtl)) ! impose PBC on s in range: [0,1) 163 wannierc_tmp(2,iobtl)=wannierc_tmp(2,iobtl)-FLOOR(wannierc_tmp(2,iobtl)) ! impose PBC on s in range: [0,1) 164 wannierc_tmp(3,iobtl)=wannierc_tmp(3,iobtl)-FLOOR(wannierc_tmp(3,iobtl)) ! impose PBC on s in range: [0,1) 165 ! 166 wannierc(1,iobtl)=h(1,1)*wannierc_tmp(1,iobtl)+h(1,2)*wannierc_tmp(2,iobtl)+h(1,3)*wannierc_tmp(3,iobtl) ! r = h s 167 wannierc(2,iobtl)=h(2,1)*wannierc_tmp(1,iobtl)+h(2,2)*wannierc_tmp(2,iobtl)+h(2,3)*wannierc_tmp(3,iobtl) ! r = h s 168 wannierc(3,iobtl)=h(3,1)*wannierc_tmp(1,iobtl)+h(3,2)*wannierc_tmp(2,iobtl)+h(3,3)*wannierc_tmp(3,iobtl) ! r = h s 169 ! 170 wannierc_tmp(:,iobtl)=wannierc(:,iobtl) ! keep a temporary copy to compute pair indices 171 ! 172 END DO 173 ! 174 ! make copy of wannier centres when number of processors > number of bands 175 ! 176 IF(nproc_image.GT.nbsp) THEN 177 ! 178 DO iobtl=nbsp+1,nproc_image 179 ! 180 ir=MOD(iobtl,nbsp) 181 IF(ir.EQ.0 )THEN 182 wannierc(:,iobtl)=wannierc(:,nbsp) 183 ELSE 184 wannierc(:,iobtl)=wannierc(:,ir) 185 END IF 186 ! 187 END DO 188 ! 189 END IF 190 ! 191 ! overlap is the unique neighbor list that for each band or processor image (ndim) 192 ! num_recv is the number of unique neighbors for each band or processor image (ndim) 193 !--------------------------------------------------------------------------------------------- 194 ALLOCATE (obtl_recv(neigh/2, ndim)); obtl_recv=0 195 ALLOCATE (obtl_send(neigh, ndim)); obtl_send=0 196 ALLOCATE (num_recv(ndim)); num_recv=0 197 ALLOCATE (num_send(ndim)); num_send=0 198 ! generate the unique neighbor list 199 ! 200 CALL exx_index_pair(wannierc_tmp, obtl_recv, num_recv, nj_max, ndim) 201 ! 202 IF (ALLOCATED(wannierc_tmp)) DEALLOCATE(wannierc_tmp) 203 !--------------------------------------------------------------------------------------------- 204 DO itr = 1, ndim 205 ! 206 DO jtr = 1, neigh/2 207 ! 208 IF (obtl_recv(jtr, itr) .NE. 0) THEN 209 ! 210 num_send( obtl_recv(jtr, itr) ) = num_send( obtl_recv(jtr, itr) ) + 1 211 obtl_send( num_send( obtl_recv(jtr, itr) ), obtl_recv(jtr, itr) ) = itr 212 ! 213 END IF 214 ! 215 END DO 216 ! 217 END DO 218 !--------------------------------------------------------------------------------------------- 219 ! 220 CALL stop_clock('exx_pairs') 221 ! 222 !------------------------------------------------------------------------- 223 ! 224 ! Allocate variables to store potentials for 3 steps .... 225 ! 226 IF (n_exx.EQ.0) THEN 227 ! 228 ! the following variables are used in the extrapolation of exx potentials 229 if (texx_cube) then 230 ALLOCATE( selfv ( n_s_ps, psgsn, my_nbspx), stat=ierr ); selfv=0.0_DP 231 ALLOCATE( pairv ( n_p_ps, psgsn, neigh, my_nbspx), stat=ierr ); pairv=0.0_DP 232 ALLOCATE( selfrho( n_s_ps, psgsn, my_nbspx), stat=ierr ); selfrho=0.0_DP 233 ALLOCATE( pairrho( n_p_ps, psgsn, neigh, my_nbspx), stat=ierr ); pairrho=0.0_DP 234 ALLOCATE( pair_label( neigh, my_nbspx ), stat=ierr ); pair_label=0.0_DP 235 ALLOCATE( pair_step( neigh, my_nbspx ), stat=ierr ); pair_step=0.0_DP 236 ALLOCATE( pair_status( neigh, my_nbspx ), stat=ierr ); pair_status=0.0_DP 237 else 238 ALLOCATE( selfv ( np_in_sp_s, psgsn, my_nbspx), stat=ierr ); selfv=0.0_DP 239 ALLOCATE( pairv ( np_in_sp_p, psgsn, neigh, my_nbspx), stat=ierr ); pairv=0.0_DP 240 ALLOCATE( pair_dist( psgsn, neigh, my_nbspx), stat=ierr ); pair_dist=0.0_DP 241 end if 242 ! 243 END IF 244 ! 245 !------------------------------------------------------------------------- 246 ! 247 ! update exx step ... 248 ! 249 n_exx = n_exx + 1 250 ! 251 !========================================================================= 252 ! 253 ! obtain orbitals on each local processor, stored in psi 254 ! 255 ALLOCATE ( psi( nnrtot, my_nbsp(me ) ) ); psi=0.0_DP 256 ALLOCATE ( vpsil(nnrtot, my_nbsp(me ) ) ); vpsil=0.0_DP 257 ! 258 CALL start_clock('r_orbital') 259 ! 260 ! In c variable, the plane wave wavefunction, is distributed in nr3 parts 261 ! and stored in each parallel mpitasks (or processors). 262 ! The exx_psi maps c in to psi, which is in real space grid. 263 ! 264 ! c -- one processor has a part of information from all bands 265 ! psi -- one processor has complete information of one band (or more) 266 ! 267 CALL exx_psi(c, psi, nnrtot, my_nbsp, my_nxyz, nbsp) 268 ! 269 CALL stop_clock('r_orbital') 270 ! 271 !=============================================================================== 272 ! 273 ! PAIR AND SELF POTENTIALS ARE CALCULATED IN ONE BIG LOOP 274 ! 275 !=============================================================================== 276 ! 277 !======================================================================== 278 ! THE MOST OUTER LOOP STARTS: 279 !======================================================================== 280 ! 281 ! flag identifying whether the pair communication is done or not 282 ! 283 ALLOCATE( irecvreq( neigh * my_nbsp(me) ) ) 284 ALLOCATE( isendreq( neigh * my_nbsp(me) ) ) 285 ! 286 ! obtain psi in sphere (psil) for neighbors 287 ! 288 call start_clock('exx_big_alloc') 289 if (texx_cube) then 290 if (.not.allocated(psime_pair_send)) then 291 ALLOCATE ( psime_pair_send(n_p_me, neigh, my_nbsp(me)) ); psime_pair_send=0.0_DP 292 end if 293 if (.not.allocated(psime_pair_recv)) then 294 ALLOCATE ( psime_pair_recv(n_p_me, neigh, my_nbsp(me)) ); psime_pair_recv=0.0_DP 295 end if 296 else 297 if (.not.allocated(psime_pair_send)) then 298 ALLOCATE ( psime_pair_send(np_in_sp_me_p, neigh, my_nbsp(me)) ); psime_pair_send=0.0_DP 299 end if 300 if (.not.allocated(psime_pair_recv)) then 301 ALLOCATE ( psime_pair_recv(np_in_sp_me_p, neigh, my_nbsp(me)) ); psime_pair_recv=0.0_DP 302 end if 303 end if 304 call stop_clock('exx_big_alloc') 305 ! 306 ! initialize totalenergy and derivatives 307 ! 308 totalenergy = 0.0_DP 309 total_exx_derv(:,:) =0.0_DP 310 ! 311 ! my_var is the maximum of nbsp or nproc_image 312 my_var = MAX(nproc_image, nbsp) 313 ! 314#if defined(__MPI) 315 irecv_count = 0 316 isend_count = 0 317 ! ---------- start of Non-blocking communication (1) ----------- 318 ! In this region of the code, we start with non-blocking send of 319 ! local orbitals to form MLWF product potentials on the non-self 320 ! multipole expansion domain. This part allows all pairs to be 321 ! send simultaneously instead of one-by-one... 322 ! -------------------------------------------------------------- 323 CALL start_clock('send_psi') 324 ! 325 ! we should use my_nbspx (maxval(my_nbsp(me))) here 326 ! 327 DO iobtl = 1, my_nbspx 328 ! 329 gindex_of_iobtl = index_my_nbsp(iobtl, me) 330 if (gindex_of_iobtl.gt.nbsp) cycle ! index_my_nbsp is set to nbsp+1 when not assigned 331 ! 332 !======================================================================== 333 ! 334 !prep receives 335 DO itr = 1, neigh/2 336 ! 337 obtl_tbs = obtl_recv( itr, gindex_of_iobtl ) 338 ! 339 IF ( obtl_tbs .NE. 0 ) THEN 340 ! 341 rk_of_obtl_tbs = rk_of_obtl(obtl_tbs) 342 ! 343 irecv_count = irecv_count + 1 344 if (texx_cube) then 345 CALL MPI_IRECV( psime_pair_recv(1, itr, iobtl), n_p_me, MPI_DOUBLE_PRECISION, rk_of_obtl_tbs, & 346 obtl_tbs*ndim+gindex_of_iobtl, intra_image_comm, irecvreq(irecv_count), ierr) 347 else 348 CALL MPI_IRECV( psime_pair_recv(1, itr, iobtl), np_in_sp_me_p, MPI_DOUBLE_PRECISION, rk_of_obtl_tbs, & 349 obtl_tbs*ndim+gindex_of_iobtl, intra_image_comm, irecvreq(irecv_count), ierr) 350 end if 351 ! 352 END IF 353 ! 354 END DO 355 ! 356 !prep sends 357 DO itr = 1, neigh 358 ! 359 obtl_trcv = obtl_send( itr, gindex_of_iobtl ) 360 ! 361 IF ( obtl_trcv .NE. 0 ) THEN 362 ! 363 rk_of_obtl_trcv = rk_of_obtl(obtl_trcv) 364 ! 365 ! calculate mid point of two wannier centers 366 CALL getmiddlewc( wannierc(1, gindex_of_iobtl), wannierc(1, obtl_trcv), h, ainv, middle ) 367 ! 368 ! calculate translation vector from the center of the box 369 CALL getsftv( nr1s, nr2s, nr3s, h, ainv, middle, tran) 370 ! 371 ! get the localized psi around the mid point of two wannier centers 372 ! note: the psime is centered at the center of the box 373 ! (using the translation vector "tran" from middle of wfc to the center of box) 374 if (texx_cube) then 375 CALL getpsicb( nrg, p_me_r, psi(1,iobtl), psime_pair_send(1, itr, iobtl), tran) 376 else 377 CALL getpsil( nnrtot, np_in_sp_me_p, psi(1, iobtl), psime_pair_send(1, itr, iobtl), tran) 378 end if 379 ! 380 isend_count = isend_count + 1 381 if (texx_cube) then 382 CALL MPI_ISEND( psime_pair_send(1, itr, iobtl), n_p_me, MPI_DOUBLE_PRECISION, rk_of_obtl_trcv, & 383 gindex_of_iobtl*ndim+obtl_trcv, intra_image_comm, isendreq(isend_count), ierr) 384 else 385 CALL MPI_ISEND( psime_pair_send(1, itr, iobtl), np_in_sp_me_p, MPI_DOUBLE_PRECISION, rk_of_obtl_trcv, & 386 gindex_of_iobtl*ndim+obtl_trcv, intra_image_comm, isendreq(isend_count), ierr) 387 end if 388 ! 389 END IF 390 ! 391 END DO 392 !======================================================================== 393 ! 394 END DO 395 ! 396 !========================================================================== 397 ! 398 CALL stop_clock('send_psi') 399 ! 400 !========================================================================== 401 ! 402 CALL start_clock('send_psi_wait') 403 ! 404 DO itr = 1, isend_count 405 CALL MPI_WAIT(isendreq(itr), istatus, ierr) 406 END DO 407 ! 408 DO itr = 1, irecv_count 409 CALL MPI_WAIT(irecvreq(itr), istatus, ierr) 410 END DO 411 ! 412 CALL stop_clock('send_psi_wait') 413 ! ------- end of Non-blocking communication (1): synchronize by wait ------ 414#endif 415 ! 416 !========================================================================= 417 ! after this loop ( do irank ), all the processor got all the overlapping orbitals 418 ! for the i_obtl orbital and ready to calculate pair potential 419 !========================================================================= 420 ! 421 !========================================================================= 422 ! CALCULATION STARTS HERE 423 !========================================================================= 424 ! printout header for the cgsteps 425 ! 426 IF ((MOD(nfi,iprint_stdout).EQ.0)) THEN 427 ! 428 IF (ionode) THEN 429 ! 430 iunit=printout_base_unit("ncg") 431 ! 432 CALL printout_base_open("ncg") 433 ! 434 WRITE(iunit,'(I8,F16.8)')nfi,tps 435 ! 436 END IF 437 ! 438 END IF 439 ! 440 CALL start_clock('getpairv') 441 ! 442 if (texx_cube) then 443 ! Do some allocations 444 IF(.not.ALLOCATED(psime )) ALLOCATE( psime(max(n_p_me,n_s_me)) ); 445 IF(.not.ALLOCATED(rhome )) ALLOCATE( rhome(max(n_p_me,n_s_me)) ); 446 IF(.not.ALLOCATED(rhops )) ALLOCATE( rhops(max(n_p_ps,n_s_ps)) ); 447 IF(.not.ALLOCATED(potme )) ALLOCATE( potme(max(n_p_me,n_s_me)) ); 448 end if 449 DO iobtl = 1, my_nbspx 450 ! 451 middle(:)=0.0_DP 452 ! 453 gindex_of_iobtl = index_my_nbsp(iobtl, me) 454 if (gindex_of_iobtl.gt.nbsp) cycle ! index_my_nbsp is set to nbsp+1 when not assigned 455 ! 456 IF ( gindex_of_iobtl .LE. my_var) THEN 457 ! 458 !-- second loop starts: calculate overlapping potential with the j_th orbitals -- 459 ! 460 ! my_var3 is the unique neighbor number for gindex_of_iobtl 461 my_var3=num_recv( gindex_of_iobtl ) 462 ! 463 do j = 1, my_var3 464 ! 465 ! my_var2 is the global index of the unique pair to gindex_of_iobtl 466 my_var2=obtl_recv(j,gindex_of_iobtl) 467 ! 468 IF (my_var2 .NE. 0) THEN 469 !================================================================================== 470 ! find the position of previous v/rho 471 !================================================================================== 472 ! 473 if (texx_cube) then 474 call solve_a_nonself_pair_cube 475 else 476 call solve_a_nonself_pair_sphere 477 end if 478 ! 479 paire(j) = paire(j) * 0.5_DP* hcub ! volume element hcub and trapezoidal rule prefactor 0.5_DP are included 480 totalenergy = totalenergy + 2.0_DP*paire(j) ! the factor of two comes from the identity of ij and ji pair 481 ! 482 IF (.NOT. (isotropic .AND. (ibrav.EQ.1) )) THEN 483 CALL start_clock('exx_cell_derv') 484 ! 485 ! EXX cell derivative (note: exxalfa is included in vofrho.f90 when calculate stress) 486 ! 487 if (texx_cube) then 488 CALL exx_energy_cell_derivative_cube(p_me_r, p_ps_r, tran, rhome, potme, & 489 ha_proj, hb_proj, hc_proj, Jim, pair_dexx_dhab(:,:,j)) 490 else 491 CALL exx_energy_cell_derivative(np_in_sp_me_p, np_in_sp_p, tran,& 492 vl, ha, hb, hc, rhol, pair_dexx_dhab(:,:,j)) 493 end if 494 ! 495 ! volume element hcub and trapezoidal rule prefactor 0.5_DP are included 496 ! 497 pair_dexx_dhab(:,:,j) = pair_dexx_dhab(:,:,j)*0.5_DP*hcub 498 ! 499 ! accumulate the derivative from different pair terms 500 ! 501 total_exx_derv(:,:) = total_exx_derv(:,:) + 2.0_DP*pair_dexx_dhab(:,:,j) 502 ! 503 ! 504 ! if isotropic => calculate the stress tensor in vofrho.f90 505 ! 506 CALL stop_clock('exx_cell_derv') 507 END IF 508 ! 509 if (.not.texx_cube) then 510 IF (ALLOCATED(psil)) DEALLOCATE(psil) 511 IF (ALLOCATED(rhol)) DEALLOCATE(rhol) 512 IF (ALLOCATED(rho_in_sp)) DEALLOCATE(rho_in_sp) 513 IF (ALLOCATED(vl)) DEALLOCATE(vl) 514 end if 515 ! 516 END IF 517 ! 518 END DO !for j 519 ! 520 END IF !gindex_of_iobtl <= nbsp 521 ! 522 END DO 523 CALL stop_clock('getpairv') 524 ! 525 !=============================================================================== 526 ! After this loop, each processor finished the pair potentials for the 527 ! iobtl orbital, and shall talk to send/recv vpsiforj 528 !=============================================================================== 529 ! 530 !=============================================================================== 531 ! INITIALIZE SEND VPSI BEFORE CALCULATE PAIR 532 !=============================================================================== 533 ! 534#if defined(__MPI) 535 irecv_count = 0 536 isend_count = 0 537 ! 538 ! ---------- start of Non-blocking communication (2) ----------- 539 ! In this region of the code, we use non-blocking MPI feature to 540 ! send the EXX contributions to orbital force and asynchronously 541 ! overlap with the self-exchange computation... 542 ! -------------------------------------------------------------- 543 CALL start_clock('send_v') 544 ! 545 !======================================================================== 546 ! 547 DO iobtl = 1, my_nbspx 548 ! 549 !======================================================================== 550 ! 551 gindex_of_iobtl = index_my_nbsp(iobtl, me) 552 if (gindex_of_iobtl.gt.nbsp) cycle ! index_my_nbsp is set to nbsp+1 when not assigned 553 ! 554 DO itr = 1, neigh/2 555 ! 556 obtl_trcv = obtl_recv( itr, gindex_of_iobtl ) 557 ! 558 IF ( obtl_trcv .NE. 0 ) THEN 559 ! 560 rk_of_obtl_trcv = rk_of_obtl(obtl_trcv) 561 ! 562 isend_count = isend_count + 1 563 if (texx_cube) then 564 CALL MPI_ISEND( psime_pair_recv(1,itr,iobtl), n_p_me, MPI_DOUBLE_PRECISION, rk_of_obtl_trcv, & 565 gindex_of_iobtl*ndim+obtl_trcv, intra_image_comm, isendreq(isend_count), ierr) 566 else 567 CALL MPI_ISEND( psime_pair_recv(1,itr,iobtl), np_in_sp_me_p, MPI_DOUBLE_PRECISION, rk_of_obtl_trcv, & 568 gindex_of_iobtl*ndim+obtl_trcv, intra_image_comm, isendreq(isend_count), ierr) 569 end if 570 ! 571 ! WRITE(my_unit,*) "itr = ", itr 572 ! WRITE(my_unit,*) "iobtl = ", iobtl 573 ! WRITE(my_unit,*) "gindex_of_iobtl = ",gindex_of_iobtl 574 ! WRITE(my_unit,*) "obtl_trcv: ", obtl_trcv 575 ! WRITE(my_unit,*) "rk_of_obtl_trcv: ", rk_of_obtl_trcv 576 ! WRITE(my_unit,*) "tag: ", gindex_of_iobtl*nbsp+obtl_trcv 577 ! 578 END IF 579 ! 580 END DO 581 ! 582 !======================================================================== 583 ! 584 !MCA: send and receive now change their roles. Before, we were communicating 585 !the orbitals. Now, it is the potential that we got out of Poisson. Send is 586 !recieving and recv is sending. For now on, only psime_pair_send is used for 587 !computation. 588 ! 589 DO itr = 1, neigh 590 ! 591 obtl_tbs = obtl_send( itr, gindex_of_iobtl ) 592 ! 593 IF ( obtl_tbs .NE. 0 ) THEN 594 ! 595 rk_of_obtl_tbs = rk_of_obtl(obtl_tbs) 596 ! 597 irecv_count = irecv_count + 1 598 if (texx_cube) then 599 CALL MPI_IRECV( psime_pair_send(1, itr, iobtl), n_p_me, MPI_DOUBLE_PRECISION, rk_of_obtl_tbs, & 600 obtl_tbs*ndim+gindex_of_iobtl, intra_image_comm, irecvreq(irecv_count), ierr) 601 else 602 CALL MPI_IRECV( psime_pair_send(1, itr, iobtl), np_in_sp_me_p, MPI_DOUBLE_PRECISION, rk_of_obtl_tbs, & 603 obtl_tbs*ndim+gindex_of_iobtl, intra_image_comm, irecvreq(irecv_count), ierr) 604 end if 605 ! 606 ! WRITE(my_unit,*) "itr = ", itr 607 ! WRITE(my_unit,*) "iobtl = ", iobtl 608 ! WRITE(my_unit,*) "gindex_of_iobtl = ", gindex_of_iobtl 609 ! WRITE(my_unit,*) "obtl_tbs: ", obtl_tbs 610 ! WRITE(my_unit,*) "rk_of_obtl_tbs: ", rk_of_obtl_tbs 611 ! WRITE(my_unit,*) "tag: ", obtl_tbs*nbsp+gindex_of_iobtl 612 ! 613 ! 614 END IF 615 ! 616 END DO 617 ! 618 !======================================================================== 619 ! 620 END DO 621 ! 622 !======================================================================== 623 ! 624 CALL stop_clock('send_v') 625#endif 626 ! 627 !========================================================================= 628 ! 629 ! SELF POTENTIAL FOR EACH ORBITAL STARTS HERE 630 ! 631 !========================================================================= 632 ! 633 CALL start_clock('getselfv') 634 ! 635 !========================================================================= 636 DO iobtl = 1, my_nbspx 637 ! 638 IF (iobtl.LE.my_nbsp(me)) THEN ! skip when the loop of my_nbspx goes outside of scope 639 ! 640 IF (me.GT.(nbsp*(sc_fac-1))) THEN ! compatible with more processors than nbsp 641 ! 642 gindex_of_iobtl = index_my_nbsp(iobtl, me) 643 if (gindex_of_iobtl.gt.nbsp) cycle ! index_my_nbsp is set to nbsp+1 when not assigned 644 ! 645 if (texx_cube) then 646 call solve_a_self_pair_cube 647 else 648 call solve_a_self_pair_sphere 649 end if 650 selfe = selfe * 0.5_DP * hcub ! volume element hcub and trapezoidal rule prefactor 0.5_DP are included 651 totalenergy = totalenergy + selfe 652 ! 653 !IF(me .GT. nbsp) THEN 654 ! vpsil(:,iobtl) = 0.0_DP 655 !END IF 656 ! 657 IF (.NOT. (isotropic .AND. (ibrav.EQ.1))) THEN 658 ! 659 ! EXX cell derivative (note: need to include exxalfa later) 660 CALL start_clock('exx_cell_derv') 661 ! 662 if (texx_cube) then 663 CALL exx_energy_cell_derivative_cube(s_me_r, s_ps_r, tran, rhome, potme, & 664 ha_proj, hb_proj, hc_proj, Jim, self_dexx_dhab(:,:)) 665 else 666 CALL exx_energy_cell_derivative(np_in_sp_me_s, np_in_sp_s, tran,& 667 vl, ha, hb, hc, rhol, self_dexx_dhab(:,:)) 668 end if 669 ! 670 ! volume element hcub and trapezoidal rule prefactor 0.5_DP are included 671 ! 672 self_dexx_dhab(:,:) = self_dexx_dhab(:,:)*0.5_DP*hcub 673 ! 674 ! combine derivative with pair terms 675 ! 676 total_exx_derv(:,:) = total_exx_derv(:,:) + self_dexx_dhab(:,:) 677 ! 678 CALL stop_clock('exx_cell_derv') 679 ! 680 ! if isotropic => calculate the stress tensor in vofrho.f90 681 ! 682 END IF 683 ! 684 if (.not.texx_cube) then 685 IF (ALLOCATED(psil)) DEALLOCATE(psil) 686 IF (ALLOCATED(rhol)) DEALLOCATE(rhol) 687 IF (ALLOCATED(rho_in_sp)) DEALLOCATE(rho_in_sp) 688 IF (ALLOCATED(vl)) DEALLOCATE(vl) 689 end if 690 ! 691 END IF ! me 692 ! 693 END IF !iobtl 694 ! 695 END DO ! iobtl 696 if (texx_cube) then 697 IF (ALLOCATED(psime)) DEALLOCATE(psime) 698 IF (ALLOCATED(rhome)) DEALLOCATE(rhome) 699 IF (ALLOCATED(rhops)) DEALLOCATE(rhops) 700 IF (ALLOCATED(potme)) DEALLOCATE(potme) 701 end if 702 CALL stop_clock('getselfv') 703 !======================================================================== 704 ! 705#if defined(__MPI) 706 CALL start_clock('send_v_wait') 707 ! 708 DO itr = 1, isend_count 709 CALL MPI_WAIT(isendreq(itr), istatus, ierr) 710 END DO 711 ! 712 DO itr = 1, irecv_count 713 CALL MPI_WAIT(irecvreq(itr), istatus, ierr) 714 END DO 715 ! 716 CALL stop_clock('send_v_wait') 717 ! ------- end of Non-blocking communication (2): synchronize by wait ------ 718 ! 719 !======================================================================== 720 CALL start_clock('force_rec') 721 ! 722 DO iobtl = 1, my_nbspx 723 ! 724 gindex_of_iobtl = index_my_nbsp(iobtl, me) 725 if (gindex_of_iobtl.gt.nbsp) cycle ! index_my_nbsp is set to nbsp+1 when not assigned 726 ! 727 DO itr = 1, neigh 728 ! 729 obtl_tbadd = obtl_send(itr, gindex_of_iobtl) 730 ! 731 IF ( obtl_tbadd .NE. 0 ) THEN 732 ! 733 CALL getmiddlewc( wannierc(1,gindex_of_iobtl), wannierc(1,obtl_tbadd), h, ainv, middle ) 734 ! 735 ! calculate translation vector from the center of the box 736 CALL getsftv( nr1s, nr2s, nr3s, h, ainv, middle, tran ) 737 ! 738 ! upadate vpsil PBE0 739 ! 740 if (texx_cube) then 741 CALL updateforce_rec(nrg, p_me_r, vpsil(1,iobtl), psime_pair_send(1,itr,iobtl), tran) 742 else 743 !$omp parallel do private(ir) 744 DO ip = 1, np_in_sp_me_p 745 CALL l2goff (ip,ir,tran) ! local is centered at box center; global index is offset by tran 746 vpsil(ir,iobtl) = vpsil(ir,iobtl) + psime_pair_send(ip,itr,iobtl) 747 END DO 748 !$omp end parallel do 749 end if 750 ! 751 END IF 752 ! 753 END DO 754 ! 755 END DO ! iobtl 756 ! 757 CALL stop_clock('force_rec') 758 !======================================================================== 759#endif 760 ! 761 CALL start_clock('totalenergy') 762 ! 763 totalenergyg=0.0_DP ! mpi reduction variable initialization 764 exx=0.0_DP ! exx energy (used to handle the open/closed shell energy) 765 ! 766#if defined(__MPI) 767 ! collect the totalenergy of each mpi task to totalenergyg 768 CALL MPI_ALLREDUCE(totalenergy, totalenergyg, 1, MPI_DOUBLE_PRECISION, & 769 & MPI_SUM, intra_image_comm, ierr) 770 ! 771#else 772 totalenergyg = totalenergy 773#endif 774 exx = totalenergyg 775 IF (nspin .EQ. 1) exx = exx + totalenergyg ! if closed shell double the totalenergy 776 ! 777 !WRITE(stdout, '("EXX Energy",2F30.14," step",I7)')exx,totalenergyg*2.0_DP, nfi 778 ! 779 CALL stop_clock('totalenergy') 780 ! 781 CALL start_clock('exx_cell_derv') 782 ! 783 total_exx_derv_g(:,:) = 0.0_DP ! mpi reduction variable initialization 784 ! 785 IF (.NOT. (isotropic .AND. (ibrav.EQ.1))) THEN 786#if defined(__MPI) 787 ! collect the total_exx_derv of each mpi task to total_exx_derv_g 788 CALL MPI_ALLREDUCE(total_exx_derv(:,:), total_exx_derv_g(:,:), 9, & 789 MPI_DOUBLE_PRECISION, MPI_SUM, intra_image_comm, ierr) 790 ! 791#else 792 total_exx_derv_g(:,:) = total_exx_derv(:,:) 793#endif 794 END IF 795 ! 796 ! for closed shell case inclued spin factor of 2 797 dexx_dh(:,:) = total_exx_derv_g(:,:) 798 IF (nspin .EQ. 1) dexx_dh(:,:) = dexx_dh(:,:) + total_exx_derv_g(:,:) 799 ! 800 CALL stop_clock('exx_cell_derv') 801 ! 802 ! Local to global distribution of EXX potential 803 ! vpsil (local) --> exx_potential (global) 804 ! 805 CALL start_clock('vl2vg') 806 exx_potential=0.0_DP 807 ! 808 IF (nproc_image .LE. nbsp) THEN 809 ! 810#ifdef __MPI 811 CALL redistwfr ( exx_potential, vpsil, my_nxyz, my_nbsp, intra_image_comm, -1 ) 812#else 813 exx_potential = vpsil 814#endif 815 ! 816 ELSE 817 ! 818 !-----------Zhaofeng's vpsil (local) to exx_potential (global) ----------- 819 ! 820 nogrp = fftx_ntgrp(dffts) 821 ! 822 ALLOCATE( sdispls(nproc_image), sendcount(nproc_image) ); sdispls=0; sendcount=0 823 ALLOCATE( rdispls(nproc_image), recvcount(nproc_image) ); rdispls=0; recvcount=0 824 ALLOCATE( sdispls1(nogrp), sendcount1(nogrp) ); sdispls1=0; sendcount1=0 825 ALLOCATE( rdispls1(nogrp), recvcount1(nogrp) ); rdispls1=0; recvcount1=0 826 ! 827 DO proc = 1, nproc_image 828 ! 829 IF (me <= nogrp*nr3s) THEN 830 sendcount(proc) =nr1s*nr2s/nogrp 831 ELSE 832 sendcount(proc) = 0 833 END IF 834 !proc 1 holds the nr1s*nr2s/nogrp information of 1024 orbital(duplicate) 835 !proc 640 as well 836 !however, 641 to 1024 idle 837 IF (proc <= nogrp*nr3s) THEN 838 recvcount(proc)=nr1s*nr2s/nogrp 839 ELSE 840 recvcount(proc)=0 841 END IF 842 ! 843 END DO 844 ! 845 sdispls(1) = 0 846 rdispls(1) = 0 847 ! 848 DO proc = 2, nproc_image 849 sdispls(proc)= sdispls(proc-1) + sendcount(proc-1) 850 rdispls(proc) = rdispls(proc-1) + recvcount(proc-1) 851 END DO 852 ! 853 ALLOCATE(exx_tmp (dffts%nnr,nproc_image/nogrp)); exx_tmp=0.0_DP 854 ALLOCATE(exx_tmp3(dffts%nnr,nproc_image/nogrp)); exx_tmp3=0.0_DP 855 ! 856#if defined(__MPI) 857 ! 858 CALL mp_barrier( intra_image_comm ) 859 CALL MPI_ALLTOALLV(vpsil(1,1), recvcount,rdispls,MPI_DOUBLE_PRECISION, & 860 & exx_tmp, sendcount,sdispls, MPI_DOUBLE_PRECISION, & 861 & intra_image_comm, ierr) 862#endif 863 ! 864 va = dffts%nnr/nogrp 865 DO j=1,nproc_image/nogrp,2 866 DO i=1,2*nogrp 867 ii=((i-1)/2)*va 868 jj=j+mod(i-1,2) 869 ia=(i-1-((i-1)/nogrp)*nogrp)*va 870 ib=j+(i-1)/nogrp 871 !$omp parallel do 872 DO ir=1,va 873 exx_tmp3(ii+ir,jj)=exx_tmp(ia+ir,ib) 874 END DO 875 !$omp end parallel do 876 END DO 877 END DO 878 ! 879 DO proc = 1 , nogrp 880 sendcount1(proc) = dffts%nnr/nogrp 881 recvcount1(proc) = dffts%nnr/nogrp 882 END DO 883 ! 884 rdispls1(1) = 0 885 sdispls1(1) = 0 886 ! 887 DO proc = 2, nogrp 888 sdispls1(proc) = sdispls1(proc-1) + sendcount1(proc-1) 889 rdispls1(proc) = rdispls1(proc-1) + recvcount1(proc-1) 890 END DO 891 ! 892#if defined(__MPI) 893 ! 894 DO ir=1,nproc_image/nogrp 895 CALL mp_barrier( fftx_tgcomm(dffts) ) 896 CALL MPI_ALLTOALLV(exx_tmp3(1,ir), sendcount1, sdispls1, MPI_DOUBLE_PRECISION, & 897 & exx_potential(1,ir),recvcount1, rdispls1, MPI_DOUBLE_PRECISION, & 898 & fftx_tgcomm(dffts), ierr) 899 END DO 900#endif 901 ! 902 DO ir=1,nbsp/nogrp 903 DO i=1,sc_fac-1 904 ii=i*nbsp/nogrp 905 !$omp parallel do 906 DO ia=1,dffts%nnr 907 exx_potential(ia,ir)=exx_potential(ia,ir)+exx_potential(ia,ir+ii) 908 END DO 909 !$omp end parallel do 910 END DO 911 END DO 912 ! 913 !-----------Zhaofeng's vpsil (local) to exx_potential (global) ----------- 914 ! 915 END IF ! vl2vg 916 ! 917 CALL stop_clock('vl2vg') 918 ! 919 !============================================================================== 920 IF (ALLOCATED(vpsil)) DEALLOCATE(vpsil) 921 IF (ALLOCATED(psi)) DEALLOCATE(psi) 922 IF (ALLOCATED(isendreq)) DEALLOCATE(isendreq) 923 IF (ALLOCATED(irecvreq)) DEALLOCATE(irecvreq) 924 IF (ALLOCATED(wannierc)) DEALLOCATE(wannierc) 925 IF (ALLOCATED(obtl_recv)) DEALLOCATE(obtl_recv) 926 IF (ALLOCATED(obtl_send)) DEALLOCATE(obtl_send) 927 IF (ALLOCATED(num_recv)) DEALLOCATE(num_recv) 928 IF (ALLOCATED(num_send)) DEALLOCATE(num_send) 929 IF (ALLOCATED(exx_tmp)) DEALLOCATE(exx_tmp) 930 IF (ALLOCATED(exx_tmp3)) DEALLOCATE(exx_tmp3) 931 IF (ALLOCATED(sdispls)) DEALLOCATE(sdispls) 932 IF (ALLOCATED(rdispls)) DEALLOCATE(rdispls) 933 IF (ALLOCATED(sdispls1)) DEALLOCATE(sdispls1) 934 IF (ALLOCATED(rdispls1)) DEALLOCATE(rdispls1) 935 IF (ALLOCATED(sendcount)) DEALLOCATE(sendcount) 936 IF (ALLOCATED(recvcount)) DEALLOCATE(recvcount) 937 IF (ALLOCATED(sendcount1)) DEALLOCATE(sendcount1) 938 IF (ALLOCATED(recvcount1)) DEALLOCATE(recvcount1) 939 ! 940 RETURN 941 contains 942 943 SUBROUTINE exx_gs_setup_common() 944 IMPLICIT NONE 945 ! 946 ! make processor index start from 1 947 ! 948 me = me_image+1 949 ! 950 ! number of real space gird along each lattice parameter directions (a1, a2, a3) 951 ! 952 nr1s=dfftp%nr1; nr2s=dfftp%nr2; nr3s=dfftp%nr3 953 ! 954 ! the length of each lattice parameters 955 ! 956 a(1)=DSQRT(h(1,1)*h(1,1)+h(2,1)*h(2,1)+h(3,1)*h(3,1)) ! lattice 1 957 a(2)=DSQRT(h(1,2)*h(1,2)+h(2,2)*h(2,2)+h(3,2)*h(3,2)) ! lattice 2 958 a(3)=DSQRT(h(1,3)*h(1,3)+h(2,3)*h(2,3)+h(3,3)*h(3,3)) ! lattice 3 959 ! 960 ! grid spacing in each lattice parameters 961 ! 962 ha = a(1) / DBLE(nr1s) !grid spacing in Lattice 1 direction 963 hb = a(2) / DBLE(nr2s) !grid spacing in Lattice 2 direction 964 hc = a(3) / DBLE(nr3s) !grid spacing in Lattice 3 direction 965 ! 966 ! total number of real space grid points in the global mesh 967 ! and the corresponding volume elements for each grid point 968 ! 969 nnrtot = nr1s * nr2s * nr3s 970 hcub = omega / DBLE(nnrtot) !nnrtot in parallel 971 ! 972 ! the x,y,z coordinates of the center of the box (gird center) 973 ! NOTE: center of the box is set to grid point at int(nr1/2), int(nr2/2), int(nr3/2) for every cell 974 ! 975 centerx = h(1,1)*DBLE(INT(nr1s/2))+h(1,2)*DBLE(INT(nr2s/2))+h(1,3)*DBLE(INT(nr3s/2)) ! r = h s 976 centery = h(2,1)*DBLE(INT(nr1s/2))+h(2,2)*DBLE(INT(nr2s/2))+h(2,3)*DBLE(INT(nr3s/2)) ! r = h s 977 centerz = h(3,1)*DBLE(INT(nr1s/2))+h(3,2)*DBLE(INT(nr2s/2))+h(3,3)*DBLE(INT(nr3s/2)) ! r = h s 978 ! 979 ! inverse volume 980 ! 981 sa1 = 1.0_DP/omega 982 ! 983 ! Compute coeke and renormalize 984 ! This part needs to be done once in constant volume simulation and 985 ! needs to be done every step in variable cell simulationulations ... 986 ! 987 ! get ha*d/da, ha^2*d^2/da^2 stencil and cross coefficients 988 ! 1. for the finite difference coefficients, we follow B. Fornberg in 989 ! Math. Comp. 51 (1988), 699-706 990 ! 991 CALL fornberg(nord1, nord2,coe_1st_derv(:,1),coeke(:,1,1),coeke(:,1,2),ierr) 992 ! 993 IF (ierr .ne. 0) THEN 994 WRITE(stdout,*) ' ERROR: Wrong parameter in CALL of Fornberg' 995 WRITE(stdout,*) ' STOP in exx_gs' 996 RETURN 997 END IF 998 !RENORMALIZE COEKES WITH RESPECT TO THE GRID SPACING 999 ! first derivative coefficients 1000 ! 1001 coe_1st_derv(:,3) = coe_1st_derv(:,1)/hc ! d/dc stencil 1002 coe_1st_derv(:,2) = coe_1st_derv(:,1)/hb ! d/db stencil 1003 coe_1st_derv(:,1) = coe_1st_derv(:,1)/ha ! d/da stencil 1004 ! 1005 ! NOTE: in the second derivatives there is a additional factor of 1006 ! -4*pi because we merege that from the Poisson equation 1007 ! 1008 ! \nabla^2 V = -4*\pi \rho 1009 ! 1010 ! axial derivatives 1011 ! 1012 coeke(:,3,3) = -coeke(:,1,1)/(hc*hc*fpi) ! -d^2/dc^2/4pi stencil 1013 coeke(:,2,2) = -coeke(:,1,1)/(hb*hb*fpi) ! -d^2/db^2/4pi stencil 1014 coeke(:,1,1) = -coeke(:,1,1)/(ha*ha*fpi) ! -d^2/da^2/4pi stencil 1015 ! 1016 ! cross derivatives 1017 ! 1018 coeke(:,2,3) = -coeke(:,1,2)/(hb*hc*fpi) ! -d^2/dbdc/4pi stencil 1019 coeke(:,1,3) = -coeke(:,1,2)/(ha*hc*fpi) ! -d^2/dadc/4pi stencil 1020 coeke(:,1,2) = -coeke(:,1,2)/(ha*hb*fpi) ! -d^2/dadb/4pi stencil 1021 ! 1022 ! -- Jacobian for the general (non-orthogonal) -- 1023 ! please see the following reference for details <todo: EXX paper> 1024 ! 1025 ! J = transpose(ainv).(diag(a)) 1026 ! 1027 Jim(:,1) = ainv(1,:)*a(1) 1028 Jim(:,2) = ainv(2,:)*a(2) ! i={xyz}, m={abc} 1029 Jim(:,3) = ainv(3,:)*a(3) 1030 ! 1031 ! -- weigh coeke with the Jacobian -- 1032 ! 1033 ! axial derivatives 1034 ! 1035 coeke(:,3,3) = (Jim(1,3)**2+Jim(2,3)**2+Jim(3,3)**2)*coeke(:,3,3) 1036 coeke(:,2,2) = (Jim(1,2)**2+Jim(2,2)**2+Jim(3,2)**2)*coeke(:,2,2) 1037 coeke(:,1,1) = (Jim(1,1)**2+Jim(2,1)**2+Jim(3,1)**2)*coeke(:,1,1) 1038 ! 1039 ! cross derivatives (needed for non-othogonal grids in the second derivatives) 1040 ! 1041 coeke(:,2,3) = 2.0_DP*(Jim(1,2)*Jim(1,3)+Jim(2,2)*Jim(2,3)+Jim(3,2)*Jim(3,3))*coeke(:,2,3) 1042 coeke(:,1,3) = 2.0_DP*(Jim(1,1)*Jim(1,3)+Jim(2,1)*Jim(2,3)+Jim(3,1)*Jim(3,3))*coeke(:,1,3) 1043 coeke(:,1,2) = 2.0_DP*(Jim(1,1)*Jim(1,2)+Jim(2,1)*Jim(2,2)+Jim(3,1)*Jim(3,2))*coeke(:,1,2) 1044 coeke(:,3,2) = coeke(:,2,3) ! symmetry of coeke 1045 coeke(:,2,1) = coeke(:,1,2) ! symmetry of coeke 1046 coeke(:,3,1) = coeke(:,1,3) ! symmetry of coeke 1047 ! 1048 ! a samall check on the shape of user defined cell (if any) 1049 ! 1050 IF ((ibrav.EQ.0).AND.(nfi.EQ.1)) THEN 1051 WRITE(stdout,*) 'EXX info: If you are using an orthogonal cell without its cell vectors& 1052 & aligned to the xyz directions, the EXX calculation may be twice more expensive.' 1053 END IF 1054 RETURN 1055 END SUBROUTINE exx_gs_setup_common 1056 1057 subroutine exx_gs_setup_cube() 1058 implicit none 1059 psgsn=1 1060 ! consider a grid an unit cell, the lattice vectors for the grid 1061 ha_proj(:) = ha*h(:,1)/a(1) 1062 hb_proj(:) = hb*h(:,2)/a(2) 1063 hc_proj(:) = hc*h(:,3)/a(3) 1064 inv_omega = 1.0_DP/omega 1065 s_me_r1=s_me_r(1) 1066 s_me_r2=s_me_r(2) 1067 s_me_r3=s_me_r(3) 1068 s_me_r4=s_me_r(4) 1069 s_me_r5=s_me_r(5) 1070 s_me_r6=s_me_r(6) 1071 DO k = s_me_r(3),s_me_r(6) 1072 DO j = s_me_r(2),s_me_r(5) 1073 DO i = s_me_r(1),s_me_r(4) 1074 !--------------------------------------------------------------------------------------- 1075 dqs1 = (DBLE(i)/DBLE(nr1s)) - DBLE(INT(nr1s/2))/DBLE(nr1s) 1076 dqs2 = (DBLE(j)/DBLE(nr2s)) - DBLE(INT(nr2s/2))/DBLE(nr2s) 1077 dqs3 = (DBLE(k)/DBLE(nr3s)) - DBLE(INT(nr3s/2))/DBLE(nr3s) 1078 ! 1079 ! Here we are computing distances between Grid points and center of the simulation cell, so no MIC is needed ... 1080 ! Compute distance between grid point and the center of the simulation cell in R space 1081 ! 1082 dq1=h(1,1)*dqs1+h(1,2)*dqs2+h(1,3)*dqs3 !r_i = h s_i 1083 dq2=h(2,1)*dqs1+h(2,2)*dqs2+h(2,3)*dqs3 !r_i = h s_i 1084 dq3=h(3,1)*dqs1+h(3,2)*dqs2+h(3,3)*dqs3 !r_i = h s_i 1085 ! 1086 dist = DSQRT(dq1*dq1+dq2*dq2+dq3*dq3) 1087 !------------------------------------------------- 1088 me_cs(1,i,j,k)=dq1 1089 me_cs(2,i,j,k)=dq2 1090 me_cs(3,i,j,k)=dq3 1091 !------------------------------------------------- 1092 me_rs(0,i,j,k)=1.0_DP 1093 me_rs(1,i,j,k)=dist 1094 !------------------------------------------------- 1095 me_ri(0,i,j,k)=1.0_DP 1096 ! 1097 IF(dist.GE.1.0E-10) THEN 1098 me_ri(1,i,j,k)=1.0_DP/dist 1099 ELSE 1100 me_ri(1,i,j,k)=0.0_DP ! JJ: this seems wrong, but is correct 1101 END IF 1102 !------------------------------------------------- 1103 DO l=2,lmax 1104 me_rs(l,i,j,k)=me_rs(l-1,i,j,k)*me_rs(1,i,j,k) 1105 END DO 1106 !------------------------------------------------- 1107 DO l=2,lmax+1 1108 me_ri(l,i,j,k)=me_ri(l-1,i,j,k)*me_ri(1,i,j,k) 1109 END DO 1110 !------------------------------------------------- 1111 IF( (i.GE.s_me_r1).AND.(i.LE.s_me_r4).AND. & 1112 (j.GE.s_me_r2).AND.(j.LE.s_me_r5).AND. & 1113 (k.GE.s_me_r3).AND.(k.LE.s_me_r6) ) THEN 1114 !------------------------------------------------- 1115 dxy = DSQRT(dq1*dq1+dq2*dq2) 1116 !------------------------------------------------- 1117 me_rc(0,i,j,k) =1.0_DP 1118 me_rc(1:lmax,i,j,k)=0.0_DP 1119 !------------------------------------------------- 1120 IF (dxy .GT. 1.0E-10) THEN 1121 !----------------------------------------------- 1122 cxy = CMPLX(dq1,dq2)/dxy 1123 !----------------------------------------------- 1124 DO m=1,lmax 1125 me_rc(m,i,j,k)=me_rc(m-1,i,j,k)*cxy 1126 END DO 1127 !----------------------------------------------- 1128 END IF 1129 !------------------------------------------------- 1130 END IF 1131 !------------------------------------------------- 1132 END DO 1133 END DO 1134 END DO 1135 !--------------------------------------------------------------------------------------------- 1136 return 1137 end subroutine exx_gs_setup_cube 1138 1139 subroutine exx_gs_setup_sphere() 1140 implicit none 1141 !======================================================================== 1142 ! Compute distances between grid points and the center of the simulation cell in R space 1143 ! This part needs to be done once in constant volume simulation and 1144 ! needs to be done every step in variable cell simulationulations ... 1145 ! 1146 !======================================================================== 1147 DO i=1,np_in_sp_me_s 1148 xx_in_sp(i)=h(1,1)*sc_xx_in_sp(i)+h(1,2)*sc_yy_in_sp(i)+h(1,3)*sc_zz_in_sp(i) ! r = h s 1149 yy_in_sp(i)=h(2,1)*sc_xx_in_sp(i)+h(2,2)*sc_yy_in_sp(i)+h(2,3)*sc_zz_in_sp(i) ! r = h s 1150 zz_in_sp(i)=h(3,1)*sc_xx_in_sp(i)+h(3,2)*sc_yy_in_sp(i)+h(3,3)*sc_zz_in_sp(i) ! r = h s 1151 END DO 1152 return 1153 end subroutine exx_gs_setup_sphere 1154 1155 subroutine solve_a_nonself_pair_cube() 1156 implicit none 1157 call generate_pe_guess_cube 1158 call start_clock('exx_grid_trans') 1159 CALL getmiddlewc(wannierc(1,gindex_of_iobtl),wannierc(1,my_var2), h, ainv, middle ) 1160 ! 1161 ! calculate translation vector from the center of the box 1162 CALL getsftv( nr1s, nr2s, nr3s, h, ainv, middle, tran) 1163 call stop_clock('exx_grid_trans') 1164 ! 1165 ! get the localized psi around the mid point of two wannier centers 1166 ! note: the psime is centered at the center of the box 1167 ! (using the translation vector "tran" from middle of wfc to the center of box) 1168 call start_clock('exx_psicb') 1169 CALL getpsicb( nrg, p_me_r, psi(1,iobtl), psime(1), tran) 1170#if ! defined(__MPI) 1171 ! does not need communication so construct it here; my_var2 is simultaneously the global and local index (serial case) 1172 CALL getpsicb( nrg, p_me_r, psi(1,my_var2), psime_pair_recv(1, j, iobtl), tran) 1173#endif 1174 call stop_clock('exx_psicb') 1175 ! 1176 ! the localized density rhome 1177 call start_clock('exx_getrhol') 1178 CALL getrhol_cube(p_me_r, p_ps_r, psime(1), psime_pair_recv(1, j, iobtl), rhome, rhops, inv_omega) 1179 call stop_clock('exx_getrhol') 1180 ! 1181 ! calculate the exx potential from the pair density by solving Poisson 1182 ! 1183 !-------------------------------------------------------------------------------------- 1184 call start_clock('exx_vofr') 1185 CALL getvofr_cube( p_me_r, p_ps_r, max(n_p_me,n_s_me), max(n_p_ps,n_s_ps), hcub, & 1186 rhops, potme, pair_status(pos, iobtl), psgsn, pairrho(:,:,pos,iobtl), & 1187 pairv(:,:,pos,iobtl), cgstep) 1188 call stop_clock('exx_vofr') 1189 !-------------------------------------------------------------------------------------- 1190 ! 1191 !-------------------------------------------------------------------------------------- 1192 ! write cgsteps in the suffix.ncg (unit=44) 1193 !-------------------------------------------------------------------------------------- 1194 IF ((MOD(nfi,iprint_stdout).EQ.0)) THEN 1195 IF (ionode) THEN ! maybe not needed for ionode (if one want more information) 1196 WRITE(iunit,'(3X,"(i,j,cgsteps)",3I6)') gindex_of_iobtl, my_var2, cgstep 1197 END IF 1198 END IF 1199 !-------------------------------------------------------------------------------------- 1200 ! 1201 !-------------------------------------------------------------------------------------- 1202 ! update force and energy 1203 !-------------------------------------------------------------------------------------- 1204 call start_clock('exx_force_loc') 1205 CALL updateforce_loc(nrg, p_me_r, vpsil(:,iobtl), potme, psime, psime_pair_recv(1,j,iobtl),tran) 1206#if ! defined(__MPI) 1207 ! does not need communication so construct it here; my_var2 is simultaneously the global and local index (serial case) 1208 CALL updateforce_loc(nrg, p_me_r, vpsil(:,my_var2), potme, psime_pair_recv(1,j,iobtl), psime, tran) 1209#endif 1210 call stop_clock('exx_force_loc') 1211 ! 1212 call start_clock('exx_penergy') 1213 CALL vvprod_cube(p_me_r, rhome, potme, paire(j)) ! dot product of the rho and potme 1214 call stop_clock('exx_penergy') 1215 return 1216 end subroutine solve_a_nonself_pair_cube 1217 1218 subroutine solve_a_nonself_pair_sphere() 1219 implicit none 1220 call start_clock('exx_grid_trans') 1221 CALL getmiddlewc(wannierc(1,gindex_of_iobtl),wannierc(1,my_var2), h, ainv, middle ) 1222 ! d_pair is used in the extrapolation scheme (sphere only) 1223 CALL get_pair_dist(wannierc(1,gindex_of_iobtl),wannierc(1,my_var2),d_pair) 1224 ! 1225 ! calculate translation vector from the center of the box 1226 CALL getsftv( nr1s, nr2s, nr3s, h, ainv, middle, tran) 1227 call stop_clock('exx_grid_trans') 1228 ! 1229 ! get the localized psi around the mid point of two wannier centers 1230 ! note: the psil is centered at the center of the box 1231 ! (using the translation vector "tran" from middle of wfc to the center of box) 1232 ALLOCATE ( psil(np_in_sp_me_p) ); psil=0.0_DP ! HK/MCA: (TODO) the allocation can to be done in a reusable fashion 1233 CALL getpsil( nnrtot, np_in_sp_me_p, psi(1, iobtl), psil(1), tran) 1234#if ! defined(__MPI) 1235 ! does not need communication so construct it here; my_var2 is simultaneously the global and local index (serial case) 1236 CALL getpsil( nnrtot, np_in_sp_me_p, psi(1, my_var2), psime_pair_recv(1, j, iobtl), tran) 1237#endif 1238 ! 1239 ! the localized density rhol 1240 ! HK/MCA: (TODO) need to make these array allocation in a reusable fashion 1241 ALLOCATE ( rhol(np_in_sp_me_p) ); rhol=0.0_DP 1242 ALLOCATE ( rho_in_sp(np_in_sp_p) ); rho_in_sp=0.0_DP 1243 CALL getrhol_sphere( np_in_sp_me_p, np_in_sp_p, psil(1), psime_pair_recv(1, j, iobtl), rhol, rho_in_sp, tran, sa1) 1244 ! 1245 ! calculate the exx potential from the pair density by solving Poisson 1246 ! 1247 ! calculate the exx potential from the pair density by solving Poisson 1248 ! 1249 ALLOCATE ( vl(np_in_sp_me_p) ); vl=0.0_DP ! compute potential (vl) in ME sphere 1250 CALL start_clock('getvofr') 1251 ! HK/MCA: d_pair is used in the extrapolation scheme (check if still working) see also ``call get_pair_dist'' 1252 CALL getvofr_sphere( np_in_sp_me_p, np_in_sp_p, hcub, rho_in_sp, vl,& 1253 pairv(1,1,j,iobtl), pairv(1,2,j,iobtl), pairv(1,3,j,iobtl),& 1254 .FALSE., d_pair, pair_dist(1,j,iobtl), pair_dist(2,j,iobtl),& 1255 pair_dist(3,j,iobtl),cgstep) 1256 CALL stop_clock('getvofr') 1257 ! 1258 ! write cgsteps in the suffix.ncg (unit=44) 1259 ! 1260 IF ((MOD(nfi,iprint_stdout).EQ.0)) THEN 1261 ! 1262 IF (ionode) THEN ! maybe not needed for ionode (if one want more information) 1263 ! 1264 WRITE(iunit,'(3X,"(i,j,cgsteps)",3I6)') gindex_of_iobtl, my_var2, cgstep 1265 ! 1266 END IF 1267 ! 1268 END IF 1269 ! 1270 ! update vpsil in the global grid (exxalfa is 0.25 for PBE0) 1271 !$omp parallel do private(ir) 1272 DO ip = 1, np_in_sp_me_p 1273 CALL l2goff (ip,ir,tran) ! local is centered at box center; global index is offset by tran 1274 vpsil(ir,iobtl) = vpsil(ir,iobtl) - exxalfa*vl(ip)*psime_pair_recv(ip,j,iobtl) ! to remain 1275#if defined(__MPI) 1276 psime_pair_recv(ip,j,iobtl) = - exxalfa*vl(ip)*psil(ip) ! to be sent 1277#else 1278 ! does not need communication so construct it here; my_var2 is simultaneously the global and local index (serial case) 1279 vpsil(ir,my_var2) = vpsil(ir,my_var2) - exxalfa*vl(ip)*psil(ip) 1280#endif 1281 END DO 1282 !$omp end parallel do 1283 ! 1284 CALL vvprod_sphere(np_in_sp_me_p, rhol, vl, paire(j)) ! dot product of the rho and vl !HK (todo): do we need to do PS+ME ?? rho_in_sp may be enough 1285 return 1286 end subroutine solve_a_nonself_pair_sphere 1287 1288 subroutine generate_pe_guess_cube() 1289 implicit none 1290 guess_status = 1 ! guess what? 1291 pos = 0 1292 oldest_step = 100000000 1293 DO itr = 1, neigh 1294 IF ( pair_label(itr, iobtl) .EQ. 0 ) THEN 1295 pos = itr 1296 EXIT 1297 ELSE IF ( pair_label(itr, iobtl) .EQ. my_var2 ) THEN 1298 guess_status = 1 1299 pos = itr 1300 EXIT 1301 ELSE IF ( pair_step(itr, iobtl) < oldest_step ) THEN 1302 pos = itr 1303 oldest_step = pair_step(itr, iobtl) 1304 END IF 1305 END DO 1306 ! 1307 IF (guess_status .EQ. 1) THEN 1308 IF (pair_step(pos, iobtl) .EQ. n_exx - 1) THEN 1309 pair_status(pos, iobtl) = pair_status(pos, iobtl) + 1 1310 ELSE 1311 pair_status(pos, iobtl) = 1 1312 END IF 1313 ELSE 1314 pair_status(pos, iobtl) = 0 1315 END IF 1316 ! 1317 pair_label(pos, iobtl) = my_var2 1318 pair_step(pos, iobtl) = n_exx 1319 return 1320 end subroutine generate_pe_guess_cube 1321 1322 subroutine solve_a_self_pair_cube() 1323 implicit none 1324 ! calculate translation vector from the center of the box 1325 CALL getsftv(nr1s, nr2s, nr3s, h, ainv, wannierc(1, gindex_of_iobtl), tran) 1326 ! 1327 ! get the localized psi around the wannier centers 1328 ! note: the psime is centered at the center of the box 1329 ! (using the translation vector "tran" from the wfc to the center of box) 1330 ! 1331 CALL getpsicb( nrg, s_me_r, psi(1,iobtl), psime(1), tran) 1332 ! get the localized density rhome 1333 CALL getrhol_cube(s_me_r, s_ps_r, psime(1), psime(1), rhome, rhops, inv_omega) 1334 ! 1335 ! calculate the exx potential from the pair density by solving Poisson 1336 ! 1337 !-------------------------------------------------------------------------------------- 1338 CALL start_clock('getvofr') 1339 ! 1340 CALL getvofr_cube( s_me_r, s_ps_r, n_s_me, n_s_ps, hcub, rhops, potme, n_exx-1, psgsn, & 1341 selfrho(:,:,iobtl), selfv(:,:,iobtl), cgstep) 1342 ! 1343 CALL stop_clock('getvofr') 1344 !-------------------------------------------------------------------------------------- 1345 ! 1346 !-------------------------------------------------------------------------------------- 1347 ! write cgsteps in the suffix.ncg (unit=44) 1348 !-------------------------------------------------------------------------------------- 1349 IF ((MOD(nfi,iprint_stdout).EQ.0)) THEN 1350 IF (ionode) THEN ! maybe not needed for ionode (if one want more information) 1351 WRITE(44,'(3X,"(i,i,cgsteps)",3I6)') gindex_of_iobtl, gindex_of_iobtl, cgstep 1352 CALL printout_base_close("ncg") 1353 END IF 1354 END IF 1355 !-------------------------------------------------------------------------------------- 1356 ! 1357 !-------------------------------------------------------------------------------------- 1358 ! update force and energy 1359 !-------------------------------------------------------------------------------------- 1360 ! 1361 CALL updateforce_slf(nrg, s_me_r, vpsil(1,iobtl), potme, psime, tran) 1362 CALL vvprod_cube(s_me_r, rhome, potme, selfe) ! dot product of the rho and potme 1363 return 1364 end subroutine solve_a_self_pair_cube 1365 1366 subroutine solve_a_self_pair_sphere() 1367 implicit none 1368 ! calculate translation vector from the center of the box 1369 CALL getsftv(nr1s, nr2s, nr3s, h, ainv, wannierc(1, gindex_of_iobtl), tran) 1370 ! 1371 ! get the localized psi around the wannier centers 1372 ! note: the psil is centered at the center of the box 1373 ! (using the translation vector "tran" from the wfc to the center of box) 1374 ALLOCATE ( psil(np_in_sp_me_s) ); psil=0.0_DP 1375 CALL getpsil( nnrtot, np_in_sp_me_s, psi(1, iobtl), psil(1), tran) 1376 ! 1377 ! get the localized density rhol 1378 ALLOCATE ( rhol(np_in_sp_me_s) ); rhol=0.0_DP 1379 ALLOCATE ( rho_in_sp(np_in_sp_s) ); rho_in_sp=0.0_DP 1380 CALL getrhol_sphere( np_in_sp_me_s, np_in_sp_s, psil(1), psil(1), rhol, rho_in_sp, tran, sa1) 1381 ! 1382 ! calculate the exx potential from the pair density by solving Poisson 1383 ! 1384 ALLOCATE ( vl(np_in_sp_me_s) ); vl=0.0_DP ! compute potential (vl) in ME sphere 1385 CALL start_clock('getvofr') 1386 CALL getvofr_sphere( np_in_sp_me_s,np_in_sp_s,& 1387 hcub, rho_in_sp, vl, selfv(1,1,iobtl), selfv(1,2,iobtl),& 1388 selfv(1,3,iobtl), .TRUE., 0.0, 0.0, 0.0, 0.0,cgstep) 1389 ! 1390 CALL stop_clock('getvofr') 1391 ! 1392 ! write cgsteps in the suffix.ncg (unit=44) 1393 ! 1394 IF ((MOD(nfi,iprint_stdout).EQ.0)) THEN 1395 ! 1396 IF (ionode) THEN ! maybe not needed for ionode (if one want more information) 1397 ! 1398 WRITE(44,'(3X,"(i,i,cgsteps)",3I6)') gindex_of_iobtl, gindex_of_iobtl, cgstep 1399 ! 1400 CALL printout_base_close("ncg") 1401 ! 1402 END IF 1403 ! 1404 END IF 1405 ! 1406 ! update vpsil in the global grid (exxalfa is 0.25 for PBE0) 1407 !$omp parallel do private(ir) 1408 DO ip = 1, np_in_sp_me_s 1409 CALL l2goff (ip,ir,tran) ! local is centered at box center; global index is offset by tran 1410 vpsil(ir,iobtl) = vpsil(ir,iobtl) - exxalfa*vl(ip)*psil(ip) ! PBE0 1411 END DO 1412 !$omp end parallel do 1413 ! 1414 ! compute exchange energy in ME sphere 1415 CALL vvprod_sphere(np_in_sp_me_s, rhol, vl, selfe) ! dot product of the rho and vl !HK (todo): do we need to do PS+ME ?? rho_in_sp may be enough 1416 return 1417 end subroutine solve_a_self_pair_sphere 1418 1419END SUBROUTINE exx_gs 1420!==================================================================================== 1421 1422!============================================================================== 1423SUBROUTINE getsftv(nr1s, nr2s, nr3s, h, ainv, wc, tran) 1424 ! 1425 USE kinds, ONLY : DP 1426 ! 1427 IMPLICIT NONE 1428 ! 1429 INTEGER nr1s, nr2s, nr3s, tran(3) 1430 REAL(DP) wc(3), wcs(3), h(3,3), ainv(3,3) 1431 ! 1432 INTEGER i, bcm(3), wcm(3) 1433 ! 1434 bcm(1) = INT(nr1s/2) 1435 bcm(2) = INT(nr2s/2) 1436 bcm(3) = INT(nr3s/2) 1437 ! 1438 wcs(1)=ainv(1,1)*wc(1)+ainv(1,2)*wc(2)+ainv(1,3)*wc(3) ! s_ij = h^-1 r_ij 1439 wcs(2)=ainv(2,1)*wc(1)+ainv(2,2)*wc(2)+ainv(2,3)*wc(3) ! s_ij = h^-1 r_ij 1440 wcs(3)=ainv(3,1)*wc(1)+ainv(3,2)*wc(2)+ainv(3,3)*wc(3) ! s_ij = h^-1 r_ij 1441 ! 1442 ! we map to nearest grid point along each lattice vector 1443 ! 1444 wcm(1) = IDNINT( wcs(1)*DBLE(nr1s) ) 1445 wcm(2) = IDNINT( wcs(2)*DBLE(nr2s) ) 1446 wcm(3) = IDNINT( wcs(3)*DBLE(nr3s) ) 1447 ! 1448 DO i = 1, 3 1449 tran(i) = bcm(i) - wcm(i) 1450 ENDDO 1451 ! 1452 RETURN 1453END SUBROUTINE getsftv 1454!============================================================================== 1455 1456!============================================================================== 1457SUBROUTINE getmiddlewc(wc1, wc2, h, ainv, mid) 1458 ! 1459 USE kinds, ONLY : DP 1460 ! 1461 IMPLICIT NONE 1462 ! 1463 REAL(DP) wc1(3), wc2(3), mid(3) 1464 REAL(DP) dij(3), dij2(3), h(3,3), ainv(3,3), tmp(3) 1465 ! 1466 INTEGER i 1467 ! 1468 dij(1)=wc2(1)-wc1(1) ! r_ij = r_i - r_j 1469 dij(2)=wc2(2)-wc1(2) ! r_ij = r_i - r_j 1470 dij(3)=wc2(3)-wc1(3) ! r_ij = r_i - r_j 1471 ! 1472 dij2(1)=ainv(1,1)*dij(1)+ainv(1,2)*dij(2)+ainv(1,3)*dij(3) ! s_ij = h^-1 r_ij 1473 dij2(2)=ainv(2,1)*dij(1)+ainv(2,2)*dij(2)+ainv(2,3)*dij(3) ! s_ij = h^-1 r_ij 1474 dij2(3)=ainv(3,1)*dij(1)+ainv(3,2)*dij(2)+ainv(3,3)*dij(3) ! s_ij = h^-1 r_ij 1475 ! 1476 dij2(1)=dij2(1)-IDNINT(dij2(1)) ! impose MIC on s_ij in range: [-0.5,+0.5] 1477 dij2(2)=dij2(2)-IDNINT(dij2(2)) ! impose MIC on s_ij in range: [-0.5,+0.5] 1478 dij2(3)=dij2(3)-IDNINT(dij2(3)) ! impose MIC on s_ij in range: [-0.5,+0.5] 1479 ! 1480 dij(1)=h(1,1)*dij2(1)+h(1,2)*dij2(2)+h(1,3)*dij2(3) ! r_ij = h s_ij (MIC) 1481 dij(2)=h(2,1)*dij2(1)+h(2,2)*dij2(2)+h(2,3)*dij2(3) ! r_ij = h s_ij (MIC) 1482 dij(3)=h(3,1)*dij2(1)+h(3,2)*dij2(2)+h(3,3)*dij2(3) ! r_ij = h s_ij (MIC) 1483 ! 1484 mid(1) = wc1(1) + 0.5_DP*dij(1) 1485 mid(2) = wc1(2) + 0.5_DP*dij(2) 1486 mid(3) = wc1(3) + 0.5_DP*dij(3) 1487 ! 1488 RETURN 1489END SUBROUTINE getmiddlewc 1490!============================================================================== 1491 1492!============================================================================== 1493SUBROUTINE get_pair_dist (wc1, wc2, P_dist) 1494 ! 1495 USE kinds, ONLY : DP 1496 USE cell_base, ONLY : omega, ainv, h 1497 ! 1498 IMPLICIT NONE 1499 ! P_dist is the pair distance with in the minimum image convention 1500 REAL(DP) wc1(3), wc2(3), P_dist 1501 REAL(DP) dist_vec_in_r(3), dist_vec_in_s(3) 1502 ! 1503 INTEGER i 1504 ! 1505 P_dist = 0.D0 1506 ! dist vector 1507 DO i = 1,3 1508 dist_vec_in_r(i) = wc1(i) - wc2(i) 1509 END DO 1510 ! 1511 dist_vec_in_s(1)=ainv(1,1)*dist_vec_in_r(1)+ainv(1,2)*dist_vec_in_r(2)+ainv(1,3)*dist_vec_in_r(3) ! s = h^-1 r 1512 dist_vec_in_s(2)=ainv(2,1)*dist_vec_in_r(1)+ainv(2,2)*dist_vec_in_r(2)+ainv(2,3)*dist_vec_in_r(3) ! s = h^-1 r 1513 dist_vec_in_s(3)=ainv(3,1)*dist_vec_in_r(1)+ainv(3,2)*dist_vec_in_r(2)+ainv(3,3)*dist_vec_in_r(3) ! s = h^-1 r 1514 ! 1515 DO i = 1,3 1516 dist_vec_in_s(i)=dist_vec_in_s(i)-IDNINT(dist_vec_in_s(i)) 1517 END DO 1518 ! 1519 dist_vec_in_r(1)=h(1,1)*dist_vec_in_s(1)+h(1,2)*dist_vec_in_s(2)+h(1,3)*dist_vec_in_s(3) ! r = h s 1520 dist_vec_in_r(2)=h(2,1)*dist_vec_in_s(1)+h(2,2)*dist_vec_in_s(2)+h(2,3)*dist_vec_in_s(3) ! r = h s 1521 dist_vec_in_r(3)=h(3,1)*dist_vec_in_s(1)+h(3,2)*dist_vec_in_s(2)+h(3,3)*dist_vec_in_s(3) ! r = h s 1522 ! 1523 DO i = 1, 3 1524 P_dist = P_dist + dist_vec_in_r(i)*dist_vec_in_r(i) 1525 END DO 1526 ! 1527 P_dist = DSQRT(P_dist) 1528 ! 1529 RETURN 1530END SUBROUTINE get_pair_dist 1531!============================================================================== 1532 1533!============================================================================== 1534SUBROUTINE vvprod_sphere(n, v1, v2, prod) 1535 ! 1536 USE kinds, ONLY : DP 1537 ! 1538 IMPLICIT NONE 1539 ! 1540 INTEGER n 1541 REAL(DP) prod, v1(n), v2(n), vp 1542 ! 1543 INTEGER i 1544 ! 1545 prod = 0.0_DP 1546 vp = 0.0_DP 1547 ! 1548 !$omp parallel do reduction(+:vp) 1549 DO i = 1, n 1550 vp = vp + v1(i) * v2(i) 1551 END DO 1552 !$omp end parallel do 1553 ! 1554 prod = vp 1555 ! 1556 RETURN 1557END SUBROUTINE vvprod_sphere 1558!============================================================================== 1559 1560!============================================================================== 1561SUBROUTINE vvprod_cube(me_r, v1, v2, prod) 1562 ! 1563 USE kinds, ONLY : DP 1564 ! 1565 IMPLICIT NONE 1566 ! 1567 !---------------------------------------------------------------- 1568 INTEGER :: me_r(6) 1569 REAL(DP) :: v1(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6)) 1570 REAL(DP) :: v2(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6)) 1571 REAL(DP) :: prod 1572 !---------------------------------------------------------------- 1573 INTEGER :: i,j,k 1574 REAL(DP) :: prodp 1575 !---------------------------------------------------------------- 1576 ! 1577 prodp=0.0D0 1578 ! 1579 ! WRITE(*,*) "vvprod" 1580 ! 1581 !$omp parallel do private(i,j,k) reduction(+:prodp) 1582 DO k=me_r(3),me_r(6) 1583 DO j=me_r(2),me_r(5) 1584 DO i=me_r(1),me_r(4) 1585 !---------------------------------------------------------- 1586 prodp = prodp + v1(i,j,k)*v2(i,j,k) 1587 !---------------------------------------------------------- 1588 ! WRITE(*,"(I4,I4,I4,F15.11,F15.11,F15.11)") i,j,k,v1(i,j,k),v2(i,j,k),v1(i,j,k)*v2(i,j,k) 1589 !---------------------------------------------------------- 1590 END DO 1591 END DO 1592 END DO 1593 !---------------------------------------------------------------- 1594 !$omp end parallel do 1595 ! 1596 prod = prodp 1597 ! 1598 RETURN 1599END SUBROUTINE vvprod_cube 1600!============================================================================== 1601 1602!============================================================================== 1603SUBROUTINE getpsil( ntot, np_in_sp_me, psi, psi2, tran) 1604 ! 1605 USE kinds, ONLY : DP 1606 ! 1607 IMPLICIT NONE 1608 ! 1609 INTEGER ntot, tran(3), np_in_sp_me 1610 REAL(DP) psi(ntot), psi2(np_in_sp_me) 1611 ! 1612 INTEGER ir, ip, i, j, k, ii, jj, kk 1613 ! 1614 !$omp parallel do private(ir) 1615 DO ip = 1, np_in_sp_me 1616 CALL l2goff (ip,ir,tran) 1617 psi2(ip) = psi(ir) 1618 END DO 1619 !$omp end parallel do 1620 ! 1621 RETURN 1622END SUBROUTINE getpsil 1623!============================================================================== 1624 1625!============================================================================== 1626SUBROUTINE getrhol_cube(me_r, ps_r, psi1, psi2, rhome, rhops, inv_omega) 1627 ! 1628 USE kinds, ONLY : DP 1629 ! 1630 IMPLICIT NONE 1631 ! 1632 INTEGER me_r(6), ps_r(6) 1633 REAL(DP) psi1 (me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6)) 1634 REAL(DP) psi2 (me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6)) 1635 REAL(DP) rhome(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6)) 1636 REAL(DP) rhops(ps_r(1):ps_r(4),ps_r(2):ps_r(5),ps_r(3):ps_r(6)) 1637 REAL(DP) inv_omega 1638 ! 1639 INTEGER i, j, k 1640 ! 1641 !$omp parallel do private(i,j,k) 1642 DO k=ps_r(3),ps_r(6) 1643 DO j=ps_r(2),ps_r(5) 1644 DO i=ps_r(1),ps_r(4) 1645 rhops(i,j,k) = psi1(i,j,k) * psi2(i,j,k) * inv_omega 1646 END DO 1647 END DO 1648 END DO 1649 !$omp end parallel do 1650 !-------------------------------------------------------------------------- 1651 ! 1652 !-------------------------------------------------------------------------- 1653 !$omp parallel do private(i,j,k) 1654 DO k=me_r(3),me_r(6) 1655 DO j=me_r(2),me_r(5) 1656 DO i=me_r(1),me_r(4) 1657 rhome(i,j,k) = psi1(i,j,k)*psi2(i,j,k) * inv_omega 1658 END DO 1659 END DO 1660 END DO 1661 !$omp end parallel do 1662 !-------------------------------------------------------------------------- 1663 ! 1664 RETURN 1665END SUBROUTINE getrhol_cube 1666!============================================================================== 1667 1668!============================================================================== 1669SUBROUTINE getrhol_sphere( np_in_sp_me, np_in_sp, psi, psi2, rho, rho_in_sp, tran, sa1) 1670 ! 1671 USE kinds, ONLY : DP 1672 ! 1673 IMPLICIT NONE 1674 ! 1675 INTEGER np_in_sp_me, tran(3),np_in_sp 1676 REAL(DP) psi(np_in_sp_me), psi2(np_in_sp_me), rho(np_in_sp_me),sa1, rho_in_sp(np_in_sp) 1677 ! 1678 INTEGER ir, ip, i, j, k, ii, jj, kk 1679 rho_in_sp(:) = 0.D0 1680 ! 1681 !$omp parallel do 1682 DO ip = 1, np_in_sp_me 1683 rho(ip) = psi(ip) * psi2(ip) * sa1 1684 IF( ip.LE.np_in_sp ) THEN 1685 rho_in_sp( ip ) = rho(ip) 1686 END IF 1687 ENDDO 1688 !$omp end parallel do 1689 ! 1690 RETURN 1691END SUBROUTINE getrhol_sphere 1692!============================================================================== 1693 1694!============================================================================== 1695SUBROUTINE l2goff (lind,gind,tran) 1696 ! 1697 USE exx_module, ONLY : odtothd_in_sp, thdtood 1698 USE fft_base, ONLY : dfftp 1699 ! 1700 IMPLICIT NONE 1701 ! 1702 INTEGER tran(3),lind,gind 1703 INTEGER ir, ip, i, j, k, ii, jj, kk, nr1s, nr2s, nr3s 1704 ! 1705 nr1s=dfftp%nr1; nr2s=dfftp%nr2; nr3s=dfftp%nr3 1706 ! 1707 i = odtothd_in_sp(1, lind) 1708 j = odtothd_in_sp(2, lind) 1709 k = odtothd_in_sp(3, lind) 1710 ! 1711 ii = i - tran(1) 1712 jj = j - tran(2) 1713 kk = k - tran(3) 1714 ! 1715 IF ( ii .GT. nr1s)ii = ii - nr1s 1716 IF ( jj .GT. nr2s)jj = jj - nr2s 1717 IF ( kk .GT. nr3s)kk = kk - nr3s 1718 ! 1719 IF ( ii .LT. 1)ii = ii + nr1s 1720 IF ( jj .LT. 1)jj = jj + nr2s 1721 IF ( kk .LT. 1)kk = kk + nr3s 1722 ! 1723 gind = thdtood(ii, jj, kk) 1724 ! 1725 RETURN 1726END SUBROUTINE l2goff 1727!============================================================================== 1728SUBROUTINE getpsicb(nrg,nrl,psig,psil,tran) 1729 ! 1730 USE kinds, ONLY : DP 1731 USE fft_base, ONLY : dfftp 1732 ! 1733 IMPLICIT NONE 1734 ! 1735 INTEGER :: nrg(3) 1736 INTEGER :: nrl(6) 1737 REAL(DP) :: psig(nrg(1),nrg(2),nrg(3)) 1738 REAL(DP) :: psil(nrl(1):nrl(4),nrl(2):nrl(5),nrl(3):nrl(6)) 1739 INTEGER :: tran(3) 1740 INTEGER :: gid(3) 1741 !INTEGER :: lid(3) 1742 INTEGER :: i,j,k 1743 INTEGER :: ti, tj, tk 1744 INTEGER :: gi, gj, gk 1745 integer, external :: l2gcb 1746 ! 1747 ti = tran(1); tj = tran(2); tk = tran(3) 1748 !$omp parallel do private(i,j,k,gi,gj,gk) 1749 DO k = nrl(3),nrl(6) 1750 DO j = nrl(2),nrl(5) 1751 DO i = nrl(1),nrl(4) 1752 !---------------------------------------------------- 1753 gi = l2gcb(dfftp%nr1,i,ti) 1754 gj = l2gcb(dfftp%nr2,j,tj) 1755 gk = l2gcb(dfftp%nr3,k,tk) 1756 psil(i,j,k)=psig(gi,gj,gk) 1757 !---------------------------------------------------- 1758 END DO 1759 END DO 1760 END DO 1761 !$omp end parallel do 1762 !---------------------------------------------------------- 1763 ! 1764 RETURN 1765END SUBROUTINE getpsicb 1766!============================================================================== 1767 1768integer function l2gcb(n,l,t) 1769 implicit none 1770 integer :: n, l, t 1771 l2gcb = MOD(l-t-1+n, n)+1 1772end function l2gcb 1773SUBROUTINE updateforce_loc(nrg, me_r, vpsil, potme, psime1, psime2, tran) 1774 ! 1775 USE kinds, ONLY : DP 1776 USE exx_module, ONLY : exxalfa 1777 USE fft_base, ONLY : dfftp 1778 ! 1779 IMPLICIT NONE 1780 ! 1781 INTEGER :: nrg(3) 1782 INTEGER :: me_r(6) 1783 REAL(DP) :: vpsil(nrg(1),nrg(2),nrg(3)) 1784 REAL(DP) :: potme(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6)) 1785 REAL(DP) :: psime1(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6)) 1786 REAL(DP) :: psime2(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6)) 1787 INTEGER :: tran(3) 1788 !---------------------------------------------------------------- 1789 INTEGER :: gid(3) 1790 INTEGER :: lid(3) 1791 INTEGER :: i,j,k 1792 INTEGER :: gi,gj,gk,ti,tj,tk 1793 integer, external :: l2gcb 1794 ! 1795 ti=tran(1);tj=tran(2);tk=tran(3) 1796 !---------------------------------------------------------------- 1797 ! update vpsil in the global grid (exxalfa is 0.25 for PBE0) 1798 !---------------------------------------------------------------- 1799 !$omp parallel do private(i,j,k,gi,gj,gk) 1800 !---------------------------------------------------------------- 1801 DO k=me_r(3),me_r(6) 1802 DO j=me_r(2),me_r(5) 1803 DO i=me_r(1),me_r(4) 1804 !---------------------------------------------------------- 1805 !CALL l2gcb(lid,gid,tran) 1806 gi = l2gcb(dfftp%nr1,i,ti) 1807 gj = l2gcb(dfftp%nr2,j,tj) 1808 gk = l2gcb(dfftp%nr3,k,tk) 1809 !---------------------------------------------------------- 1810 vpsil(gi,gj,gk) = vpsil(gi,gj,gk) & 1811 - exxalfa*potme(i,j,k)*psime2(i,j,k) 1812#if defined(__MPI) 1813 psime2(i,j,k) = - exxalfa*potme(i,j,k)*psime1(i,j,k) 1814#endif 1815 !---------------------------------------------------------- 1816 END DO 1817 END DO 1818 END DO 1819 !---------------------------------------------------------------- 1820 !$omp end parallel do 1821 !---------------------------------------------------------------- 1822 1823 !---------------------------------------------------------------- 1824 RETURN 1825 !---------------------------------------------------------------- 1826END SUBROUTINE updateforce_loc 1827!============================================================================== 1828 1829!============================================================================== 1830SUBROUTINE updateforce_slf(nrg, me_r, vpsil, potme, psime, tran) 1831 ! 1832 USE kinds, ONLY : DP 1833 USE exx_module, ONLY : exxalfa 1834 USE fft_base, ONLY : dfftp 1835 ! 1836 IMPLICIT NONE 1837 ! 1838 INTEGER :: nrg(3) 1839 INTEGER :: me_r(6) 1840 REAL(DP) :: vpsil(nrg(1),nrg(2),nrg(3)) 1841 REAL(DP) :: potme(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6)) 1842 REAL(DP) :: psime(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6)) 1843 INTEGER :: tran(3) 1844 !---------------------------------------------------------------- 1845 INTEGER :: gid(3) 1846 INTEGER :: lid(3) 1847 INTEGER :: i,j,k 1848 ! 1849 INTEGER :: gi,gj,gk,ti,tj,tk 1850 integer, external :: l2gcb 1851 ! 1852 ti=tran(1);tj=tran(2);tk=tran(3) 1853 !---------------------------------------------------------------- 1854 ! update vpsil in the global grid (exxalfa is 0.25 for PBE0) 1855 !---------------------------------------------------------------- 1856 !$omp parallel do private(i,j,k,gi,gj,gk) 1857 DO k=me_r(3),me_r(6) 1858 DO j=me_r(2),me_r(5) 1859 DO i=me_r(1),me_r(4) 1860 !---------------------------------------------------------- 1861 gi = l2gcb(dfftp%nr1,i,ti) 1862 gj = l2gcb(dfftp%nr2,j,tj) 1863 gk = l2gcb(dfftp%nr3,k,tk) 1864 !---------------------------------------------------------- 1865 vpsil(gi,gj,gk) = vpsil(gi,gj,gk) & 1866 - exxalfa*potme(i,j,k)*psime(i,j,k) 1867 !---------------------------------------------------------- 1868 END DO 1869 END DO 1870 END DO 1871 !---------------------------------------------------------------- 1872 !$omp end parallel do 1873 !---------------------------------------------------------------- 1874 1875 !---------------------------------------------------------------- 1876 RETURN 1877 !---------------------------------------------------------------- 1878END SUBROUTINE updateforce_slf 1879!============================================================================== 1880 1881!============================================================================== 1882SUBROUTINE updateforce_rec(nrg, me_r, vpsil, force, tran) 1883 ! 1884 USE kinds, ONLY : DP 1885 USE exx_module, ONLY : exxalfa 1886 USE fft_base, ONLY : dfftp 1887 ! 1888 IMPLICIT NONE 1889 ! 1890 INTEGER :: nrg(3) 1891 INTEGER :: me_r(6) 1892 REAL(DP) :: vpsil(nrg(1),nrg(2),nrg(3)) 1893 REAL(DP) :: force(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6)) 1894 INTEGER :: tran(3) 1895 !---------------------------------------------------------------- 1896 INTEGER :: gi, gj, gk 1897 INTEGER :: i,j,k 1898 INTEGER :: ti,tj,tk 1899 integer, external :: l2gcb 1900 ! 1901 ti=tran(1);tj=tran(2);tk=tran(3) 1902 ! 1903 !---------------------------------------------------------------- 1904 ! update vpsil in the global grid (exxalfa is 0.25 for PBE0) 1905 !---------------------------------------------------------------- 1906 !$omp parallel do private(i,j,k,gi,gj,gk) 1907 !---------------------------------------------------------------- 1908 DO k=me_r(3),me_r(6) 1909 DO j=me_r(2),me_r(5) 1910 DO i=me_r(1),me_r(4) 1911 !---------------------------------------------------------- 1912 gi = l2gcb(dfftp%nr1,i,ti) 1913 gj = l2gcb(dfftp%nr2,j,tj) 1914 gk = l2gcb(dfftp%nr3,k,tk) 1915 !---------------------------------------------------------- 1916 vpsil(gi,gj,gk) = vpsil(gi,gj,gk) + force(i,j,k) 1917 !---------------------------------------------------------- 1918 END DO 1919 END DO 1920 END DO 1921 !---------------------------------------------------------------- 1922 !$omp end parallel do 1923 !---------------------------------------------------------------- 1924 1925 !---------------------------------------------------------------- 1926 RETURN 1927 !---------------------------------------------------------------- 1928END SUBROUTINE updateforce_rec 1929!============================================================================== 1930