1! Copyrigh(C) 2005-2018 Quantum ESPRESSO group 2! This file is distributed under the terms of the 3! GNU General Public License. See the file `License' 4! in the root directory of the present distribution, 5! or http://www.gnu.org/copyleft/gpl.txt . 6! 7!----------------------------------------------------------------------------- 8MODULE exx 9 !----------------------------------------------------------------------------- 10 !! Variables and subroutines for calculation of exact-exchange contribution. 11 !! Implements ACE: Lin Lin, J. Chem. Theory Comput. 2016, 12, 2242. 12 !! Contains code for band parallelization over pairs of bands: see T. Barnes, 13 !! T. Kurth, P. Carrier, N. Wichmann, D. Prendergast, P.R.C. Kent, J. Deslippe 14 !! Computer Physics Communications 2017, dx.doi.org/10.1016/j.cpc.2017.01.008. 15 ! 16 USE kinds, ONLY : DP 17 USE noncollin_module, ONLY : noncolin, npol 18 USE io_global, ONLY : ionode, stdout 19 ! 20 USE control_flags, ONLY : gamma_only, tqr 21 USE fft_types, ONLY : fft_type_descriptor 22 USE stick_base, ONLY : sticks_map, sticks_map_deallocate 23 ! 24 IMPLICIT NONE 25 ! 26 SAVE 27 ! 28 ! ... general purpose vars 29 ! 30 REAL(DP):: exxalfa=0._DP 31 !! the parameter multiplying the exact-exchange part 32 REAL(DP), ALLOCATABLE :: x_occupation(:,:) 33 !! the weights of auxiliary functions in the density matrix 34 INTEGER :: x_nbnd_occ 35 !! number of bands of auxiliary functions with at least 36 !! some x_occupation > eps_occ 37 REAL(DP), PARAMETER :: eps_occ = 1.d-8 38 !! occupation threshold 39 COMPLEX(DP), ALLOCATABLE :: exxbuff(:,:,:) 40 !! Buffers: temporary (complex) buffer for wfc storage 41 REAL(DP), ALLOCATABLE :: locbuff(:,:,:) 42 !! temporary (real) buffer for wfc storage 43 REAL(DP), ALLOCATABLE :: locmat(:,:,:) 44 !! buffer for matrix of localization integrals 45 REAL(DP), ALLOCATABLE :: exxmat(:,:,:,:) 46 !! buffer for matrix of localization integrals (K) 47 ! 48#if defined(__USE_INTEL_HBM_DIRECTIVES) 49!DIR$ ATTRIBUTES FASTMEM :: exxbuff 50#elif defined(__USE_CRAY_HBM_DIRECTIVES) 51!DIR$ memory(bandwidth) exxbuff 52#endif 53 ! 54 LOGICAL :: use_ace 55 !! if .TRUE. use Lin Lin's ACE method, if .FALSE. do not use ACE, 56 !! use old algorithm instead 57 COMPLEX(DP), ALLOCATABLE :: xi(:,:,:) 58 !! ACE projectors 59 COMPLEX(DP), ALLOCATABLE :: evc0(:,:,:) 60 !! old wfc (G-space) needed to compute fock3 61 INTEGER :: nbndproj 62 !! 63 LOGICAL :: domat 64 !! 65 REAL(DP):: local_thr 66 !! threshold for Lin Lin's SCDM localized orbitals: discard 67 !! contribution to V_x if overlap between localized orbitals 68 !! is smaller than "local_thr". 69 ! 70 ! ... energy related variables 71 ! 72 REAL(DP) :: fock0 = 0.0_DP 73 !! sum <old|Vx(old)|old> 74 REAL(DP) :: fock1 = 0.0_DP 75 !! sum <new|vx(old)|new> 76 REAL(DP) :: fock2 = 0.0_DP 77 !! sum <new|vx(new)|new> 78 REAL(DP) :: fock3 = 0.0_DP 79 !! sum <old|vx(new)|old> 80 REAL(DP) :: dexx = 0.0_DP 81 !! fock1 - 0.5*(fock2+fock0) 82 ! 83 ! ... custom fft grid and related G-vectors 84 ! 85 TYPE(fft_type_descriptor) :: dfftt 86 LOGICAL :: exx_fft_initialized = .FALSE. 87 REAL(kind=DP), DIMENSION(:), POINTER :: ggt => null() 88 !! G-vectors in custom gri 89 REAL(kind=DP), DIMENSION(:,:),POINTER :: gt => null() 90 !! G-vectors in custom grid 91 INTEGER :: gstart_t 92 !! gstart_t=2 if ggt(1)=0, =1 otherwise 93 INTEGER :: npwt 94 !! number of plane waves in custom grid (Gamma-only) 95 INTEGER :: ngmt_g 96 !! Total number of G-vectors in custom grid 97 REAL(DP) :: ecutfock 98 !! energy cutoff for custom grid 99 INTEGER :: ibnd_start = 0 100 !! starting band index used in bgrp parallelization 101 INTEGER :: ibnd_end = 0 102 !! ending band index used in bgrp parallelization 103 INTEGER :: ibnd_buff_start 104 !! starting buffer index used in bgrp parallelization 105 INTEGER :: ibnd_buff_end 106 !! ending buffer index used in bgrp parallelization 107 ! 108 CONTAINS 109#define _CX(A) CMPLX(A,0._dp,kind=DP) 110#define _CY(A) CMPLX(0._dp,-A,kind=DP) 111 ! 112 !------------------------------------------------------------------------ 113 SUBROUTINE exx_fft_create() 114 !------------------------------------------------------------------------ 115 !! Initialise the custom grid that allows to put the wavefunction 116 !! onto the new (smaller) grid for \rho=\psi_{k+q}\psi^*_k and vice versa. 117 !! Set up fft descriptors, including parallel stuff: sticks, planes, etc. 118 ! 119 USE gvecw, ONLY : ecutwfc 120 USE gvect, ONLY : ecutrho, ngm, g, gg, gstart, mill 121 USE cell_base, ONLY : at, bg, tpiba2 122 USE recvec_subs, ONLY : ggen, ggens 123 USE fft_base, ONLY : smap 124 USE fft_types, ONLY : fft_type_init 125 USE symm_base, ONLY : fft_fact 126 USE mp_exx, ONLY : nproc_egrp, negrp, intra_egrp_comm 127 USE mp_bands, ONLY : nproc_bgrp, intra_bgrp_comm, nyfft 128 ! 129 USE klist, ONLY : nks, xk 130 USE mp_pools, ONLY : inter_pool_comm 131 USE mp, ONLY : mp_max, mp_sum 132 ! 133 USE control_flags, ONLY : tqr 134 USE realus, ONLY : qpointlist, tabxx, tabp 135 USE exx_band, ONLY : smap_exx 136 USE command_line_options, ONLY : nmany_ 137 ! 138 IMPLICIT NONE 139 ! 140 ! ... local variables 141 ! 142 INTEGER :: ik, ngmt 143 INTEGER, ALLOCATABLE :: ig_l2gt(:), millt(:,:) 144 INTEGER, EXTERNAL :: n_plane_waves 145 REAL(DP) :: gkcut, gcutmt 146 LOGICAL :: lpara 147 ! 148 IF ( exx_fft_initialized ) RETURN 149 ! 150 ! Initialise the custom grid that allows us to put the wavefunction 151 ! onto the new (smaller) grid for \rho=\psi_{k+q}\psi^*_k and vice versa 152 ! 153 ! gkcut is such that all |k+G|^2 < gkcut (in units of (2pi/a)^2) 154 ! Note that with k-points, gkcut > ecutwfc/(2pi/a)^2 155 ! gcutmt is such that |q+G|^2 < gcutmt 156 ! 157 IF ( gamma_only ) THEN 158 gkcut = ecutwfc / tpiba2 159 gcutmt = ecutfock / tpiba2 160 ELSE 161 gkcut = 0.0_DP 162 DO ik = 1, nks 163 gkcut = MAX(gkcut, SQRT(SUM(xk(:,ik)**2))) 164 ENDDO 165 CALL mp_max( gkcut, inter_pool_comm ) 166 ! Alternatively, variable "qnorm" earlier computed in "exx_grid_init" 167 ! could be used as follows: 168 ! gkcut = ( SQRT(ecutwfc/tpiba2) + qnorm )**2 169 gkcut = ( SQRT(ecutwfc/tpiba2) + gkcut )**2 170 ! 171 ! ... the following instruction may be needed if ecutfock \simeq ecutwfc 172 ! and guarantees that all k+G are included 173 ! 174 gcutmt = MAX(ecutfock/tpiba2, gkcut) 175 ENDIF 176 ! 177 ! ... set up fft descriptors, including parallel stuff: sticks, planes, etc. 178 ! 179 IF (negrp == 1) THEN 180 ! 181 ! ... no band parallelization: exx grid is a subgrid of general grid 182 ! 183 lpara = ( nproc_bgrp > 1 ) 184 CALL fft_type_init( dfftt, smap, "rho", gamma_only, lpara, & 185 intra_bgrp_comm, at, bg, gcutmt, gcutmt/gkcut, & 186 fft_fact=fft_fact, nyfft=nyfft, nmany=nmany_ ) 187 CALL ggens( dfftt, gamma_only, at, g, gg, mill, gcutmt, ngmt, gt, ggt ) 188 gstart_t = gstart 189 npwt = n_plane_waves(ecutwfc/tpiba2, nks, xk, gt, ngmt) 190 ngmt_g = ngmt 191 CALL mp_sum( ngmt_g, intra_bgrp_comm ) 192 ! 193 ELSE 194 ! 195 WRITE( 6, "(5X,'Exchange parallelized over bands (',i4,' band groups)')" ) & 196 negrp 197 lpara = ( nproc_egrp > 1 ) 198 CALL fft_type_init( dfftt, smap_exx, "rho", gamma_only, lpara, & 199 intra_egrp_comm, at, bg, gcutmt, gcutmt/gkcut, & 200 fft_fact=fft_fact, nyfft=nyfft, nmany=nmany_ ) 201 ngmt = dfftt%ngm 202 ngmt_g = ngmt 203 CALL mp_sum( ngmt_g, intra_egrp_comm ) 204 ALLOCATE( gt(3,dfftt%ngm) ) 205 ALLOCATE( ggt(dfftt%ngm) ) 206 ALLOCATE( millt(3,dfftt%ngm) ) 207 ALLOCATE( ig_l2gt(dfftt%ngm) ) 208 ! 209 CALL ggen( dfftt, gamma_only, at, bg, gcutmt, ngmt_g, ngmt, & 210 gt, ggt, millt, ig_l2gt, gstart_t ) 211 ! 212 DEALLOCATE( ig_l2gt ) 213 DEALLOCATE( millt ) 214 npwt = n_plane_waves( ecutwfc/tpiba2, nks, xk, gt, ngmt ) 215 ! 216 ENDIF 217 ! define clock labels (this enables the corresponding fft too) 218 dfftt%rho_clock_label = 'fftc' ; dfftt%wave_clock_label = 'fftcw' 219 ! 220 WRITE( stdout, '(/5x,"EXX grid: ",i8," G-vectors", 5x, & 221 & "FFT dimensions: (",i4,",",i4,",",i4,")")') ngmt_g, & 222 & dfftt%nr1, dfftt%nr2, dfftt%nr3 223 ! 224 exx_fft_initialized = .TRUE. 225 ! 226 IF (tqr) THEN 227 IF (ecutfock == ecutrho) THEN 228 WRITE( stdout, '(5x,"Real-space augmentation: EXX grid -> DENSE grid")' ) 229 tabxx => tabp 230 ELSE 231 WRITE( stdout, '(5x,"Real-space augmentation: initializing EXX grid")' ) 232 CALL qpointlist( dfftt, tabxx ) 233 ENDIF 234 ENDIF 235 ! 236 RETURN 237 ! 238 END SUBROUTINE exx_fft_create 239 ! 240 ! 241 !------------------------------------------------------------------------ 242 SUBROUTINE exx_gvec_reinit( at_old ) 243 !---------------------------------------------------------------------- 244 !! Re-initialize g-vectors after rescaling. 245 ! 246 USE cell_base, ONLY : bg 247 ! 248 IMPLICIT NONE 249 ! 250 REAL(DP), INTENT(IN) :: at_old(3,3) 251 !! the lattice vectors at the previous step 252 ! 253 ! ... local variables 254 ! 255 INTEGER :: ig 256 REAL(DP) :: gx, gy, gz 257 ! 258 ! ... rescale g-vectors 259 ! 260 CALL cryst_to_cart( dfftt%ngm, gt, at_old, -1 ) 261 CALL cryst_to_cart( dfftt%ngm, gt, bg, +1 ) 262 ! 263 DO ig = 1, dfftt%ngm 264 gx = gt(1,ig) 265 gy = gt(2,ig) 266 gz = gt(3,ig) 267 ggt(ig) = gx*gx + gy*gy + gz*gz 268 ENDDO 269 ! 270 END SUBROUTINE exx_gvec_reinit 271 ! 272 ! 273 !------------------------------------------------------------------------ 274 SUBROUTINE deallocate_exx() 275 !------------------------------------------------------------------------ 276 !! Deallocates exx objects. 277 ! 278 USE becmod, ONLY : deallocate_bec_type, is_allocated_bec_type, bec_type 279 USE us_exx, ONLY : becxx 280 USE exx_base, ONLY : xkq_collect, index_xkq, index_xk, index_sym, rir, & 281 working_pool, exx_grid_initialized 282 ! 283 IMPLICIT NONE 284 ! 285 INTEGER :: ikq 286 ! 287 exx_grid_initialized = .FALSE. 288 ! 289 IF ( ALLOCATED(index_xkq) ) DEALLOCATE( index_xkq ) 290 IF ( ALLOCATED(index_xk ) ) DEALLOCATE( index_xk ) 291 IF ( ALLOCATED(index_sym) ) DEALLOCATE( index_sym ) 292 IF ( ALLOCATED(rir) ) DEALLOCATE( rir ) 293 IF ( ALLOCATED(x_occupation) ) DEALLOCATE( x_occupation ) 294 IF ( ALLOCATED(xkq_collect ) ) DEALLOCATE( xkq_collect ) 295 IF ( ALLOCATED(exxbuff) ) DEALLOCATE( exxbuff ) 296 IF ( ALLOCATED(locbuff) ) DEALLOCATE( locbuff ) 297 IF ( ALLOCATED(locmat) ) DEALLOCATE( locmat ) 298 IF ( ALLOCATED(exxmat) ) DEALLOCATE( exxmat ) 299 IF ( ALLOCATED(xi) ) DEALLOCATE( xi ) 300 IF ( ALLOCATED(evc0) ) DEALLOCATE( evc0 ) 301 ! 302 IF ( ALLOCATED(becxx) ) THEN 303 DO ikq = 1, SIZE(becxx) 304 IF (is_allocated_bec_type(becxx(ikq))) CALL deallocate_bec_type( becxx(ikq) ) 305 ENDDO 306 ! 307 DEALLOCATE( becxx ) 308 ENDIF 309 ! 310 IF ( ALLOCATED(working_pool) ) DEALLOCATE( working_pool ) 311 ! 312 exx_fft_initialized = .FALSE. 313 IF ( ASSOCIATED(gt) ) DEALLOCATE( gt ) 314 IF ( ASSOCIATED(ggt) ) DEALLOCATE( ggt ) 315 ! 316 END SUBROUTINE deallocate_exx 317 ! 318 ! 319 !------------------------------------------------------------------------ 320 SUBROUTINE exxinit( DoLoc ) 321 !------------------------------------------------------------------------ 322 !! This subroutine is run before the first H_psi() of each iteration. 323 !! It saves the wavefunctions for the right density matrix, in real space. 324 ! 325 USE wavefunctions, ONLY : evc 326 USE io_files, ONLY : nwordwfc, iunwfc_exx 327 USE buffers, ONLY : get_buffer 328 USE wvfct, ONLY : nbnd, npwx, wg, current_k 329 USE klist, ONLY : ngk, nks, nkstot, xk, wk, igk_k 330 USE symm_base, ONLY : nsym, s, sr 331 USE mp_pools, ONLY : npool, nproc_pool, me_pool, inter_pool_comm 332 USE mp_exx, ONLY : me_egrp, negrp, init_index_over_band, & 333 my_egrp_id, inter_egrp_comm, & 334 intra_egrp_comm, iexx_start, iexx_end, & 335 all_start, all_end 336 USE mp, ONLY : mp_sum, mp_bcast 337 USE funct, ONLY : get_exx_fraction, start_exx,exx_is_active, & 338 get_screening_parameter, get_gau_parameter 339 USE scatter_mod, ONLY : gather_grid, scatter_grid 340 USE fft_interfaces, ONLY : invfft 341 USE uspp, ONLY : nkb, vkb, okvan 342 USE us_exx, ONLY : rotate_becxx 343 USE paw_variables, ONLY : okpaw 344 USE paw_exx, ONLY : PAW_init_fock_kernel 345 USE mp_orthopools, ONLY : intra_orthopool_comm 346 USE exx_base, ONLY : nkqs, xkq_collect, index_xk, index_sym, & 347 exx_set_symm, rir, working_pool, exxdiv, & 348 erfc_scrlen, gau_scrlen, exx_divergence 349 USE exx_band, ONLY : change_data_structure, nwordwfc_exx, & 350 transform_evc_to_exx, igk_exx, evc_exx 351 ! 352 IMPLICIT NONE 353 ! 354 LOGICAL :: DoLoc 355 !! TRUE: Real Array locbuff(ir, nbnd, nkqs); 356 !! FALSE: Complex Array exxbuff(ir, nbnd/2, nkqs). 357 ! 358 ! ... local variables 359 ! 360 INTEGER :: ik, ibnd, i, j, k, ir, isym, ikq, ig 361 INTEGER :: ibnd_loop_start 362 INTEGER :: ipol, jpol 363 REAL(DP), ALLOCATABLE :: occ(:,:) 364 COMPLEX(DP),ALLOCATABLE :: temppsic(:) 365#if defined(__USE_INTEL_HBM_DIRECTIVES) 366!DIR$ ATTRIBUTES FASTMEM :: temppsic 367#elif defined(__USE_CRAY_HBM_DIRECTIVES) 368!DIR$ memory(bandwidth) temppsic 369#endif 370 COMPLEX(DP),ALLOCATABLE :: temppsic_nc(:,:), psic_nc(:,:) 371 COMPLEX(DP),ALLOCATABLE :: psic_exx(:) 372 INTEGER :: nxxs, nrxxs 373#if defined(__MPI) 374 COMPLEX(DP),ALLOCATABLE :: temppsic_all(:), psic_all(:) 375 COMPLEX(DP), ALLOCATABLE :: temppsic_all_nc(:,:), psic_all_nc(:,:) 376#endif 377 COMPLEX(DP) :: d_spin(2,2,48) 378 INTEGER :: npw, current_ik 379 INTEGER, EXTERNAL :: global_kpoint_index 380 INTEGER :: ibnd_start_new, ibnd_end_new, max_buff_bands_per_egrp 381 INTEGER :: ibnd_exx, evc_offset 382 ! 383 CALL start_clock ('exxinit') 384 IF ( Doloc ) THEN 385 WRITE(stdout,'(/,5X,"Using localization algorithm with threshold: ",& 386 & D10.2)') local_thr 387 ! IF (.NOT.gamma_only) CALL errore('exxinit','SCDM with K-points NYI',1) 388 IF (okvan .OR. okpaw) CALL errore( 'exxinit','SCDM with USPP/PAW not & 389 &implemented', 1 ) 390 ENDIF 391 IF ( use_ace ) & 392 WRITE(stdout,'(/,5X,"Using ACE for calculation of exact exchange")') 393 ! 394 CALL transform_evc_to_exx( 2 ) 395 ! 396 ! ... prepare the symmetry matrices for the spin part 397 ! 398 IF (noncolin) THEN 399 DO isym = 1, nsym 400 CALL find_u( sr(:,:,isym), d_spin(:,:,isym) ) 401 ENDDO 402 ENDIF 403 ! 404 CALL exx_fft_create() 405 ! 406 ! Note that nxxs is not the same as nrxxs in parallel case 407 nxxs = dfftt%nr1x * dfftt%nr2x * dfftt%nr3x 408 nrxxs = dfftt%nnr 409#if defined(__MPI) 410 IF (noncolin) THEN 411 ALLOCATE( psic_all_nc(nxxs,npol), temppsic_all_nc(nxxs,npol) ) 412 ELSEIF ( .NOT. gamma_only ) THEN 413 ALLOCATE( psic_all(nxxs), temppsic_all(nxxs) ) 414 ENDIF 415#endif 416 IF (noncolin) THEN 417 ALLOCATE( temppsic_nc(nrxxs, npol), psic_nc(nrxxs, npol) ) 418 ELSEIF ( .NOT. gamma_only ) THEN 419 ALLOCATE( temppsic(nrxxs) ) 420 ENDIF 421 ! 422 ALLOCATE( psic_exx(nrxxs) ) 423 ! 424 IF (.NOT.exx_is_active()) THEN 425 ! 426 erfc_scrlen = get_screening_parameter() 427 gau_scrlen = get_gau_parameter() 428 exxdiv = exx_divergence() 429 exxalfa = get_exx_fraction() 430 ! 431 CALL start_exx() 432 ENDIF 433 ! 434 IF (.NOT. gamma_only) CALL exx_set_symm( dfftt%nr1, dfftt%nr2, dfftt%nr3, & 435 dfftt%nr1x, dfftt%nr2x, dfftt%nr3x ) 436 ! set occupations of wavefunctions used in the calculation of exchange term 437 IF (.NOT. ALLOCATED(x_occupation)) ALLOCATE( x_occupation(nbnd,nkstot) ) 438 ALLOCATE( occ(nbnd,nks) ) 439 ! 440 DO ik = 1, nks 441 IF (ABS(wk(ik)) > eps_occ) THEN 442 occ(1:nbnd,ik) = wg(1:nbnd,ik) / wk(ik) 443 ELSE 444 occ(1:nbnd,ik) = 0._DP 445 ENDIF 446 ENDDO 447 ! 448 CALL poolcollect( nbnd, nks, occ, nkstot, x_occupation ) 449 ! 450 DEALLOCATE( occ ) 451 ! 452 ! ... find an upper bound to the number of bands with non zero occupation. 453 ! Useful to distribute bands among band groups 454 ! 455 x_nbnd_occ = 0 456 DO ik = 1, nkstot 457 DO ibnd = MAX(1,x_nbnd_occ), nbnd 458 IF (ABS(x_occupation(ibnd,ik)) > eps_occ) x_nbnd_occ = ibnd 459 ENDDO 460 ENDDO 461 ! 462 IF (nbndproj == 0) nbndproj = nbnd 463 ! 464 CALL divide( inter_egrp_comm, x_nbnd_occ, ibnd_start, ibnd_end ) 465 CALL init_index_over_band( inter_egrp_comm, nbnd, nbnd ) 466 ! 467 ! ... this will cause exxbuff to be calculated for every band 468 ibnd_start_new = iexx_start 469 ibnd_end_new = iexx_end 470 ! 471 IF ( gamma_only ) THEN 472 ibnd_buff_start = (ibnd_start_new+1)/2 473 ibnd_buff_end = (ibnd_end_new+1)/2 474 max_buff_bands_per_egrp = MAXVAL((all_end(:)+1)/2-(all_start(:)+1)/2)+1 475 ELSE 476 ibnd_buff_start = ibnd_start_new 477 ibnd_buff_end = ibnd_end_new 478 max_buff_bands_per_egrp = MAXVAL(all_end(:)-all_start(:))+1 479 ENDIF 480 ! 481 IF (DoLoc) THEN 482 ! 483 IF (gamma_only) THEN 484 IF (.NOT. ALLOCATED(locbuff)) ALLOCATE( locbuff(nrxxs*npol,nbnd,nks) ) 485 IF (.NOT. ALLOCATED(locmat)) ALLOCATE( locmat(nbnd,nbnd,nks) ) 486 locbuff = 0.0d0 487 locmat = 0.0d0 488 ELSE 489 IF (.NOT. ALLOCATED(exxbuff)) ALLOCATE( exxbuff(nrxxs*npol,nbnd,nkqs) ) 490 IF (.NOT. ALLOCATED(exxmat) ) ALLOCATE( exxmat(nbnd,nkqs,nbnd,nks) ) 491 exxbuff = (0.0d0, 0.0d0) 492 exxmat = 0.0d0 493 ENDIF 494 ! 495 IF (.NOT. ALLOCATED(evc0)) then 496 ALLOCATE( evc0(npwx*npol,nbndproj,nks) ) 497 evc0 = (0.0d0,0.0d0) 498 ENDIF 499 ! 500 ELSE 501 ! 502 IF (.NOT. ALLOCATED(exxbuff)) THEN 503 IF (gamma_only) THEN 504 ALLOCATE( exxbuff(nrxxs*npol,ibnd_buff_start:ibnd_buff_start + & 505 max_buff_bands_per_egrp-1,nkqs) ) ! THIS WORKS as for k 506 ELSE 507 ALLOCATE( exxbuff(nrxxs*npol,ibnd_buff_start:ibnd_buff_start + & 508 max_buff_bands_per_egrp-1,nkqs) ) 509 ENDIF 510 ENDIF 511 ENDIF 512 ! 513 !assign buffer 514 IF(DoLoc) THEN 515 IF(gamma_only) THEN 516!$omp parallel do collapse(3) default(shared) firstprivate(npol,nrxxs,nkqs, & 517!$omp ibnd_buff_start,ibnd_buff_end) private(ir,ibnd,ikq,ipol) 518 DO ikq=1,SIZE(locbuff,3) 519 DO ibnd=1, x_nbnd_occ 520 DO ir=1,nrxxs*npol 521 locbuff(ir,ibnd,ikq)=0.0_DP 522 ENDDO 523 ENDDO 524 ENDDO 525 ENDIF 526 ELSE 527!$omp parallel do collapse(3) default(shared) firstprivate(npol,nrxxs,nkqs, & 528!$omp ibnd_buff_start,ibnd_buff_end) private(ir,ibnd,ikq,ipol) 529 DO ikq = 1, SIZE(exxbuff,3) 530 DO ibnd = ibnd_buff_start, ibnd_buff_end 531 DO ir = 1, nrxxs*npol 532 exxbuff(ir,ibnd,ikq) = (0.0_DP,0.0_DP) 533 ENDDO 534 ENDDO 535 ENDDO 536 ! the above loops will replaced with the following line soon 537 !CALL threaded_memset(exxbuff, 0.0_DP, nrxxs*npol*SIZE(exxbuff,2)*nkqs*2) 538 ENDIF 539 ! 540 ! ... This is parallelized over pools. Each pool computes only its k-points 541 ! 542 KPOINTS_LOOP : & 543 DO ik = 1, nks 544 ! 545 IF ( nks > 1 ) CALL get_buffer( evc_exx, nwordwfc_exx, iunwfc_exx, ik ) 546 ! 547 ! ik = index of k-point in this pool 548 ! current_ik = index of k-point over all pools 549 ! 550 current_ik = global_kpoint_index( nkstot, ik ) 551 ! 552 IF_GAMMA_ONLY : & 553 IF (gamma_only) THEN 554 ! 555 IF (MOD(iexx_start,2) == 0) THEN 556 ibnd_loop_start = iexx_start-1 557 ELSE 558 ibnd_loop_start = iexx_start 559 ENDIF 560 ! 561 evc_offset = 0 562 DO ibnd = ibnd_loop_start, iexx_end, 2 563 ! 564 psic_exx(:) = ( 0._DP, 0._DP ) 565 ! 566 IF ( ibnd < iexx_end ) THEN 567 IF ( ibnd == ibnd_loop_start .AND. MOD(iexx_start,2) == 0 ) THEN 568 DO ig = 1, npwt 569 psic_exx(dfftt%nl(ig)) = ( 0._DP, 1._DP )*evc_exx(ig,1) 570 psic_exx(dfftt%nlm(ig)) = ( 0._DP, 1._DP )*CONJG(evc_exx(ig,1)) 571 ENDDO 572 evc_offset = -1 573 ELSE 574 DO ig = 1, npwt 575 psic_exx(dfftt%nl(ig)) = evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+1) & 576 + ( 0._DP, 1._DP ) * evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+2) 577 psic_exx(dfftt%nlm(ig)) = CONJG( evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+1) ) & 578 + ( 0._DP, 1._DP ) * CONJG( evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+2) ) 579 ENDDO 580 ENDIF 581 ELSE 582 DO ig=1,npwt 583 psic_exx(dfftt%nl (ig)) = evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+1) 584 psic_exx(dfftt%nlm(ig)) = CONJG( evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+1) ) 585 ENDDO 586 ENDIF 587 ! 588 CALL invfft( 'Wave', psic_exx, dfftt ) 589 ! 590 IF (DoLoc) THEN 591 locbuff(1:nrxxs,ibnd-ibnd_loop_start+evc_offset+1,ik) = DBLE( psic_exx(1:nrxxs) ) 592 IF (ibnd-ibnd_loop_start+evc_offset+2 <= nbnd) & 593 locbuff(1:nrxxs,ibnd-ibnd_loop_start+evc_offset+2,ik) = AIMAG( psic_exx(1:nrxxs) ) 594 ELSE 595 exxbuff(1:nrxxs,(ibnd+1)/2,current_ik)=psic_exx(1:nrxxs) 596 ENDIF 597 ! 598 ENDDO 599 ! 600 ELSE IF_GAMMA_ONLY 601 ! 602 npw = ngk (ik) 603 IBND_LOOP_K : & 604 DO ibnd = iexx_start, iexx_end 605 ! 606 ibnd_exx = ibnd 607 IF (noncolin) THEN 608!$omp parallel do default(shared) private(ir) firstprivate(nrxxs) 609 DO ir = 1, nrxxs 610 temppsic_nc(ir,1) = ( 0._DP, 0._DP ) 611 temppsic_nc(ir,2) = ( 0._DP, 0._DP ) 612 ENDDO 613!$omp parallel do default(shared) private(ig) firstprivate(npw,ik,ibnd_exx) 614 DO ig = 1, npw 615 temppsic_nc(dfftt%nl(igk_exx(ig,ik)),1) = evc_exx(ig,ibnd-iexx_start+1) 616 ENDDO 617!$omp end parallel do 618 CALL invfft( 'Wave', temppsic_nc(:,1), dfftt ) 619!$omp parallel do default(shared) private(ig) firstprivate(npw,ik,ibnd_exx,npwx) 620 DO ig = 1, npw 621 temppsic_nc(dfftt%nl(igk_exx(ig,ik)),2) = evc_exx(ig+npwx,ibnd-iexx_start+1) 622 ENDDO 623!$omp end parallel do 624 CALL invfft( 'Wave', temppsic_nc(:,2), dfftt ) 625 ELSE 626!$omp parallel do default(shared) private(ir) firstprivate(nrxxs) 627 DO ir = 1, nrxxs 628 temppsic(ir) = ( 0._DP, 0._DP ) 629 ENDDO 630!$omp parallel do default(shared) private(ig) firstprivate(npw,ik,ibnd_exx) 631 DO ig = 1, npw 632 temppsic(dfftt%nl(igk_exx(ig,ik))) = evc_exx(ig,ibnd-iexx_start+1) 633 ENDDO 634!$omp end parallel do 635 CALL invfft( 'Wave', temppsic, dfftt ) 636 ENDIF 637 ! 638 DO ikq = 1, nkqs 639 ! 640 IF (index_xk(ikq) /= current_ik) CYCLE 641 isym = ABS(index_sym(ikq) ) 642 ! 643 IF (noncolin) THEN ! noncolinear 644#if defined(__MPI) 645 DO ipol = 1, npol 646 CALL gather_grid( dfftt, temppsic_nc(:,ipol), temppsic_all_nc(:,ipol) ) 647 ENDDO 648 ! 649 IF ( me_egrp == 0 ) THEN 650!$omp parallel do default(shared) private(ir) firstprivate(npol,nxxs) 651 DO ir = 1, nxxs 652 !DIR$ UNROLL_AND_JAM (2) 653 DO ipol = 1, npol 654 psic_all_nc(ir,ipol) = (0.0_DP, 0.0_DP) 655 ENDDO 656 ENDDO 657!$omp end parallel do 658!$omp parallel do default(shared) private(ir) firstprivate(npol,isym,nxxs) reduction(+:psic_all_nc) 659 DO ir = 1, nxxs 660 !DIR$ UNROLL_AND_JAM (4) 661 DO ipol = 1, npol 662 DO jpol = 1, npol 663 psic_all_nc(ir,ipol) = psic_all_nc(ir,ipol) + & 664 CONJG(d_spin(jpol,ipol,isym)) * & 665 temppsic_all_nc(rir(ir,isym),jpol) 666 ENDDO 667 ENDDO 668 ENDDO 669!$omp end parallel do 670 ENDIF 671 ! 672 DO ipol = 1, npol 673 CALL scatter_grid( dfftt, psic_all_nc(:,ipol), psic_nc(:,ipol) ) 674 ENDDO 675#else 676!$omp parallel do default(shared) private(ir) firstprivate(npol,nxxs) 677 DO ir = 1, nxxs 678 !DIR$ UNROLL_AND_JAM (2) 679 DO ipol = 1, npol 680 psic_nc(ir,ipol) = (0._DP,0._DP) 681 ENDDO 682 ENDDO 683!$omp end parallel do 684!$omp parallel do default(shared) private(ipol,jpol,ir) firstprivate(npol,isym,nxxs) reduction(+:psic_nc) 685 DO ir = 1, nxxs 686 !DIR$ UNROLL_AND_JAM (4) 687 DO ipol = 1, npol 688 DO jpol = 1, npol 689 psic_nc(ir,ipol) = psic_nc(ir,ipol) + CONJG(d_spin(jpol,ipol,isym))* & 690 temppsic_nc(rir(ir,isym),jpol) 691 ENDDO 692 ENDDO 693 ENDDO 694!$omp end parallel do 695#endif 696 IF (index_sym(ikq) > 0 ) THEN 697 ! sym. op. without time reversal: normal case 698!$omp parallel do default(shared) private(ir) firstprivate(ibnd,isym,ikq) 699 DO ir = 1, nrxxs 700 exxbuff(ir,ibnd,ikq) = psic_nc(ir,1) 701 exxbuff(ir+nrxxs,ibnd,ikq) = psic_nc(ir,2) 702 ENDDO 703!$omp end parallel do 704 ELSE 705 ! sym. op. with time reversal: spin 1->2*, 2->-1* 706!$omp parallel do default(shared) private(ir) firstprivate(ibnd,isym,ikq) 707 DO ir = 1, nrxxs 708 exxbuff(ir,ibnd,ikq) = CONJG(psic_nc(ir,2)) 709 exxbuff(ir+nrxxs,ibnd,ikq) = -CONJG(psic_nc(ir,1)) 710 ENDDO 711!$omp end parallel do 712 ENDIF 713 ELSE ! noncolinear 714#if defined(__MPI) 715 CALL gather_grid( dfftt, temppsic, temppsic_all ) 716 IF ( me_egrp == 0 ) THEN 717!$omp parallel do default(shared) private(ir) firstprivate(isym) 718 DO ir = 1, nxxs 719 psic_all(ir) = temppsic_all(rir(ir,isym)) 720 ENDDO 721!$omp end parallel do 722 ENDIF 723 CALL scatter_grid( dfftt, psic_all, psic_exx ) 724#else 725!$omp parallel do default(shared) private(ir) firstprivate(isym) 726 DO ir = 1, nrxxs 727 psic_exx(ir) = temppsic(rir(ir,isym)) 728 ENDDO 729!$omp end parallel do 730#endif 731!$omp parallel do default(shared) private(ir) firstprivate(isym,ibnd,ikq) 732 DO ir = 1, nrxxs 733 IF (index_sym(ikq) < 0 ) THEN 734 psic_exx(ir) = CONJG(psic_exx(ir)) 735 ENDIF 736 exxbuff(ir,ibnd,ikq) = psic_exx(ir) 737 ENDDO 738!$omp end parallel do 739 ! 740 ENDIF ! noncolinear 741 ! 742 ENDDO 743 ! 744 ENDDO& 745 IBND_LOOP_K 746 ! 747 ENDIF& 748 IF_GAMMA_ONLY 749 ENDDO& 750 KPOINTS_LOOP 751 ! 752 DEALLOCATE( psic_exx ) 753 IF (noncolin) THEN 754 DEALLOCATE( temppsic_nc, psic_nc ) 755#if defined(__MPI) 756 DEALLOCATE( temppsic_all_nc, psic_all_nc ) 757#endif 758 ELSE IF ( .NOT. gamma_only ) THEN 759 DEALLOCATE( temppsic ) 760#if defined(__MPI) 761 DEALLOCATE( temppsic_all, psic_all ) 762#endif 763 ENDIF 764 ! 765 ! Each wavefunction in exxbuff is computed by a single pool, collect among 766 ! pools in a smart way (i.e. without doing all-to-all sum and bcast) 767 ! See also the initialization of working_pool in exx_mp_init 768 ! Note that in Gamma-only LSDA can be parallelized over two pools, and there 769 ! is no need to communicate anything: each pools deals with its own spin 770 ! 771 IF ( .NOT. gamma_only ) THEN 772 DO ikq = 1, nkqs 773 CALL mp_bcast( exxbuff(:,:,ikq), working_pool(ikq), intra_orthopool_comm ) 774 ENDDO 775 ENDIF 776 ! 777 ! For US/PAW only: compute <beta_I|psi_j,k+q> for the entire 778 ! de-symmetrized k+q grid by rotating the ones from the irreducible wedge 779 ! 780 IF (okvan) CALL rotate_becxx( nkqs, index_xk, index_sym, xkq_collect ) 781 ! 782 ! Initialize 4-wavefunctions one-center Fock integrals 783 ! \int \psi_a(r)\phi_a(r)\phi_b(r')\psi_b(r')/|r-r'| 784 ! 785 IF (okpaw) CALL PAW_init_fock_kernel() 786 ! 787 CALL change_data_structure( .FALSE. ) 788 ! 789 CALL stop_clock( 'exxinit' ) 790 ! 791 END SUBROUTINE exxinit 792 ! 793 ! 794 !----------------------------------------------------------------------- 795 SUBROUTINE vexx( lda, n, m, psi, hpsi, becpsi ) 796 !----------------------------------------------------------------------- 797 !! Wrapper routine computing V_x\psi, V_x = exchange potential. 798 !! Calls generic version vexx_k or Gamma-specific one vexx_gamma. 799 ! 800 USE becmod, ONLY : bec_type 801 USE uspp, ONLY : okvan 802 USE paw_variables, ONLY : okpaw 803 USE us_exx, ONLY : becxx 804 USE mp_exx, ONLY : negrp, inter_egrp_comm, init_index_over_band 805 USE wvfct, ONLY : nbnd 806 USE exx_band, ONLY : transform_psi_to_exx, transform_hpsi_to_local, & 807 psi_exx, hpsi_exx, igk_exx 808 ! 809 IMPLICIT NONE 810 ! 811 INTEGER :: lda 812 !! input: leading dimension of arrays psi and hpsi 813 INTEGER :: n 814 !! input: true dimension of psi and hpsi 815 INTEGER :: m 816 !! input: number of states psi 817 COMPLEX(DP) :: psi(lda*npol,m) 818 !! input: m wavefunctions 819 COMPLEX(DP) :: hpsi(lda*npol,m) 820 !! output: V_x*psi 821 TYPE(bec_type), OPTIONAL :: becpsi 822 !! input: <beta|psi>, optional but needed for US and PAW case 823 ! 824 INTEGER :: i 825 ! 826 IF ((okvan.OR.okpaw) .AND. .NOT. PRESENT(becpsi)) & 827 CALL errore( 'vexx','becpsi needed for US/PAW case', 1 ) 828 CALL start_clock( 'vexx' ) 829 ! 830 IF (negrp > 1) THEN 831 CALL init_index_over_band( inter_egrp_comm, nbnd, m ) 832 ! 833 ! ... transform psi to the EXX data structure 834 CALL transform_psi_to_exx( lda, n, m, psi ) 835 ENDIF 836 ! 837 ! ... calculate the EXX contribution to hpsi 838 ! 839 IF ( gamma_only ) THEN 840 IF (negrp == 1)THEN 841 CALL vexx_gamma( lda, n, m, psi, hpsi, becpsi ) 842 ELSE 843 CALL vexx_gamma( lda, n, m, psi_exx, hpsi_exx, becpsi ) 844 ENDIF 845 ELSE 846 IF (negrp.eq.1)THEN 847 CALL vexx_k( lda, n, m, psi, hpsi, becpsi ) 848 ELSE 849 CALL vexx_k( lda, n, m, psi_exx, hpsi_exx, becpsi ) 850 ENDIF 851 ENDIF 852 ! 853 IF (negrp > 1) THEN 854 ! 855 ! ... transform hpsi to the local data structure 856 ! 857 CALL transform_hpsi_to_local(lda,n,m,hpsi) 858 ! 859 ENDIF 860 ! 861 CALL stop_clock( 'vexx' ) 862 ! 863 END SUBROUTINE vexx 864 ! 865 ! 866 !----------------------------------------------------------------------- 867 SUBROUTINE vexx_gamma( lda, n, m, psi, hpsi, becpsi ) 868 !----------------------------------------------------------------------- 869 !! Gamma-specific version of vexx. 870 ! 871 USE constants, ONLY : fpi, e2, pi 872 USE cell_base, ONLY : omega 873 USE gvect, ONLY : ngm, g 874 USE wvfct, ONLY : npwx, current_k, nbnd 875 USE klist, ONLY : xk, nks, nkstot, igk_k 876 USE fft_interfaces, ONLY : fwfft, invfft 877 USE becmod, ONLY : bec_type 878 USE mp_exx, ONLY : inter_egrp_comm, my_egrp_id, & 879 intra_egrp_comm, me_egrp, & 880 negrp, max_pairs, egrp_pairs, ibands, nibands, & 881 iexx_istart, iexx_iend, & 882 all_start, all_end, iexx_start, jblock 883 USE mp, ONLY : mp_sum, mp_barrier, mp_circular_shift_left 884 USE uspp, ONLY : nkb, okvan 885 USE paw_variables, ONLY : okpaw 886 USE us_exx, ONLY : bexg_merge, becxx, addusxx_g, addusxx_r, & 887 newdxx_g, newdxx_r, add_nlxx_pot, & 888 qvan_init, qvan_clean 889 USE paw_exx, ONLY : PAW_newdxx 890 USE exx_base, ONLY : nqs, index_xkq, index_xk, xkq_collect, & 891 coulomb_fac, g2_convolution_all 892 USE exx_band, ONLY : result_sum, igk_exx 893 ! 894 IMPLICIT NONE 895 ! 896 INTEGER :: lda 897 !! input: leading dimension of arrays psi and hpsi 898 INTEGER :: n 899 !! input: true dimension of psi and hpsi 900 INTEGER :: m 901 !! input: number of states psi 902 COMPLEX(DP) :: psi(lda*npol,m) 903 !! input: m wavefunctions 904 COMPLEX(DP) :: hpsi(lda*npol,m) 905 !! output: V_x*psi 906 TYPE(bec_type), OPTIONAL :: becpsi ! or call a calbec(...psi) instead 907 !! input: <beta|psi>, optional but needed for US and PAW case 908 ! 909 ! ... local variables 910 ! 911 COMPLEX(DP), ALLOCATABLE :: result(:,:) 912 REAL(DP), ALLOCATABLE :: temppsic_dble (:) 913 REAL(DP), ALLOCATABLE :: temppsic_aimag(:) 914 ! 915 COMPLEX(DP), ALLOCATABLE :: vc(:,:), deexx(:,:) 916 REAL(DP), ALLOCATABLE :: fac(:) 917 INTEGER :: ibnd, ik, im , ikq, iq, ipol 918 INTEGER :: ir, ig 919 INTEGER :: current_ik 920 INTEGER :: ibnd_loop_start 921 INTEGER :: nrxxs 922 REAL(DP) :: x1, x2, xkp(3) 923 REAL(DP) :: xkq(3) 924 INTEGER, EXTERNAL :: global_kpoint_index 925 INTEGER :: ialloc 926 COMPLEX(DP), ALLOCATABLE :: big_result(:,:) 927 INTEGER :: iproc, nproc_egrp, ii, ipair 928 INTEGER :: jbnd, jstart, jend 929 ! ... scratch space for fft of psi and rho 930 COMPLEX(DP), ALLOCATABLE :: psi_rhoc_work(:) 931 INTEGER :: jblock_start, jblock_end 932 INTEGER :: iegrp, wegrp 933 INTEGER :: exxbuff_index 934 INTEGER :: ending_im 935 ! 936 ialloc = nibands(my_egrp_id+1) 937 ! 938 ALLOCATE( fac(dfftt%ngm) ) 939 ! 940 nrxxs = dfftt%nnr 941 ! 942 !ALLOCATE( result(nrxxs), temppsic_DBLE(nrxxs), temppsic_aimag(nrxxs) ) 943 ALLOCATE( result(nrxxs,ialloc), temppsic_DBLE(nrxxs) ) 944 ALLOCATE( temppsic_aimag(nrxxs) ) 945 ALLOCATE( psi_rhoc_work(nrxxs) ) 946 ! 947 ALLOCATE( vc(nrxxs,ialloc) ) 948 IF (okvan) ALLOCATE( deexx(nkb,ialloc) ) 949 ! 950 current_ik = global_kpoint_index( nkstot, current_k ) 951 xkp = xk(:,current_k) 952 ! 953 ALLOCATE( big_result(n,m) ) 954 big_result = 0.0_DP 955 result = 0.0_DP 956 ! 957 DO ii = 1, nibands(my_egrp_id+1) 958 IF (okvan) deexx(:,ii) = 0.0_DP 959 ENDDO 960 ! 961 ! Here the loops start 962 ! 963 INTERNAL_LOOP_ON_Q : & 964 DO iq = 1, nqs 965 ! 966 ikq = index_xkq(current_ik,iq) 967 ik = index_xk(ikq) 968 xkq = xkq_collect(:,ikq) 969 ! 970 ! calculate the 1/|r-r'| (actually, k+q+g) factor and place it in fac 971 CALL g2_convolution_all( dfftt%ngm, gt, xkp, xkq, iq, current_k ) 972 IF ( okvan .AND..NOT.tqr ) CALL qvan_init( dfftt%ngm, xkq, xkp ) 973 ! 974 DO iegrp = 1, negrp 975 ! 976 ! compute the id of group whose data is currently worked on 977 wegrp = MOD(iegrp+my_egrp_id-1, negrp)+1 978 ! 979 jblock_start = all_start(wegrp) 980 jblock_end = all_end(wegrp) 981 ! 982 LOOP_ON_PSI_BANDS : & 983 DO ii = 1, nibands(my_egrp_id+1) 984 ! 985 ibnd = ibands(ii,my_egrp_id+1) 986 ! 987 IF (ibnd==0 .OR. ibnd>m) CYCLE 988 ! 989 IF (MOD(ii,2) == 1) THEN 990 ! 991 psi_rhoc_work = (0._DP,0._DP) 992 ! 993 IF ((ii+1) <= MIN(m,nibands(my_egrp_id+1))) THEN 994 ! deal with double bands 995!$omp parallel do default(shared), private(ig) 996 DO ig = 1, npwt 997 psi_rhoc_work( dfftt%nl(ig) ) = psi(ig, ii) + (0._DP,1._DP) * psi(ig, ii+1) 998 psi_rhoc_work( dfftt%nlm(ig) ) = CONJG(psi(ig, ii) - (0._DP,1._DP) * psi(ig, ii+1)) 999 ENDDO 1000!$omp end parallel do 1001 ENDIF 1002 ! 1003 IF ( ii == MIN(m,nibands(my_egrp_id+1)) ) THEN 1004 ! deal with a single last band 1005!$omp parallel do default(shared), private(ig) 1006 DO ig = 1, npwt 1007 psi_rhoc_work( dfftt%nl(ig) ) = psi(ig,ii) 1008 psi_rhoc_work( dfftt%nlm(ig) ) = CONJG(psi(ig,ii)) 1009 ENDDO 1010!$omp end parallel do 1011 ENDIF 1012 ! 1013 CALL invfft( 'Wave', psi_rhoc_work, dfftt ) 1014!$omp parallel do default(shared), private(ir) 1015 DO ir = 1, nrxxs 1016 temppsic_DBLE(ir) = DBLE( psi_rhoc_work(ir) ) 1017 temppsic_aimag(ir) = AIMAG( psi_rhoc_work(ir) ) 1018 ENDDO 1019!$omp end parallel do 1020 ! 1021 ENDIF 1022 ! 1023 ! 1024 !determine which j-bands to calculate 1025 jstart = 0 1026 jend = 0 1027 DO ipair = 1, max_pairs 1028 IF (egrp_pairs(1,ipair,my_egrp_id+1) == ibnd) THEN 1029 IF (jstart == 0)THEN 1030 jstart = egrp_pairs(2,ipair,my_egrp_id+1) 1031 jend = jstart 1032 ELSE 1033 jend = egrp_pairs(2,ipair,my_egrp_id+1) 1034 ENDIF 1035 ENDIF 1036 ENDDO 1037 ! 1038 jstart = MAX(jstart,jblock_start) 1039 jend = MIN(jend,jblock_end) 1040 ! 1041 IF (MOD(jstart,2) ==0 ) THEN 1042 ibnd_loop_start = jstart-1 1043 ELSE 1044 ibnd_loop_start = jstart 1045 ENDIF 1046 ! 1047 IBND_LOOP_GAM : & 1048 DO jbnd = ibnd_loop_start, jend, 2 !for each band of psi 1049 ! 1050 exxbuff_index = (jbnd+1)/2-(all_start(wegrp)+1)/2+(iexx_start+1)/2 1051 ! 1052 IF ( jbnd < jstart ) THEN 1053 x1 = 0.0_DP 1054 ELSE 1055 x1 = x_occupation(jbnd,ik) 1056 ENDIF 1057 IF ( jbnd == jend) THEN 1058 x2 = 0.0_DP 1059 ELSE 1060 x2 = x_occupation(jbnd+1,ik) 1061 ENDIF 1062 IF ( ABS(x1) < eps_occ .AND. ABS(x2) < eps_occ ) CYCLE 1063 ! 1064 ! ... calculate rho in real space. Gamma tricks are used. 1065 ! temppsic is real; tempphic contains one band in the real part, 1066 ! another one in the imaginary part; the same applies to rhoc 1067 ! 1068 IF ( MOD(ii,2) == 0 ) THEN 1069!$omp parallel do default(shared), private(ir) 1070 DO ir = 1, nrxxs 1071 psi_rhoc_work(ir) = exxbuff(ir,exxbuff_index,ikq) * temppsic_aimag(ir) / omega 1072 ENDDO 1073!$omp end parallel do 1074 ELSE 1075!$omp parallel do default(shared), private(ir) 1076 DO ir = 1, nrxxs 1077 psi_rhoc_work(ir) = exxbuff(ir,exxbuff_index,ikq) * temppsic_DBLE(ir) / omega 1078 ENDDO 1079!$omp end parallel do 1080 ENDIF 1081 ! 1082 ! ... bring rho to G-space 1083 ! 1084 ! >>> add augmentation in REAL SPACE here 1085 IF (okvan .AND. tqr) THEN 1086 IF (jbnd >= jstart) & 1087 CALL addusxx_r( psi_rhoc_work, & 1088 _CX(becxx(ikq)%r(:,jbnd)), _CX(becpsi%r(:,ibnd))) 1089 IF (jbnd < jend) & 1090 CALL addusxx_r( psi_rhoc_work, & 1091 _CY(becxx(ikq)%r(:,jbnd+1)),_CX(becpsi%r(:,ibnd))) 1092 ENDIF 1093 ! 1094 CALL fwfft( 'Rho', psi_rhoc_work, dfftt ) 1095 ! >>>> add augmentation in G SPACE here 1096 IF (okvan .AND. .NOT. tqr) THEN 1097 ! ... contribution from one band added to real (in real space) part of rhoc 1098 IF (jbnd >= jstart) & 1099 CALL addusxx_g( dfftt, psi_rhoc_work, xkq, xkp, 'r', & 1100 becphi_r=becxx(ikq)%r(:,jbnd), becpsi_r=becpsi%r(:,ibnd) ) 1101 ! ... contribution from following band added to imaginary (in real space) part of rhoc 1102 IF (jbnd < jend) & 1103 CALL addusxx_g( dfftt, psi_rhoc_work, xkq, xkp, 'i', & 1104 becphi_r=becxx(ikq)%r(:,jbnd+1), becpsi_r=becpsi%r(:,ibnd) ) 1105 ENDIF 1106 ! >>>> charge density done 1107 ! 1108 vc(:,ii) = 0._DP 1109 ! 1110!$omp parallel do default(shared), private(ig) 1111 DO ig = 1, dfftt%ngm 1112 ! 1113 vc(dfftt%nl(ig),ii) = coulomb_fac(ig,iq,current_k) * psi_rhoc_work(dfftt%nl(ig)) 1114 vc(dfftt%nlm(ig),ii) = coulomb_fac(ig,iq,current_k) * psi_rhoc_work(dfftt%nlm(ig)) 1115 ! 1116 ENDDO 1117!$omp end parallel do 1118 ! 1119 ! >>>> compute <psi|H_fock G SPACE here 1120 IF (okvan .AND. .NOT. tqr) THEN 1121 IF (jbnd >= jstart) & 1122 CALL newdxx_g( dfftt, vc(:,ii), xkq, xkp, 'r', deexx(:,ii), & 1123 becphi_r=x1*becxx(ikq)%r(:,jbnd) ) 1124 IF (jbnd<jend) & 1125 CALL newdxx_g( dfftt, vc(:,ii), xkq, xkp, 'i', deexx(:,ii), & 1126 becphi_r=x2*becxx(ikq)%r(:,jbnd+1) ) 1127 ENDIF 1128 ! 1129 !brings back v in real space 1130 CALL invfft( 'Rho', vc(:,ii), dfftt ) 1131 ! 1132 ! >>>> compute <psi|H_fock REAL SPACE here 1133 IF (okvan .AND. tqr) THEN 1134 IF (jbnd >= jstart) & 1135 CALL newdxx_r( dfftt,vc(:,ii), _CX(x1*becxx(ikq)%r(:,jbnd)), deexx(:,ii) ) 1136 IF (jbnd < jend) & 1137 CALL newdxx_r( dfftt,vc(:,ii), _CY(x2*becxx(ikq)%r(:,jbnd+1)), deexx(:,ii) ) 1138 ENDIF 1139 ! 1140 IF (okpaw) THEN 1141 IF (jbnd >= jstart) & 1142 CALL PAW_newdxx( x1/nqs, _CX(becxx(ikq)%r(:,jbnd)),& 1143 _CX(becpsi%r(:,ibnd)), deexx(:,ii) ) 1144 IF (jbnd < jend) & 1145 CALL PAW_newdxx( x2/nqs, _CX(becxx(ikq)%r(:,jbnd+1)),& 1146 _CX(becpsi%r(:,ibnd)), deexx(:,ii) ) 1147 ENDIF 1148 ! 1149 ! ... accumulates over bands and k points 1150 ! 1151!$omp parallel do default(shared), private(ir) 1152 DO ir = 1, nrxxs 1153 result(ir,ii) = result(ir,ii) & 1154 + x1* DBLE(vc(ir,ii))* DBLE(exxbuff(ir,exxbuff_index,ikq)) & 1155 + x2*AIMAG(vc(ir,ii))*AIMAG(exxbuff(ir,exxbuff_index,ikq)) 1156 ENDDO 1157!$omp end parallel do 1158 ! 1159 ENDDO & 1160 IBND_LOOP_GAM 1161 ! 1162 ENDDO & 1163 LOOP_ON_PSI_BANDS 1164 ! 1165 ! get the next nbnd/negrp data 1166 IF (negrp > 1) CALL mp_circular_shift_left( exxbuff(:,:,ikq), me_egrp, inter_egrp_comm ) 1167 ! 1168 ENDDO ! iegrp 1169 IF (okvan .AND. .NOT.tqr) CALL qvan_clean() 1170 ENDDO & 1171 INTERNAL_LOOP_ON_Q 1172 ! 1173 DO ii = 1, nibands(my_egrp_id+1) 1174 ! 1175 ibnd = ibands(ii,my_egrp_id+1) 1176 ! 1177 IF (ibnd == 0 .OR. ibnd>m) CYCLE 1178 ! 1179 IF (okvan) THEN 1180 CALL mp_sum( deexx(:,ii), intra_egrp_comm ) 1181 ENDIF 1182 ! 1183 ! ... brings back result in G-space 1184 ! 1185 CALL fwfft( 'Wave' , result(:,ii), dfftt ) 1186 ! 1187 ! ... communicate result 1188 DO ig = 1, n 1189 big_result(ig,ibnd) = big_result(ig,ibnd) - exxalfa*result(dfftt%nl(igk_exx(ig,current_k)),ii) 1190 ENDDO 1191 ! 1192 ! ... add non-local \sum_I |beta_I> \alpha_Ii (the sum on i is outside) 1193 IF (okvan) CALL add_nlxx_pot( lda, big_result(:,ibnd), xkp, n, & 1194 igk_exx(1,current_k), deexx(:,ii), eps_occ, exxalfa ) 1195 ENDDO 1196 ! 1197 CALL result_sum( n*npol, m, big_result ) 1198 ! 1199 IF (iexx_istart(my_egrp_id+1) > 0) THEN 1200 IF (negrp == 1) THEN 1201 ending_im = m 1202 ELSE 1203 ending_im = iexx_iend(my_egrp_id+1) - iexx_istart(my_egrp_id+1) + 1 1204 END IF 1205 DO im = 1, ending_im 1206!$omp parallel do default(shared), private(ig) firstprivate(im,n) 1207 DO ig = 1, n 1208 hpsi(ig,im) = hpsi(ig,im) + big_result(ig,im+iexx_istart(my_egrp_id+1)-1) 1209 ENDDO 1210!$omp end parallel do 1211 ENDDO 1212 ENDIF 1213 ! 1214 DEALLOCATE( big_result ) 1215 DEALLOCATE( result, temppsic_dble, temppsic_aimag ) 1216 DEALLOCATE( psi_rhoc_work ) 1217 DEALLOCATE( vc, fac ) 1218 IF (okvan) DEALLOCATE( deexx ) 1219 ! 1220 END SUBROUTINE vexx_gamma 1221 ! 1222 ! 1223 !----------------------------------------------------------------------- 1224 SUBROUTINE vexx_k( lda, n, m, psi, hpsi, becpsi ) 1225 !----------------------------------------------------------------------- 1226 !! Generic, k-point version of vexx. 1227 ! 1228 USE constants, ONLY : fpi, e2, pi 1229 USE cell_base, ONLY : omega 1230 USE gvect, ONLY : ngm, g 1231 USE wvfct, ONLY : npwx, current_k, nbnd 1232 USE klist, ONLY : xk, nks, nkstot 1233 USE fft_interfaces, ONLY : fwfft, invfft 1234 USE becmod, ONLY : bec_type 1235 USE mp_exx, ONLY : inter_egrp_comm, my_egrp_id, negrp, & 1236 intra_egrp_comm, me_egrp, & 1237 max_pairs, egrp_pairs, ibands, nibands, & 1238 max_ibands, iexx_istart, iexx_iend, & 1239 all_start, all_end, iexx_start, jblock 1240 USE mp, ONLY : mp_sum, mp_barrier, mp_circular_shift_left 1241 USE uspp, ONLY : nkb, okvan 1242 USE paw_variables, ONLY : okpaw 1243 USE us_exx, ONLY : bexg_merge, becxx, addusxx_g, addusxx_r, & 1244 newdxx_g, newdxx_r, add_nlxx_pot, & 1245 qvan_init, qvan_clean 1246 USE paw_exx, ONLY : PAW_newdxx 1247 USE exx_base, ONLY : nqs, xkq_collect, index_xkq, index_xk, & 1248 coulomb_fac, g2_convolution_all 1249 USE exx_band, ONLY : result_sum, igk_exx 1250 USE io_global, ONLY : stdout 1251 ! 1252 ! 1253 IMPLICIT NONE 1254 ! 1255 INTEGER :: lda 1256 !! input: leading dimension of arrays psi and hpsi 1257 INTEGER :: n 1258 !! input: true dimension of psi and hpsi 1259 INTEGER :: m 1260 !! input: number of states psi 1261 COMPLEX(DP) :: psi(lda*npol,max_ibands) 1262 !! input: m wavefunctions 1263 COMPLEX(DP) :: hpsi(lda*npol,max_ibands) 1264 !! output: V_x*psi 1265 TYPE(bec_type), OPTIONAL :: becpsi ! or call a calbec(...psi) instead 1266 !! input: <beta|psi>, optional but needed for US and PAW case 1267 ! 1268 ! ... local variables 1269 ! 1270 COMPLEX(DP),ALLOCATABLE :: temppsic(:,:), result(:,:) 1271#if defined(__USE_INTEL_HBM_DIRECTIVES) 1272!DIR$ ATTRIBUTES FASTMEM :: result 1273#elif defined(__USE_CRAY_HBM_DIRECTIVES) 1274!DIR$ memory(bandwidth) result 1275#endif 1276 COMPLEX(DP),ALLOCATABLE :: temppsic_nc(:,:,:),result_nc(:,:,:) 1277 INTEGER :: request_send, request_recv 1278 ! 1279 COMPLEX(DP),ALLOCATABLE :: deexx(:,:) 1280 COMPLEX(DP),ALLOCATABLE,TARGET :: rhoc(:,:), vc(:,:) 1281#if defined(__USE_MANY_FFT) 1282 COMPLEX(DP),POINTER :: prhoc(:), pvc(:) 1283#endif 1284#if defined(__USE_INTEL_HBM_DIRECTIVES) 1285!DIR$ ATTRIBUTES FASTMEM :: rhoc, vc 1286#elif defined(__USE_CRAY_HBM_DIRECTIVES) 1287!DIR$ memory(bandwidth) rhoc, vc 1288#endif 1289 REAL(DP), ALLOCATABLE :: fac(:), facb(:) 1290 INTEGER :: ibnd, ik, im , ikq, iq, ipol 1291 INTEGER :: ir, ig, ir_start, ir_end 1292 INTEGER :: irt, nrt, nblock 1293 INTEGER :: current_ik 1294 INTEGER :: ibnd_loop_start 1295 INTEGER :: nrxxs 1296 REAL(DP) :: x1, x2, xkp(3), omega_inv, nqs_inv 1297 REAL(DP) :: xkq(3) 1298 INTEGER, EXTERNAL :: global_kpoint_index 1299 DOUBLE PRECISION :: max, tempx 1300 COMPLEX(DP), ALLOCATABLE :: big_result(:,:) 1301 INTEGER :: ir_out, ipair, jbnd 1302 INTEGER :: ii, jstart, jend, jcount, jind 1303 INTEGER :: ialloc, ending_im 1304 INTEGER :: ijt, njt, jblock_start, jblock_end 1305 INTEGER :: iegrp, wegrp 1306 ! 1307 ialloc = nibands(my_egrp_id+1) 1308 ! 1309 ALLOCATE( fac(dfftt%ngm) ) 1310 nrxxs= dfftt%nnr 1311 ALLOCATE( facb(nrxxs) ) 1312 ! 1313 IF (noncolin) THEN 1314 ALLOCATE( temppsic_nc(nrxxs,npol,ialloc), result_nc(nrxxs,npol,ialloc) ) 1315 ELSE 1316 ALLOCATE( temppsic(nrxxs,ialloc), result(nrxxs,ialloc) ) 1317 ENDIF 1318 ! 1319 IF (okvan) ALLOCATE( deexx(nkb,ialloc) ) 1320 ! 1321 current_ik = global_kpoint_index( nkstot, current_k ) 1322 xkp = xk(:,current_k) 1323 ! 1324 ALLOCATE( big_result(n*npol,m) ) 1325 big_result = 0.0_DP 1326 ! 1327 ! allocate arrays for rhoc and vc 1328 ALLOCATE( rhoc(nrxxs,jblock), vc(nrxxs,jblock) ) 1329#if defined(__USE_MANY_FFT) 1330 prhoc(1:nrxxs*jblock) => rhoc(:,:) 1331 pvc(1:nrxxs*jblock) => vc(:,:) 1332#endif 1333 ! 1334 DO ii = 1, nibands(my_egrp_id+1) 1335 ! 1336 ibnd = ibands(ii,my_egrp_id+1) 1337 ! 1338 IF (ibnd==0 .OR. ibnd>m) CYCLE 1339 IF (okvan) deexx(:,ii) = 0._DP 1340 ! 1341 IF (noncolin) THEN 1342 temppsic_nc(:,:,ii) = 0._DP 1343 ELSE 1344!$omp parallel do default(shared), private(ir) firstprivate(nrxxs) 1345 DO ir = 1, nrxxs 1346 temppsic(ir,ii) = 0._DP 1347 ENDDO 1348 ENDIF 1349 ! 1350 IF (noncolin) THEN 1351 ! 1352!$omp parallel do default(shared), private(ig) 1353 DO ig = 1, n 1354 temppsic_nc(dfftt%nl(igk_exx(ig,current_k)),1,ii) = psi(ig,ii) 1355 temppsic_nc(dfftt%nl(igk_exx(ig,current_k)),2,ii) = psi(npwx+ig,ii) 1356 ENDDO 1357!$omp end parallel do 1358 ! 1359 CALL invfft( 'Wave', temppsic_nc(:,1,ii), dfftt ) 1360 CALL invfft( 'Wave', temppsic_nc(:,2,ii), dfftt ) 1361 ! 1362 ELSE 1363 ! 1364!$omp parallel do default(shared), private(ig) 1365 DO ig = 1, n 1366 temppsic( dfftt%nl(igk_exx(ig,current_k)), ii ) = psi(ig,ii) 1367 ENDDO 1368!$omp end parallel do 1369 ! 1370 CALL invfft( 'Wave', temppsic(:,ii), dfftt ) 1371 ! 1372 ENDIF 1373 ! 1374 IF (noncolin) THEN 1375!$omp parallel do default(shared) firstprivate(nrxxs) private(ir) 1376 DO ir = 1, nrxxs 1377 result_nc(ir,1,ii) = 0.0_DP 1378 result_nc(ir,2,ii) = 0.0_DP 1379 ENDDO 1380 ELSE 1381!$omp parallel do default(shared) firstprivate(nrxxs) private(ir) 1382 DO ir = 1, nrxxs 1383 result(ir,ii) = 0.0_DP 1384 ENDDO 1385 ENDIF 1386 ! 1387 ENDDO 1388 ! 1389 !precompute these guys 1390 omega_inv = 1.0 / omega 1391 nqs_inv = 1.0 / nqs 1392 ! 1393 !------------------------------------------------------------------------! 1394 ! Beginning of main loop 1395 !------------------------------------------------------------------------! 1396 DO iq = 1, nqs 1397 ! 1398 ikq = index_xkq(current_ik,iq) 1399 ik = index_xk(ikq) 1400 xkq = xkq_collect(:,ikq) 1401 ! 1402 ! calculate the 1/|r-r'| (actually, k+q+g) factor and place it in fac 1403 CALL g2_convolution_all( dfftt%ngm, gt, xkp, xkq, iq, current_k ) 1404 ! 1405! JRD - below not threaded 1406 facb = 0D0 1407 DO ig = 1, dfftt%ngm 1408 facb(dfftt%nl(ig)) = coulomb_fac(ig,iq,current_k) 1409 ENDDO 1410 ! 1411 IF ( okvan .AND..NOT.tqr ) CALL qvan_init( dfftt%ngm, xkq, xkp ) 1412 ! 1413 DO iegrp = 1, negrp 1414 ! 1415 ! compute the id of group whose data is currently worked on 1416 wegrp = MOD(iegrp+my_egrp_id-1, negrp)+1 1417 njt = (all_end(wegrp)-all_start(wegrp)+jblock)/jblock 1418 ! 1419 DO ijt = 1, njt 1420 ! 1421 jblock_start = (ijt - 1) * jblock + all_start(wegrp) 1422 jblock_end = MIN(jblock_start+jblock-1,all_end(wegrp)) 1423 ! 1424 DO ii = 1, nibands(my_egrp_id+1) 1425 ! 1426 ibnd = ibands(ii,my_egrp_id+1) 1427 ! 1428 IF (ibnd==0 .OR. ibnd>m) CYCLE 1429 ! 1430 !determine which j-bands to calculate 1431 jstart = 0 1432 jend = 0 1433 ! 1434 DO ipair = 1, max_pairs 1435 IF (egrp_pairs(1,ipair,my_egrp_id+1) == ibnd) THEN 1436 IF (jstart == 0) THEN 1437 jstart = egrp_pairs(2,ipair,my_egrp_id+1) 1438 jend = jstart 1439 ELSE 1440 jend = egrp_pairs(2,ipair,my_egrp_id+1) 1441 ENDIF 1442 ENDIF 1443 ENDDO 1444 ! 1445 jstart = MAX( jstart, jblock_start ) 1446 jend = MIN( jend, jblock_end ) 1447 ! 1448 !how many iters 1449 jcount = jend-jstart+1 1450 IF (jcount <= 0) CYCLE 1451 ! 1452 !----------------------------------------------------------------------! 1453 !INNER LOOP START 1454 !----------------------------------------------------------------------! 1455 ! 1456 nblock = 2048 1457 nrt = (nrxxs+nblock-1)/nblock 1458 ! 1459!$omp parallel do collapse(2) private(ir_start,ir_end) 1460 DO irt = 1, nrt 1461 DO jbnd = jstart, jend 1462 ir_start = (irt - 1) * nblock + 1 1463 ir_end = MIN(ir_start+nblock-1, nrxxs) 1464 IF (noncolin) THEN 1465 DO ir = ir_start, ir_end 1466 rhoc(ir,jbnd-jstart+1) = ( CONJG(exxbuff(ir,jbnd-all_start(wegrp)+ & 1467 iexx_start,ikq))*temppsic_nc(ir,1,ii) + & 1468 CONJG(exxbuff(nrxxs+ir,jbnd-all_start(wegrp)+ & 1469 iexx_start,ikq))*temppsic_nc(ir,2,ii) )/omega 1470 ENDDO 1471 ELSE 1472!DIR$ vector nontemporal (rhoc) 1473 DO ir = ir_start, ir_end 1474 rhoc(ir,jbnd-jstart+1) = CONJG(exxbuff(ir,jbnd-all_start(wegrp)+ & 1475 iexx_start,ikq))*temppsic(ir,ii)*omega_inv 1476 ENDDO 1477 ENDIF 1478 ENDDO 1479 ENDDO 1480!$omp end parallel do 1481 ! 1482 ! ... add augmentation in REAL space HERE 1483 IF (okvan .AND. tqr) THEN ! augment the "charge" in real space 1484 DO jbnd = jstart, jend 1485 CALL addusxx_r(rhoc(:,jbnd-jstart+1), becxx(ikq)%k(:,jbnd), becpsi%k(:,ibnd)) 1486 ENDDO 1487 ENDIF 1488 ! 1489 ! ... brings it to G-space 1490#if defined(__USE_MANY_FFT) 1491 CALL fwfft( 'Rho', prhoc, dfftt, howmany=jcount ) 1492#else 1493 DO jbnd=jstart, jend 1494 CALL fwfft( 'Rho', rhoc(:,jbnd-jstart+1), dfftt ) 1495 ENDDO 1496#endif 1497 ! 1498 ! ... add augmentation in G space HERE 1499 IF (okvan .AND. .NOT. tqr) THEN 1500 DO jbnd = jstart, jend 1501 CALL addusxx_g( dfftt, rhoc(:,jbnd-jstart+1), xkq, xkp, & 1502 'c', becphi_c=becxx(ikq)%k(:,jbnd),becpsi_c=becpsi%k(:,ibnd) ) 1503 ENDDO 1504 ENDIF 1505 ! ... charge done 1506 ! 1507!call start_collection() 1508!$omp parallel do collapse(2) private(ir_start,ir_end) 1509 DO irt = 1, nrt 1510 DO jbnd = jstart, jend 1511 ir_start = (irt - 1) * nblock + 1 1512 ir_end = MIN(ir_start+nblock-1,nrxxs) 1513!DIR$ vector nontemporal (vc) 1514 DO ir = ir_start, ir_end 1515 vc(ir,jbnd-jstart+1) = facb(ir) * rhoc(ir,jbnd-jstart+1)*& 1516 x_occupation(jbnd,ik) * nqs_inv 1517 ENDDO 1518 ENDDO 1519 ENDDO 1520!$omp end parallel do 1521!call stop_collection() 1522 ! 1523 ! Add ultrasoft contribution (RECIPROCAL SPACE) 1524 ! compute alpha_I,j,k+q = \sum_J \int <beta_J|phi_j,k+q> V_i,j,k,q Q_I,J(r) d3r 1525 IF (okvan .AND. .NOT. tqr) THEN 1526 DO jbnd=jstart, jend 1527 CALL newdxx_g( dfftt, vc(:,jbnd-jstart+1), xkq, xkp, 'c',& 1528 deexx(:,ii), becphi_c=becxx(ikq)%k(:,jbnd) ) 1529 ENDDO 1530 ENDIF 1531 ! 1532 !brings back v in real space 1533#if defined(__USE_MANY_FFT) 1534 !fft many 1535 CALL invfft( 'Rho', pvc, dfftt, howmany=jcount ) 1536#else 1537 DO jbnd = jstart, jend 1538 CALL invfft( 'Rho', vc(:,jbnd-jstart+1), dfftt ) 1539 ENDDO 1540#endif 1541 ! 1542 ! Add ultrasoft contribution (REAL SPACE) 1543 IF (okvan .AND. tqr) THEN 1544 DO jbnd = jstart, jend 1545 CALL newdxx_r( dfftt, vc(:,jbnd-jstart+1), becxx(ikq)%k(:,jbnd),deexx(:,ii) ) 1546 ENDDO 1547 ENDIF 1548 ! 1549 ! ... Add PAW one-center contribution 1550 IF (okpaw) THEN 1551 DO jbnd = jstart, jend 1552 CALL PAW_newdxx( x_occupation(jbnd,ik)/nqs, becxx(ikq)%k(:,jbnd), & 1553 becpsi%k(:,ibnd), deexx(:,ii) ) 1554 ENDDO 1555 ENDIF 1556 ! 1557 ! ... accumulates over bands and k points 1558 ! 1559!call start_collection() 1560!$omp parallel do private(ir_start,ir_end) 1561 DO irt = 1, nrt 1562 DO jbnd = jstart, jend 1563 ir_start = (irt - 1) * nblock + 1 1564 ir_end = MIN(ir_start+nblock-1, nrxxs) 1565 IF (noncolin) THEN 1566 DO ir = ir_start, ir_end 1567 result_nc(ir,1,ii) = result_nc(ir,1,ii) + vc(ir,jbnd-jstart+1) * & 1568 exxbuff(ir,jbnd-all_start(wegrp)+iexx_start,ikq) 1569 result_nc(ir,2,ii) = result_nc(ir,2,ii) + vc(ir,jbnd-jstart+1) * & 1570 exxbuff(ir+nrxxs,jbnd-all_start(wegrp)+iexx_start,ikq) 1571 ENDDO 1572 ELSE 1573!!dir$ vector nontemporal (result) 1574 DO ir = ir_start, ir_end 1575 result(ir,ii) = result(ir,ii) + vc(ir,jbnd-jstart+1)* & 1576 exxbuff(ir,jbnd-all_start(wegrp)+iexx_start,ikq) 1577 ENDDO 1578 ENDIF 1579 ENDDO 1580 ENDDO 1581!$omp end parallel do 1582!call stop_collection() 1583 ! 1584 !----------------------------------------------------------------------! 1585 !INNER LOOP END 1586 !----------------------------------------------------------------------! 1587 ! 1588 ENDDO !I-LOOP 1589 ENDDO !IJT 1590 ! 1591 ! get the next nbnd/negrp data 1592 IF (negrp > 1) CALL mp_circular_shift_left( exxbuff(:,:,ikq), me_egrp, inter_egrp_comm ) 1593 ! 1594 ENDDO !iegrp 1595 ! 1596 IF ( okvan .AND..NOT.tqr ) CALL qvan_clean() 1597 ! 1598 ENDDO 1599 ! 1600 ! 1601 ! 1602 DO ii = 1, nibands(my_egrp_id+1) 1603 ! 1604 ibnd = ibands(ii,my_egrp_id+1) 1605 ! 1606 IF (ibnd==0 .OR. ibnd>m) CYCLE 1607 ! 1608 IF (okvan) THEN 1609 CALL mp_sum( deexx(:,ii), intra_egrp_comm ) 1610 ENDIF 1611 ! 1612 IF (noncolin) THEN 1613 !brings back result in G-space 1614 CALL fwfft( 'Wave', result_nc(:,1,ii), dfftt ) 1615 CALL fwfft( 'Wave', result_nc(:,2,ii), dfftt ) 1616 ! 1617 DO ig = 1, n 1618 big_result(ig,ibnd) = big_result(ig,ibnd) - exxalfa* & 1619 result_nc(dfftt%nl(igk_exx(ig,current_k)),1,ii) 1620 big_result(n+ig,ibnd) = big_result(n+ig,ibnd) - exxalfa* & 1621 result_nc(dfftt%nl(igk_exx(ig,current_k)),2,ii) 1622 ENDDO 1623 ELSE 1624 ! 1625 CALL fwfft( 'Wave', result(:,ii), dfftt ) 1626 ! 1627 DO ig = 1, n 1628 big_result(ig,ibnd) = big_result(ig,ibnd) - exxalfa* & 1629 result(dfftt%nl(igk_exx(ig,current_k)),ii) 1630 ENDDO 1631 ENDIF 1632 ! 1633 ! add non-local \sum_I |beta_I> \alpha_Ii (the sum on i is outside) 1634 IF (okvan) CALL add_nlxx_pot( lda, big_result(:,ibnd), xkp, n, igk_exx(:,current_k), & 1635 deexx(:,ii), eps_occ, exxalfa ) 1636 ! 1637 ENDDO 1638 ! 1639 !deallocate temporary arrays 1640 DEALLOCATE( rhoc, vc ) 1641 ! 1642 !sum result 1643 CALL result_sum( n*npol, m, big_result ) 1644 ! 1645 IF (iexx_istart(my_egrp_id+1) > 0) THEN 1646 ! 1647 IF (negrp == 1) THEN 1648 ending_im = m 1649 ELSE 1650 ending_im = iexx_iend(my_egrp_id+1) - iexx_istart(my_egrp_id+1) + 1 1651 ENDIF 1652 ! 1653 IF (noncolin) THEN 1654 DO im = 1, ending_im 1655!$omp parallel do default(shared), private(ig) firstprivate(im,n) 1656 DO ig = 1, n 1657 hpsi(ig,im) = hpsi(ig,im) + big_result(ig,im+iexx_istart(my_egrp_id+1)-1) 1658 ENDDO 1659!$omp end parallel do 1660!$omp parallel do default(shared), private(ig) firstprivate(im,n) 1661 DO ig = 1, n 1662 hpsi(lda+ig,im) = hpsi(lda+ig,im) + big_result(n+ig,im+iexx_istart(my_egrp_id+1)-1) 1663 ENDDO 1664!$omp end parallel do 1665 ENDDO 1666 ELSE 1667 DO im = 1, ending_im 1668!$omp parallel do default(shared), private(ig) firstprivate(im,n) 1669 DO ig = 1, n 1670 hpsi(ig,im) = hpsi(ig,im) + big_result(ig,im+iexx_istart(my_egrp_id+1)-1) 1671 ENDDO 1672!$omp end parallel do 1673 ENDDO 1674 ENDIF 1675 ENDIF 1676 ! 1677 IF (noncolin) THEN 1678 DEALLOCATE( temppsic_nc, result_nc ) 1679 ELSE 1680 DEALLOCATE( temppsic, result ) 1681 ENDIF 1682 ! 1683 DEALLOCATE( big_result ) 1684 DEALLOCATE( fac, facb ) 1685 IF (okvan) DEALLOCATE( deexx ) 1686 ! 1687 END SUBROUTINE vexx_k 1688 ! 1689 ! 1690 !----------------------------------------------------------------------- 1691 FUNCTION exxenergy() 1692 !----------------------------------------------------------------------- 1693 !! NB: This function is meant to give the SAME RESULT as exxenergy2. 1694 !! It is worth keeping it in the repository because in spite of being 1695 !! slower it is a simple driver using vexx potential routine so it is 1696 !! good, from time to time, to replace exxenergy2 with it to check that 1697 !! everything is ok and energy and potential are consistent as they should. 1698 ! 1699 USE io_files, ONLY : iunwfc_exx, nwordwfc 1700 USE buffers, ONLY : get_buffer 1701 USE wvfct, ONLY : nbnd, npwx, wg, current_k 1702 USE gvect, ONLY : gstart 1703 USE wavefunctions, ONLY : evc 1704 USE lsda_mod, ONLY : lsda, current_spin, isk 1705 USE klist, ONLY : ngk, nks, xk 1706 USE mp_pools, ONLY : inter_pool_comm 1707 USE mp_exx, ONLY : intra_egrp_comm, intra_egrp_comm, & 1708 negrp 1709 USE mp, ONLY : mp_sum 1710 USE becmod, ONLY : bec_type, allocate_bec_type, & 1711 deallocate_bec_type, calbec 1712 USE uspp, ONLY : okvan,nkb,vkb 1713 USE exx_band, ONLY : nwordwfc_exx, igk_exx 1714 ! 1715 IMPLICIT NONE 1716 ! 1717 TYPE(bec_type) :: becpsi 1718 REAL(DP) :: exxenergy, energy 1719 INTEGER :: npw, ibnd, ik 1720 COMPLEX(DP) :: vxpsi(npwx*npol,nbnd), psi(npwx*npol,nbnd) 1721 ! 1722 exxenergy = 0._DP 1723 ! 1724 CALL start_clock( 'exxenergy' ) 1725 ! 1726 IF (okvan) CALL allocate_bec_type( nkb, nbnd, becpsi ) 1727 energy = 0._dp 1728 ! 1729 DO ik = 1, nks 1730 npw = ngk(ik) 1731 ! setup variables for usage by vexx (same logic as for H_psi) 1732 current_k = ik 1733 IF ( lsda ) current_spin = isk(ik) 1734 ! end setup 1735 IF ( nks > 1 ) THEN 1736 CALL get_buffer( psi, nwordwfc_exx, iunwfc_exx, ik ) 1737 ELSE 1738 psi(1:npwx*npol,1:nbnd) = evc(1:npwx*npol,1:nbnd) 1739 ENDIF 1740 ! 1741 IF (okvan) THEN 1742 ! prepare the |beta> function at k+q 1743 CALL init_us_2( npw, igk_exx(1,ik), xk(:,ik), vkb ) 1744 ! compute <beta_I|psi_j> at this k+q point, for all band and all projectors 1745 CALL calbec( npw, vkb, psi, becpsi, nbnd ) 1746 ENDIF 1747 ! 1748 vxpsi(:,:) = (0._dp, 0._dp) 1749 CALL vexx( npwx, npw, nbnd, psi, vxpsi, becpsi ) 1750 ! 1751 DO ibnd = 1, nbnd 1752 energy = energy + DBLE(wg(ibnd,ik) * dot_product(psi(1:npw,ibnd),vxpsi(1:npw,ibnd))) 1753 IF (noncolin) energy = energy + & 1754 DBLE(wg(ibnd,ik) * dot_product(psi(npwx+1:npwx+npw,ibnd),vxpsi(npwx+1:npwx+npw,ibnd))) 1755 ! 1756 ENDDO 1757 IF (gamma_only .AND. gstart == 2) THEN 1758 DO ibnd = 1, nbnd 1759 energy = energy - & 1760 DBLE(0.5_dp * wg(ibnd,ik) * CONJG(psi(1,ibnd)) * vxpsi(1,ibnd)) 1761 ENDDO 1762 ENDIF 1763 ENDDO 1764 ! 1765 IF (gamma_only) energy = 2 * energy 1766 ! 1767 CALL mp_sum( energy, intra_egrp_comm ) 1768 CALL mp_sum( energy, inter_pool_comm ) 1769 IF (okvan) CALL deallocate_bec_type( becpsi ) 1770 ! 1771 exxenergy = energy 1772 ! 1773 CALL stop_clock( 'exxenergy' ) 1774 ! 1775 END FUNCTION exxenergy 1776 ! 1777 ! 1778 !----------------------------------------------------------------------- 1779 FUNCTION exxenergy2() 1780 !----------------------------------------------------------------------- 1781 !! Wrapper to \(\texttt{exxenergy2_gamma}\) and \(\texttt{exxenergy2_k}\). 1782 ! 1783 IMPLICIT NONE 1784 ! 1785 REAL(DP) :: exxenergy2 1786 ! 1787 CALL start_clock( 'exxenergy' ) 1788 ! 1789 IF ( gamma_only ) THEN 1790 exxenergy2 = exxenergy2_gamma() 1791 ELSE 1792 exxenergy2 = exxenergy2_k() 1793 ENDIF 1794 ! 1795 CALL stop_clock( 'exxenergy' ) 1796 ! 1797 END FUNCTION exxenergy2 1798 ! 1799 ! 1800 !----------------------------------------------------------------------- 1801 FUNCTION exxenergy2_gamma() 1802 !----------------------------------------------------------------------- 1803 ! 1804 USE constants, ONLY : fpi, e2, pi 1805 USE io_files, ONLY : iunwfc_exx, nwordwfc 1806 USE buffers, ONLY : get_buffer 1807 USE cell_base, ONLY : alat, omega, bg, at, tpiba 1808 USE symm_base, ONLY : nsym, s 1809 USE gvect, ONLY : ngm, gstart, g 1810 USE wvfct, ONLY : nbnd, npwx, wg 1811 USE wavefunctions, ONLY : evc 1812 USE klist, ONLY : xk, ngk, nks, nkstot 1813 USE lsda_mod, ONLY : lsda, current_spin, isk 1814 USE mp_pools, ONLY : inter_pool_comm 1815 USE mp_bands, ONLY : intra_bgrp_comm 1816 USE mp_exx, ONLY : inter_egrp_comm, my_egrp_id, negrp, & 1817 intra_egrp_comm, me_egrp, & 1818 max_pairs, egrp_pairs, ibands, nibands, & 1819 iexx_istart, iexx_iend, & 1820 all_start, all_end, iexx_start, & 1821 init_index_over_band, jblock 1822 USE mp, ONLY : mp_sum, mp_circular_shift_left 1823 USE fft_interfaces, ONLY : fwfft, invfft 1824 USE gvect, ONLY : ecutrho 1825 USE klist, ONLY : wk 1826 USE uspp, ONLY : okvan,nkb,vkb 1827 USE becmod, ONLY : bec_type, allocate_bec_type, & 1828 deallocate_bec_type, calbec 1829 USE paw_variables, ONLY : okpaw 1830 USE paw_exx, ONLY : PAW_xx_energy 1831 USE us_exx, ONLY : bexg_merge, becxx, addusxx_g, & 1832 addusxx_r, qvan_init, qvan_clean 1833 USE exx_base, ONLY : nqs, xkq_collect, index_xkq, index_xk, & 1834 coulomb_fac, g2_convolution_all 1835 USE exx_band, ONLY : igk_exx, change_data_structure, & 1836 transform_evc_to_exx, nwordwfc_exx, & 1837 evc_exx 1838 ! 1839 IMPLICIT NONE 1840 ! 1841 REAL(DP) :: exxenergy2_gamma 1842 ! 1843 ! ... local variables 1844 ! 1845 REAL(DP) :: energy 1846 COMPLEX(DP), ALLOCATABLE :: temppsic(:) 1847 COMPLEX(DP), ALLOCATABLE :: rhoc(:) 1848 REAL(DP), ALLOCATABLE :: fac(:) 1849 COMPLEX(DP), ALLOCATABLE :: vkb_exx(:,:) 1850 INTEGER :: jbnd, ibnd, ik, ikk, ig, ikq, iq, ir 1851 INTEGER :: nrxxs, current_ik, ibnd_loop_start 1852 REAL(DP) :: x1, x2 1853 REAL(DP) :: xkq(3), xkp(3), vc 1854 INTEGER, EXTERNAL :: global_kpoint_index 1855 ! 1856 TYPE(bec_type) :: becpsi 1857 COMPLEX(DP), ALLOCATABLE :: psi_t(:), prod_tot(:) 1858 REAL(DP),ALLOCATABLE :: temppsic_dble (:) 1859 REAL(DP),ALLOCATABLE :: temppsic_aimag(:) 1860 INTEGER :: npw 1861 INTEGER :: istart, iend, ipair, ii, ialloc 1862 INTEGER :: ijt, njt, jblock_start, jblock_end 1863 INTEGER :: exxbuff_index 1864 INTEGER :: calbec_start, calbec_end 1865 INTEGER :: intra_bgrp_comm_ 1866 INTEGER :: iegrp, wegrp 1867 ! 1868 CALL init_index_over_band( inter_egrp_comm, nbnd, nbnd ) 1869 ! 1870 CALL transform_evc_to_exx( 0 ) 1871 ! 1872 ialloc = nibands(my_egrp_id+1) 1873 ! 1874 nrxxs = dfftt%nnr 1875 ALLOCATE( fac(dfftt%ngm) ) 1876 ! 1877 ALLOCATE( temppsic(nrxxs), temppsic_DBLE(nrxxs), temppsic_aimag(nrxxs) ) 1878 ALLOCATE( rhoc(nrxxs) ) 1879 ALLOCATE( vkb_exx(npwx,nkb) ) 1880 ! 1881 energy = 0.0_DP 1882 ! 1883 CALL allocate_bec_type( nkb, nbnd, becpsi ) 1884 ! 1885 IKK_LOOP : & 1886 DO ikk = 1, nks 1887 current_ik = global_kpoint_index( nkstot, ikk ) 1888 xkp = xk(:,ikk) 1889 ! 1890 IF ( lsda ) current_spin = isk(ikk) 1891 npw = ngk (ikk) 1892 IF ( nks > 1 ) CALL get_buffer( evc_exx, nwordwfc_exx, iunwfc_exx, ikk ) 1893 ! 1894 ! ... prepare the |beta> function at k+q 1895 CALL init_us_2( npw, igk_exx(:,ikk), xkp, vkb_exx ) 1896 ! 1897 ! ... compute <beta_I|psi_j> at this k+q point, for all band and all projectors 1898 calbec_start = ibands(1,my_egrp_id+1) 1899 calbec_end = ibands(nibands(my_egrp_id+1),my_egrp_id+1) 1900 ! 1901 intra_bgrp_comm_ = intra_bgrp_comm 1902 intra_bgrp_comm = intra_egrp_comm 1903 ! 1904 CALL calbec( npw, vkb_exx, evc_exx, becpsi, nibands(my_egrp_id+1) ) 1905 ! 1906 intra_bgrp_comm = intra_bgrp_comm_ 1907 ! 1908 IQ_LOOP : & 1909 DO iq = 1,nqs 1910 ! 1911 ikq = index_xkq(current_ik,iq) 1912 ik = index_xk(ikq) 1913 ! 1914 xkq = xkq_collect(:,ikq) 1915 ! 1916 CALL g2_convolution_all( dfftt%ngm, gt, xkp, xkq, iq, current_ik ) 1917 ! 1918 fac = coulomb_fac(:,iq,current_ik) 1919 fac(gstart_t:) = 2 * coulomb_fac(gstart_t:,iq,current_ik) 1920 ! 1921 IF ( okvan .AND. .NOT.tqr ) CALL qvan_init( dfftt%ngm, xkq, xkp ) 1922 ! 1923 DO iegrp = 1, negrp 1924 ! 1925 ! ... compute the id of group whose data is currently worked on 1926 wegrp = MOD(iegrp+my_egrp_id-1, negrp)+1 1927 ! 1928 jblock_start = all_start(wegrp) 1929 jblock_end = all_end(wegrp) 1930 ! 1931 JBND_LOOP : & 1932 DO ii = 1, nibands(my_egrp_id+1) 1933 ! 1934 jbnd = ibands(ii,my_egrp_id+1) 1935 ! 1936 IF (jbnd==0 .OR. jbnd>nbnd) CYCLE 1937 ! 1938 IF ( MOD(ii,2)==1 ) THEN 1939 ! 1940 temppsic = (0._DP,0._DP) 1941 ! 1942 IF ( (ii+1) <= nibands(my_egrp_id+1) ) THEN 1943 ! deal with double bands 1944!$omp parallel do default(shared), private(ig) 1945 DO ig = 1, npwt 1946 temppsic( dfftt%nl(ig) ) = & 1947 evc_exx(ig,ii) + (0._DP,1._DP) * evc_exx(ig,ii+1) 1948 temppsic( dfftt%nlm(ig) ) = & 1949 CONJG(evc_exx(ig,ii) - (0._DP,1._DP) * evc_exx(ig,ii+1)) 1950 ENDDO 1951!$omp end parallel do 1952 ENDIF 1953 ! 1954 IF (ii == nibands(my_egrp_id+1)) THEN 1955 ! deal with a single last band 1956!$omp parallel do default(shared), private(ig) 1957 DO ig = 1, npwt 1958 temppsic( dfftt%nl(ig) ) = evc_exx(ig,ii) 1959 temppsic( dfftt%nlm(ig) ) = CONJG(evc_exx(ig,ii)) 1960 ENDDO 1961!$omp end parallel do 1962 ENDIF 1963 ! 1964 CALL invfft( 'Wave', temppsic, dfftt ) 1965!$omp parallel do default(shared), private(ir) 1966 DO ir = 1, nrxxs 1967 temppsic_DBLE(ir) = DBLE( temppsic(ir) ) 1968 temppsic_aimag(ir) = AIMAG( temppsic(ir) ) 1969 ENDDO 1970!$omp end parallel do 1971 ! 1972 ENDIF 1973 ! 1974 !determine which j-bands to calculate 1975 istart = 0 1976 iend = 0 1977 ! 1978 DO ipair = 1, max_pairs 1979 IF (egrp_pairs(1,ipair,my_egrp_id+1) == jbnd) THEN 1980 IF (istart == 0) THEN 1981 istart = egrp_pairs(2,ipair,my_egrp_id+1) 1982 iend = istart 1983 ELSE 1984 iend = egrp_pairs(2,ipair,my_egrp_id+1) 1985 ENDIF 1986 ENDIF 1987 ENDDO 1988 ! 1989 istart = MAX(istart,jblock_start) 1990 iend = MIN(iend,jblock_end) 1991 ! 1992 IF (MOD(istart,2) == 0) THEN 1993 ibnd_loop_start = istart-1 1994 ELSE 1995 ibnd_loop_start = istart 1996 ENDIF 1997 ! 1998 IBND_LOOP_GAM : & 1999 DO ibnd = ibnd_loop_start, iend, 2 !for each band of psi 2000 ! 2001 exxbuff_index = (ibnd+1)/2-(all_start(wegrp)+1)/2+(iexx_start+1)/2 2002 ! 2003 IF ( ibnd < istart ) THEN 2004 x1 = 0.0_DP 2005 ELSE 2006 x1 = x_occupation(ibnd,ik) 2007 ENDIF 2008 ! 2009 IF ( ibnd < iend ) THEN 2010 x2 = x_occupation(ibnd+1,ik) 2011 ELSE 2012 x2 = 0.0_DP 2013 ENDIF 2014 ! calculate rho in real space. Gamma tricks are used. 2015 ! temppsic is real; tempphic contains band 1 in the real part, 2016 ! band 2 in the imaginary part; the same applies to rhoc 2017 ! 2018 IF ( MOD(ii,2) == 0 ) THEN 2019 rhoc = 0.0_DP 2020!$omp parallel do default(shared), private(ir) 2021 DO ir = 1, nrxxs 2022 rhoc(ir) = exxbuff(ir,exxbuff_index,ikq) * temppsic_aimag(ir) / omega 2023 ENDDO 2024!$omp end parallel do 2025 ELSE 2026!$omp parallel do default(shared), private(ir) 2027 DO ir = 1, nrxxs 2028 rhoc(ir) = exxbuff(ir,exxbuff_index,ikq) * temppsic_DBLE(ir) / omega 2029 ENDDO 2030!$omp end parallel do 2031 ENDIF 2032 ! 2033 IF (okvan .AND. tqr) THEN 2034 IF (ibnd >= istart) & 2035 CALL addusxx_r( rhoc,_CX(becxx(ikq)%r(:,ibnd)), & 2036 _CX(becpsi%r(:,jbnd)) ) 2037 IF (ibnd<iend) & 2038 CALL addusxx_r(rhoc,_CY(becxx(ikq)%r(:,ibnd+1)), & 2039 _CX(becpsi%r(:,jbnd))) 2040 ENDIF 2041 ! 2042 ! bring rhoc to G-space 2043 CALL fwfft( 'Rho', rhoc, dfftt ) 2044 ! 2045 IF (okvan .AND. .NOT.tqr) THEN 2046 IF (ibnd >= istart) & 2047 CALL addusxx_g( dfftt, rhoc, xkq, xkp, 'r', & 2048 becphi_r=becxx(ikq)%r(:,ibnd), becpsi_r=becpsi%r(:,jbnd-calbec_start+1) ) 2049 IF (ibnd < iend) & 2050 CALL addusxx_g( dfftt, rhoc, xkq, xkp, 'i', & 2051 becphi_r=becxx(ikq)%r(:,ibnd+1), becpsi_r=becpsi%r(:,jbnd-calbec_start+1) ) 2052 ENDIF 2053 ! 2054 vc = 0.0_DP 2055!$omp parallel do default(shared), private(ig), reduction(+:vc) 2056 DO ig = 1, dfftt%ngm 2057 ! 2058 ! The real part of rhoc contains the contribution from band ibnd 2059 ! The imaginary part contains the contribution from band ibnd+1 2060 ! 2061 vc = vc + fac(ig) * ( x1 * & 2062 ABS( rhoc(dfftt%nl(ig)) + CONJG(rhoc(dfftt%nlm(ig))) )**2 & 2063 +x2 * & 2064 ABS( rhoc(dfftt%nl(ig)) - CONJG(rhoc(dfftt%nlm(ig))) )**2 ) 2065 ENDDO 2066!$omp end parallel do 2067 ! 2068 vc = vc * omega * 0.25_DP / nqs 2069 energy = energy - exxalfa * vc * wg(jbnd,ikk) 2070 ! 2071 IF (okpaw) THEN 2072 IF (ibnd >= ibnd_start) & 2073 energy = energy + exxalfa*wg(jbnd,ikk)*& 2074 x1 * PAW_xx_energy(_CX(becxx(ikq)%r(:,ibnd)),_CX(becpsi%r(:,jbnd)) ) 2075 IF (ibnd < ibnd_end) & 2076 energy = energy + exxalfa*wg(jbnd,ikk)*& 2077 x2 * PAW_xx_energy(_CX(becxx(ikq)%r(:,ibnd+1)), _CX(becpsi%r(:,jbnd)) ) 2078 ENDIF 2079 ! 2080 ENDDO & 2081 IBND_LOOP_GAM 2082 ENDDO & 2083 JBND_LOOP 2084 ! 2085 ! get the next nbnd/negrp data 2086 IF (negrp > 1) CALL mp_circular_shift_left( exxbuff(:,:,ikq), me_egrp, inter_egrp_comm ) 2087 ! 2088 ENDDO ! iegrp 2089 IF ( okvan .AND. .NOT.tqr ) CALL qvan_clean( ) 2090 ! 2091 ENDDO & 2092 IQ_LOOP 2093 ENDDO & 2094 IKK_LOOP 2095 ! 2096 DEALLOCATE( temppsic, temppsic_dble, temppsic_aimag ) 2097 ! 2098 DEALLOCATE( rhoc, fac ) 2099 CALL deallocate_bec_type( becpsi ) 2100 ! 2101 CALL mp_sum( energy, inter_egrp_comm ) 2102 CALL mp_sum( energy, intra_egrp_comm ) 2103 CALL mp_sum( energy, inter_pool_comm ) 2104 ! 2105 exxenergy2_gamma = energy 2106 ! 2107 CALL change_data_structure( .FALSE. ) 2108 ! 2109 END FUNCTION exxenergy2_gamma 2110 ! 2111 ! 2112 !----------------------------------------------------------------------- 2113 FUNCTION exxenergy2_k() 2114 !----------------------------------------------------------------------- 2115 ! 2116 USE constants, ONLY : fpi, e2, pi 2117 USE io_files, ONLY : iunwfc_exx, nwordwfc 2118 USE buffers, ONLY : get_buffer 2119 USE cell_base, ONLY : alat, omega, bg, at, tpiba 2120 USE symm_base, ONLY : nsym, s 2121 USE gvect, ONLY : ngm, gstart, g 2122 USE wvfct, ONLY : nbnd, npwx, wg 2123 USE wavefunctions, ONLY : evc 2124 USE klist, ONLY : xk, ngk, nks, nkstot 2125 USE lsda_mod, ONLY : lsda, current_spin, isk 2126 USE mp_pools, ONLY : inter_pool_comm 2127 USE mp_exx, ONLY : inter_egrp_comm, my_egrp_id, negrp, & 2128 intra_egrp_comm, me_egrp, & 2129 max_pairs, egrp_pairs, ibands, nibands, & 2130 iexx_istart, iexx_iend, & 2131 all_start, all_end, iexx_start, & 2132 init_index_over_band, jblock 2133 USE mp_bands, ONLY : intra_bgrp_comm 2134 USE mp, ONLY : mp_sum, mp_circular_shift_left 2135 USE fft_interfaces, ONLY : fwfft, invfft 2136 USE gvect, ONLY : ecutrho 2137 USE klist, ONLY : wk 2138 USE uspp, ONLY : okvan,nkb,vkb 2139 USE becmod, ONLY : bec_type, allocate_bec_type, & 2140 deallocate_bec_type, calbec 2141 USE paw_variables, ONLY : okpaw 2142 USE paw_exx, ONLY : PAW_xx_energy 2143 USE us_exx, ONLY : bexg_merge, becxx, addusxx_g, & 2144 addusxx_r, qvan_init, qvan_clean 2145 USE exx_base, ONLY : nqs, xkq_collect, index_xkq, index_xk, & 2146 coulomb_fac, g2_convolution_all 2147 USE exx_band, ONLY : change_data_structure, & 2148 transform_evc_to_exx, nwordwfc_exx, & 2149 igk_exx, evc_exx 2150 ! 2151 IMPLICIT NONE 2152 ! 2153 REAL(DP) :: exxenergy2_k 2154 ! 2155 ! ... local variables 2156 ! 2157 REAL(DP) :: energy 2158 COMPLEX(DP), ALLOCATABLE :: temppsic(:,:) 2159 COMPLEX(DP), ALLOCATABLE :: temppsic_nc(:,:,:) 2160 COMPLEX(DP), ALLOCATABLE,TARGET :: rhoc(:,:) 2161#if defined(__USE_MANY_FFT) 2162 COMPLEX(DP), POINTER :: prhoc(:) 2163#endif 2164 REAL(DP), ALLOCATABLE :: fac(:) 2165 INTEGER :: npw, jbnd, ibnd, ibnd_inner_start, ibnd_inner_end, ibnd_inner_count, & 2166 ik, ikk, ig, ikq, iq, ir 2167 INTEGER :: h_ibnd, nrxxs, current_ik, ibnd_loop_start, nblock, nrt, irt, & 2168 ir_start, ir_end 2169 REAL(DP) :: x1, x2 2170 REAL(DP) :: xkq(3), xkp(3), vc, omega_inv 2171 INTEGER, EXTERNAL :: global_kpoint_index 2172 ! 2173 TYPE(bec_type) :: becpsi 2174 COMPLEX(DP), ALLOCATABLE :: psi_t(:), prod_tot(:) 2175 INTEGER :: intra_bgrp_comm_ 2176 INTEGER :: ii, ialloc, jstart, jend, ipair 2177 INTEGER :: ijt, njt, jblock_start, jblock_end 2178 INTEGER :: iegrp, wegrp 2179 ! 2180 CALL init_index_over_band( inter_egrp_comm, nbnd, nbnd ) 2181 ! 2182 CALL transform_evc_to_exx( 0 ) 2183 ! 2184 ialloc = nibands(my_egrp_id+1) 2185 ! 2186 nrxxs = dfftt%nnr 2187 ALLOCATE( fac(dfftt%ngm) ) 2188 ! 2189 IF (noncolin) THEN 2190 ALLOCATE( temppsic_nc(nrxxs,npol,ialloc) ) 2191 ELSE 2192 ALLOCATE( temppsic(nrxxs,ialloc) ) 2193 ENDIF 2194 ! 2195 energy = 0.0_DP 2196 ! 2197 CALL allocate_bec_type( nkb, nbnd, becpsi ) 2198 ! 2199 !precompute that stuff 2200 omega_inv = 1.0/omega 2201 ! 2202 IKK_LOOP : & 2203 DO ikk = 1, nks 2204 ! 2205 current_ik = global_kpoint_index ( nkstot, ikk ) 2206 xkp = xk(:,ikk) 2207 ! 2208 IF ( lsda ) current_spin = isk(ikk) 2209 npw = ngk(ikk) 2210 IF ( nks > 1 ) CALL get_buffer( evc_exx, nwordwfc_exx, iunwfc_exx, ikk ) 2211 ! 2212 ! compute <beta_I|psi_j> at this k+q point, for all band and all projectors 2213 intra_bgrp_comm_ = intra_bgrp_comm 2214 intra_bgrp_comm = intra_egrp_comm 2215 ! 2216 IF (okvan .OR. okpaw) THEN 2217 CALL compute_becpsi( npw, igk_exx(:,ikk), xkp, evc_exx, & 2218 becpsi%k(:,ibands(1,my_egrp_id+1)) ) 2219 ENDIF 2220 ! 2221 intra_bgrp_comm = intra_bgrp_comm_ 2222 ! 2223 ! ... precompute temppsic 2224 ! 2225 IF (noncolin) THEN 2226 temppsic_nc = 0.0_DP 2227 ELSE 2228 temppsic = 0.0_DP 2229 ENDIF 2230 ! 2231 DO ii = 1, nibands(my_egrp_id+1) 2232 ! 2233 jbnd = ibands(ii,my_egrp_id+1) 2234 ! 2235 IF (jbnd == 0 .OR. jbnd > nbnd) CYCLE 2236 ! 2237 !IF ( abs(wg(jbnd,ikk)) < eps_occ) CYCLE 2238 ! 2239 IF (noncolin) THEN 2240 ! 2241!$omp parallel do default(shared), private(ig) 2242 DO ig = 1, npw 2243 temppsic_nc(dfftt%nl(igk_exx(ig,ikk)),1,ii) = evc_exx(ig,ii) 2244 temppsic_nc(dfftt%nl(igk_exx(ig,ikk)),2,ii) = evc_exx(npwx+ig,ii) 2245 ENDDO 2246!$omp end parallel do 2247 ! 2248 CALL invfft( 'Wave', temppsic_nc(:,1,ii), dfftt ) 2249 CALL invfft( 'Wave', temppsic_nc(:,2,ii), dfftt ) 2250 ! 2251 ELSE 2252!$omp parallel do default(shared), private(ig) 2253 DO ig = 1, npw 2254 temppsic(dfftt%nl(igk_exx(ig,ikk)),ii) = evc_exx(ig,ii) 2255 ENDDO 2256!$omp end parallel do 2257 ! 2258 CALL invfft( 'Wave', temppsic(:,ii), dfftt ) 2259 ! 2260 ENDIF 2261 ENDDO 2262 ! 2263 IQ_LOOP : & 2264 DO iq = 1,nqs 2265 ! 2266 ikq = index_xkq(current_ik,iq) 2267 ik = index_xk(ikq) 2268 ! 2269 xkq = xkq_collect(:,ikq) 2270 ! 2271 CALL g2_convolution_all( dfftt%ngm, gt, xkp, xkq, iq, ikk ) 2272 IF ( okvan .AND..NOT.tqr ) CALL qvan_init( dfftt%ngm, xkq, xkp ) 2273 ! 2274 DO iegrp = 1, negrp 2275 ! 2276 ! ... compute the id of group whose data is currently worked on 2277 wegrp = MOD(iegrp+my_egrp_id-1, negrp)+1 2278 njt = (all_end(wegrp)-all_start(wegrp)+jblock)/jblock 2279 ! 2280 IJT_LOOP : & 2281 DO ijt = 1, njt 2282 ! 2283 jblock_start = (ijt - 1) * jblock + all_start(wegrp) 2284 jblock_end = MIN(jblock_start+jblock-1,all_end(wegrp)) 2285 ! 2286 JBND_LOOP : & 2287 DO ii = 1, nibands(my_egrp_id+1) 2288 ! 2289 jbnd = ibands(ii,my_egrp_id+1) 2290 ! 2291 IF (jbnd==0 .OR. jbnd>nbnd) CYCLE 2292 ! 2293 !determine which j-bands to calculate 2294 jstart = 0 2295 jend = 0 2296 ! 2297 DO ipair = 1, max_pairs 2298 IF (egrp_pairs(1,ipair,my_egrp_id+1) == jbnd) THEN 2299 IF (jstart == 0) THEN 2300 jstart = egrp_pairs(2,ipair,my_egrp_id+1) 2301 jend = jstart 2302 ELSE 2303 jend = egrp_pairs(2,ipair,my_egrp_id+1) 2304 ENDIF 2305 ENDIF 2306 ENDDO 2307 ! 2308 !these variables prepare for inner band parallelism 2309 jstart = MAX(jstart,jblock_start) 2310 jend = MIN(jend,jblock_end) 2311 ibnd_inner_start = jstart 2312 ibnd_inner_end = jend 2313 ibnd_inner_count = jend-jstart+1 2314 ! 2315 !allocate arrays 2316 ALLOCATE( rhoc(nrxxs,ibnd_inner_count) ) 2317#if defined(__USE_MANY_FFT) 2318 prhoc(1:nrxxs*ibnd_inner_count) => rhoc 2319#endif 2320 !calculate rho in real space 2321 nblock = 2048 2322 nrt = (nrxxs+nblock-1) / nblock 2323!$omp parallel do collapse(2) private(ir_start,ir_end) 2324 DO irt = 1, nrt 2325 DO ibnd = ibnd_inner_start, ibnd_inner_end 2326 ir_start = (irt - 1) * nblock + 1 2327 ir_end = MIN(ir_start+nblock-1,nrxxs) 2328 IF (noncolin) THEN 2329 DO ir = ir_start, ir_end 2330 rhoc(ir,ibnd-ibnd_inner_start+1) = & 2331 ( CONJG(exxbuff(ir,ibnd-all_start(wegrp)+iexx_start,ikq)) * & 2332 temppsic_nc(ir,1,ii) + & 2333 CONJG(exxbuff(nrxxs+ir,ibnd-all_start(wegrp)+iexx_start,ikq)) * & 2334 temppsic_nc(ir,2,ii) ) * omega_inv 2335 ENDDO 2336 ELSE 2337 DO ir = ir_start, ir_end 2338 rhoc(ir,ibnd-ibnd_inner_start+1) = omega_inv * & 2339 CONJG(exxbuff(ir,ibnd-all_start(wegrp)+iexx_start,ikq)) * & 2340 temppsic(ir,ii) 2341 ENDDO 2342 ENDIF 2343 ENDDO 2344 ENDDO 2345!$omp end parallel do 2346 ! 2347 ! augment the "charge" in real space 2348 IF (okvan .AND. tqr) THEN 2349!$omp parallel do default(shared) private(ibnd) firstprivate(ibnd_inner_start,ibnd_inner_end) 2350 DO ibnd = ibnd_inner_start, ibnd_inner_end 2351 CALL addusxx_r( rhoc(:,ibnd-ibnd_inner_start+1), & 2352 becxx(ikq)%k(:,ibnd), becpsi%k(:,jbnd)) 2353 ENDDO 2354!$omp end parallel do 2355 ENDIF 2356 ! 2357 ! bring rhoc to G-space 2358#if defined(__USE_MANY_FFT) 2359 CALL fwfft ('Rho', prhoc, dfftt, howmany=ibnd_inner_count) 2360#else 2361 DO ibnd = ibnd_inner_start, ibnd_inner_end 2362 CALL fwfft('Rho', rhoc(:,ibnd-ibnd_inner_start+1), dfftt) 2363 ENDDO 2364#endif 2365 ! augment the "charge" in G space 2366 IF (okvan .AND. .NOT. tqr) THEN 2367 DO ibnd = ibnd_inner_start, ibnd_inner_end 2368 CALL addusxx_g(dfftt, rhoc(:,ibnd-ibnd_inner_start+1), & 2369 xkq, xkp, 'c', becphi_c=becxx(ikq)%k(:,ibnd), & 2370 becpsi_c=becpsi%k(:,jbnd)) 2371 ENDDO 2372 ENDIF 2373 ! 2374!$omp parallel do reduction(+:energy) private(vc) 2375 DO ibnd = ibnd_inner_start, ibnd_inner_end 2376 vc=0.0_DP 2377 DO ig=1,dfftt%ngm 2378 vc = vc + coulomb_fac(ig,iq,ikk) * & 2379 DBLE(rhoc(dfftt%nl(ig),ibnd-ibnd_inner_start+1) *& 2380 CONJG(rhoc(dfftt%nl(ig),ibnd-ibnd_inner_start+1))) 2381 ENDDO 2382 vc = vc * omega * x_occupation(ibnd,ik) / nqs 2383 energy = energy - exxalfa * vc * wg(jbnd,ikk) 2384 ! 2385 IF (okpaw) THEN 2386 energy = energy +exxalfa*x_occupation(ibnd,ik)/nqs*wg(jbnd,ikk) & 2387 *PAW_xx_energy(becxx(ikq)%k(:,ibnd), becpsi%k(:,jbnd)) 2388 ENDIF 2389 ENDDO 2390!$omp end parallel do 2391 ! 2392 !deallocate memory 2393 DEALLOCATE( rhoc ) 2394 ENDDO & 2395 JBND_LOOP 2396 ! 2397 ENDDO& 2398 IJT_LOOP 2399 ! get the next nbnd/negrp data 2400 IF (negrp > 1) call mp_circular_shift_left( exxbuff(:,:,ikq), me_egrp, inter_egrp_comm ) 2401 ! 2402 END DO !iegrp 2403 ! 2404 IF ( okvan .AND. .NOT.tqr ) CALL qvan_clean() 2405 ENDDO & 2406 IQ_LOOP 2407 ! 2408 ENDDO & 2409 IKK_LOOP 2410 ! 2411 IF (noncolin) THEN 2412 DEALLOCATE( temppsic_nc ) 2413 ELSE 2414 DEALLOCATE( temppsic ) 2415 ENDIF 2416 ! 2417 DEALLOCATE( fac ) 2418 ! 2419 CALL deallocate_bec_type( becpsi ) 2420 ! 2421 CALL mp_sum( energy, inter_egrp_comm ) 2422 CALL mp_sum( energy, intra_egrp_comm ) 2423 CALL mp_sum( energy, inter_pool_comm ) 2424 ! 2425 exxenergy2_k = energy 2426 CALL change_data_structure( .FALSE. ) 2427 ! 2428 END FUNCTION exxenergy2_k 2429 ! 2430 ! 2431 !----------------------------------------------------------------------- 2432 FUNCTION exx_stress() 2433 !----------------------------------------------------------------------- 2434 !! This is Eq.(10) of PRB 73, 125120 (2006). 2435 ! 2436 USE constants, ONLY : fpi, e2, pi, tpi 2437 USE io_files, ONLY : iunwfc_exx, nwordwfc 2438 USE buffers, ONLY : get_buffer 2439 USE cell_base, ONLY : alat, omega, bg, at, tpiba 2440 USE symm_base, ONLY : nsym, s 2441 USE wvfct, ONLY : nbnd, npwx, wg, current_k 2442 USE wavefunctions, ONLY : evc 2443 USE klist, ONLY : xk, ngk, nks 2444 USE lsda_mod, ONLY : lsda, current_spin, isk 2445 USE gvect, ONLY : g 2446 USE mp_pools, ONLY : npool, inter_pool_comm 2447 USE mp_exx, ONLY : inter_egrp_comm, intra_egrp_comm, & 2448 ibands, nibands, my_egrp_id, jblock, & 2449 egrp_pairs, max_pairs, negrp, me_egrp, & 2450 all_start, all_end, iexx_start 2451 USE mp, ONLY : mp_sum, mp_circular_shift_left 2452 USE fft_base, ONLY : dffts 2453 USE fft_interfaces, ONLY : fwfft, invfft 2454 USE uspp, ONLY : okvan 2455 ! 2456 USE exx_base, ONLY : nq1, nq2, nq3, nqs, eps, exxdiv, & 2457 x_gamma_extrapolation, on_double_grid, & 2458 grid_factor, yukawa, erfc_scrlen, & 2459 use_coulomb_vcut_ws, use_coulomb_vcut_spheric, & 2460 gau_scrlen, vcut, index_xkq, index_xk, index_sym 2461 USE exx_band, ONLY : change_data_structure, transform_evc_to_exx, & 2462 g_exx, igk_exx, nwordwfc_exx, evc_exx 2463 USE coulomb_vcut_module, ONLY : vcut_get, vcut_spheric_get 2464 ! 2465 IMPLICIT NONE 2466 ! 2467 ! ... local variables 2468 ! 2469 REAL(DP) :: exx_stress(3,3), exx_stress_(3,3) 2470 ! 2471 COMPLEX(DP),ALLOCATABLE :: tempphic(:), temppsic(:), result(:) 2472 COMPLEX(DP),ALLOCATABLE :: tempphic_nc(:,:), temppsic_nc(:,:), & 2473 result_nc(:,:) 2474 COMPLEX(DP),ALLOCATABLE :: rhoc(:) 2475 REAL(DP), ALLOCATABLE :: fac(:), fac_tens(:,:,:), fac_stress(:) 2476 INTEGER :: npw, jbnd, ibnd, ik, ikk, ig, ir, ikq, iq, isym 2477 INTEGER :: nqi, iqi, beta, nrxxs, ngm 2478 INTEGER :: ibnd_loop_start 2479 REAL(DP) :: x1, x2 2480 REAL(DP) :: qq, xk_cryst(3), sxk(3), xkq(3), vc(3,3), x, q(3) 2481 REAL(DP) :: delta(3,3) 2482 INTEGER :: jstart, jend, ii, ipair, jblock_start, jblock_end 2483 INTEGER :: iegrp, wegrp 2484 INTEGER :: exxbuff_index 2485 ! 2486 CALL start_clock( 'exx_stress' ) 2487 ! 2488 CALL transform_evc_to_exx( 0 ) 2489 ! 2490 IF (npool>1) CALL errore( 'exx_stress', 'stress not available with pools', 1 ) 2491 IF (noncolin) CALL errore( 'exx_stress', 'noncolinear stress not implemented', 1 ) 2492 IF (okvan) CALL infomsg( 'exx_stress', 'USPP stress not tested' ) 2493 ! 2494 nrxxs = dfftt%nnr 2495 ngm = dfftt%ngm 2496 delta = RESHAPE( (/1._dp,0._dp,0._dp, 0._dp,1._dp,0._dp, 0._dp,0._dp,1._dp/), (/3,3/)) 2497 exx_stress_ = 0._dp 2498 ! 2499 ALLOCATE( tempphic(nrxxs), temppsic(nrxxs), rhoc(nrxxs), fac(ngm) ) 2500 ALLOCATE( fac_tens(3,3,ngm), fac_stress(ngm) ) 2501 ! 2502 nqi = nqs 2503 ! 2504 ! ... loop over k-points 2505 DO ikk = 1, nks 2506 current_k = ikk 2507 IF (lsda) current_spin = isk(ikk) 2508 npw = ngk(ikk) 2509 ! 2510 IF (nks > 1) CALL get_buffer( evc_exx, nwordwfc_exx, iunwfc_exx, ikk ) 2511 ! 2512 DO iqi = 1, nqi 2513 ! 2514 iq = iqi 2515 ! 2516 ikq = index_xkq(current_k,iq) 2517 ik = index_xk(ikq) 2518 isym = ABS(index_sym(ikq)) 2519 ! 2520 2521 ! FIXME: use cryst_to_cart and company as above.. 2522 xk_cryst(:) = at(1,:)*xk(1,ik)+at(2,:)*xk(2,ik)+at(3,:)*xk(3,ik) 2523 IF (index_sym(ikq) < 0) xk_cryst = -xk_cryst 2524 sxk(:) = s(:,1,isym)*xk_cryst(1) + & 2525 s(:,2,isym)*xk_cryst(2) + & 2526 s(:,3,isym)*xk_cryst(3) 2527 xkq(:) = bg(:,1)*sxk(1) + bg(:,2)*sxk(2) + bg(:,3)*sxk(3) 2528 ! 2529 !CALL start_clock ('exxen2_ngmloop') 2530 ! 2531!$omp parallel do default(shared), private(ig, beta, q, qq, on_double_grid, x) 2532 DO ig = 1, ngm 2533 IF (negrp == 1) THEN 2534 q(1) = xk(1,current_k) - xkq(1) + g(1,ig) 2535 q(2) = xk(2,current_k) - xkq(2) + g(2,ig) 2536 q(3) = xk(3,current_k) - xkq(3) + g(3,ig) 2537 ELSE 2538 q(1) = xk(1,current_k) - xkq(1) + g_exx(1,ig) 2539 q(2) = xk(2,current_k) - xkq(2) + g_exx(2,ig) 2540 q(3) = xk(3,current_k) - xkq(3) + g_exx(3,ig) 2541 ENDIF 2542 ! 2543 q = q * tpiba 2544 qq = ( q(1)*q(1) + q(2)*q(2) + q(3)*q(3) ) 2545 ! 2546 DO beta = 1, 3 2547 fac_tens(1:3,beta,ig) = q(1:3)*q(beta) 2548 ENDDO 2549 ! 2550 IF (x_gamma_extrapolation) THEN 2551 on_double_grid = .TRUE. 2552 x= 0.5d0/tpiba*(q(1)*at(1,1)+q(2)*at(2,1)+q(3)*at(3,1))*nq1 2553 on_double_grid = on_double_grid .AND. (ABS(x-NINT(x))<eps) 2554 x= 0.5d0/tpiba*(q(1)*at(1,2)+q(2)*at(2,2)+q(3)*at(3,2))*nq2 2555 on_double_grid = on_double_grid .AND. (ABS(x-NINT(x))<eps) 2556 x= 0.5d0/tpiba*(q(1)*at(1,3)+q(2)*at(2,3)+q(3)*at(3,3))*nq3 2557 on_double_grid = on_double_grid .AND. (ABS(x-NINT(x))<eps) 2558 ELSE 2559 on_double_grid = .FALSE. 2560 ENDIF 2561 ! 2562 IF (use_coulomb_vcut_ws) THEN 2563 fac(ig) = vcut_get(vcut, q) 2564 fac_stress(ig) = 0._dp ! not implemented 2565 IF (gamma_only .AND. qq > 1.d-8) fac(ig) = 2.d0 * fac(ig) 2566 ! 2567 ELSEIF ( use_coulomb_vcut_spheric ) THEN 2568 fac(ig) = vcut_spheric_get(vcut, q) 2569 fac_stress(ig) = 0._dp ! not implemented 2570 IF (gamma_only .AND. qq > 1.d-8) fac(ig) = 2.d0 * fac(ig) 2571 ! 2572 ELSEIF (gau_scrlen > 0) THEN 2573 fac(ig) = e2*((pi/gau_scrlen)**(1.5d0))* & 2574 EXP(-qq/4.d0/gau_scrlen) * grid_factor 2575 fac_stress(ig) = e2*2.d0/4.d0/gau_scrlen * & 2576 EXP(-qq/4.d0/gau_scrlen) * & 2577 ((pi/gau_scrlen)**(1.5d0))*grid_factor 2578 IF (gamma_only) fac(ig) = 2.d0 * fac(ig) 2579 IF (gamma_only) fac_stress(ig) = 2.d0 * fac_stress(ig) 2580 IF (on_double_grid) fac(ig) = 0._dp 2581 IF (on_double_grid) fac_stress(ig) = 0._dp 2582 ! 2583 ELSEIF (qq > 1.d-8) THEN 2584 IF ( erfc_scrlen > 0 ) THEN 2585 fac(ig)=e2*fpi/qq*(1._dp-EXP(-qq/4.d0/erfc_scrlen**2)) * grid_factor 2586 fac_stress(ig) = -e2*fpi * 2.d0/qq**2 * ( & 2587 (1._dp+qq/4.d0/erfc_scrlen**2)*EXP(-qq/4.d0/erfc_scrlen**2) - 1._dp) * & 2588 grid_factor 2589 ELSE 2590 fac(ig)=e2*fpi/( qq + yukawa ) * grid_factor 2591 fac_stress(ig) = 2.d0 * e2*fpi/(qq+yukawa)**2 * grid_factor 2592 ENDIF 2593 ! 2594 IF (gamma_only) fac(ig) = 2.d0 * fac(ig) 2595 IF (gamma_only) fac_stress(ig) = 2.d0 * fac_stress(ig) 2596 IF (on_double_grid) fac(ig) = 0._dp 2597 IF (on_double_grid) fac_stress(ig) = 0._dp 2598 ! 2599 ELSE 2600 ! 2601 fac(ig) = -exxdiv ! or rather something else (see f.gygi) 2602 fac_stress(ig) = 0._dp ! or -exxdiv_stress (not yet implemented) 2603 IF ( yukawa> 0._dp .AND. .NOT. x_gamma_extrapolation) THEN 2604 fac(ig) = fac(ig) + e2*fpi/( qq + yukawa ) 2605 fac_stress(ig) = 2.d0 * e2*fpi/(qq+yukawa)**2 2606 ENDIF 2607 IF (erfc_scrlen > 0._dp .AND. .NOT. x_gamma_extrapolation) THEN 2608 fac(ig) = e2*fpi / (4.d0*erfc_scrlen**2) 2609 fac_stress(ig) = e2*fpi / (8.d0*erfc_scrlen**4) 2610 ENDIF 2611 ! 2612 ENDIF 2613 ! 2614 ENDDO 2615!$omp end parallel do 2616 !CALL stop_clock ('exxen2_ngmloop') 2617 DO iegrp = 1, negrp 2618 ! 2619 ! compute the id of group whose data is currently worked on 2620 wegrp = MOD(iegrp+my_egrp_id-1, negrp)+1 2621 ! 2622 jblock_start = all_start(wegrp) 2623 jblock_end = all_end(wegrp) 2624 ! 2625 ! loop over bands 2626 DO ii = 1, nibands(my_egrp_id+1) 2627 ! 2628 jbnd = ibands(ii,my_egrp_id+1) 2629 ! 2630 IF (jbnd==0 .OR. jbnd>nbnd) CYCLE 2631 ! 2632 !determine which j-bands to calculate 2633 jstart = 0 2634 jend = 0 2635 DO ipair=1, max_pairs 2636 IF (egrp_pairs(1,ipair,my_egrp_id+1).eq.jbnd)THEN 2637 IF (jstart == 0)THEN 2638 jstart = egrp_pairs(2,ipair,my_egrp_id+1) 2639 jend = jstart 2640 ELSE 2641 jend = egrp_pairs(2,ipair,my_egrp_id+1) 2642 ENDIF 2643 ENDIF 2644 ENDDO 2645 ! 2646 jstart = MAX(jstart,jblock_start) 2647 jend = MIN(jend,jblock_end) 2648 ! 2649 temppsic(:) = ( 0._dp, 0._dp ) 2650!$omp parallel do default(shared), private(ig) 2651 DO ig = 1, npw 2652 temppsic(dfftt%nl(igk_exx(ig,ikk))) = evc_exx(ig,ii) 2653 ENDDO 2654!$omp end parallel do 2655 ! 2656 IF (gamma_only) THEN 2657!$omp parallel do default(shared), private(ig) 2658 DO ig = 1, npw 2659 temppsic(dfftt%nlm(igk_exx(ig,ikk))) = CONJG(evc_exx(ig,ii)) 2660 ENDDO 2661!$omp end parallel do 2662 ENDIF 2663 ! 2664 CALL invfft( 'Wave', temppsic, dfftt ) 2665 ! 2666 IF (gamma_only) THEN 2667 ! 2668 IF (MOD(jstart,2) == 0) THEN 2669 ibnd_loop_start = jstart-1 2670 ELSE 2671 ibnd_loop_start = jstart 2672 ENDIF 2673 ! 2674 DO ibnd = ibnd_loop_start, jend, 2 !for each band of psi 2675 ! 2676 exxbuff_index = (ibnd+1)/2-(all_start(wegrp)+1)/2+(iexx_start+1)/2 2677 ! 2678 IF ( ibnd < jstart ) THEN 2679 x1 = 0._dp 2680 ELSE 2681 x1 = x_occupation(ibnd,ik) 2682 ENDIF 2683 ! 2684 IF ( ibnd == jend) THEN 2685 x2 = 0._dp 2686 ELSE 2687 x2 = x_occupation(ibnd+1,ik) 2688 ENDIF 2689 ! 2690 IF ( ABS(x1) < eps_occ .AND. ABS(x2) < eps_occ ) CYCLE 2691 ! 2692 ! calculate rho in real space 2693!$omp parallel do default(shared), private(ir) 2694 DO ir = 1, nrxxs 2695 tempphic(ir) = exxbuff(ir,exxbuff_index,ikq) 2696 rhoc(ir) = CONJG(tempphic(ir))*temppsic(ir) / omega 2697 ENDDO 2698!$omp end parallel do 2699 ! bring it to G-space 2700 CALL fwfft( 'Rho', rhoc, dfftt ) 2701 ! 2702 vc = 0._dp 2703!$omp parallel do default(shared), private(ig), reduction(+:vc) 2704 DO ig = 1, ngm 2705 ! 2706 vc(:,:) = vc(:,:) + x1 * 0.25_dp * & 2707 ABS( rhoc(dfftt%nl(ig)) + & 2708 CONJG(rhoc(dfftt%nlm(ig))))**2 * & 2709 (fac_tens(:,:,ig)*fac_stress(ig)/2.d0 - delta(:,:)*fac(ig)) 2710 vc(:,:) = vc(:,:) + x2 * 0.25_dp * & 2711 ABS( rhoc(dfftt%nl(ig)) - & 2712 CONJG(rhoc(dfftt%nlm(ig))))**2 * & 2713 (fac_tens(:,:,ig)*fac_stress(ig)/2.d0 - delta(:,:)*fac(ig)) 2714 ENDDO 2715!$omp end parallel do 2716 vc = vc / nqs / 4.d0 2717 exx_stress_ = exx_stress_ + exxalfa * vc * wg(jbnd,ikk) 2718 ENDDO 2719 ! 2720 ELSE 2721 ! 2722 DO ibnd = jstart, jend !for each band of psi 2723 ! 2724 IF ( ABS(x_occupation(ibnd,ik)) < 1.d-6) CYCLE 2725 ! 2726 ! calculate rho in real space 2727!$omp parallel do default(shared), private(ir) 2728 DO ir = 1, nrxxs 2729 tempphic(ir) = exxbuff(ir,ibnd-all_start(wegrp)+iexx_start,ikq) 2730 rhoc(ir) = CONJG(tempphic(ir))*temppsic(ir) / omega 2731 ENDDO 2732!$omp end parallel do 2733 ! 2734 ! bring it to G-space 2735 CALL fwfft( 'Rho', rhoc, dfftt ) 2736 ! 2737 vc = 0._dp 2738!$omp parallel do default(shared), private(ig), reduction(+:vc) 2739 DO ig = 1, ngm 2740 vc(:,:) = vc(:,:) + rhoc(dfftt%nl(ig)) * & 2741 CONJG(rhoc(dfftt%nl(ig))) * & 2742 (fac_tens(:,:,ig)*fac_stress(ig)/2.d0 - & 2743 delta(:,:)*fac(ig)) 2744 ENDDO 2745!$omp end parallel do 2746 ! 2747 vc = vc * x_occupation(ibnd,ik) / nqs / 4.d0 2748 exx_stress_ = exx_stress_ + exxalfa * vc * wg(jbnd,ikk) 2749 ! 2750 ENDDO 2751 ! 2752 ENDIF ! gamma or k-points 2753 ! 2754 ENDDO ! jbnd 2755 ! 2756 ! get the next nbnd/negrp data 2757 IF (negrp > 1) CALL mp_circular_shift_left( exxbuff(:,:,ikq), me_egrp, & 2758 inter_egrp_comm ) 2759 ! 2760 ENDDO ! iegrp 2761 ! 2762 ENDDO ! iqi 2763 ! 2764 ENDDO ! ikk 2765 ! 2766 DEALLOCATE( tempphic, temppsic, rhoc, fac, fac_tens, fac_stress ) 2767 ! 2768 CALL mp_sum( exx_stress_, intra_egrp_comm ) 2769 CALL mp_sum( exx_stress_, inter_egrp_comm ) 2770 CALL mp_sum( exx_stress_, inter_pool_comm ) 2771 ! 2772 exx_stress = exx_stress_ 2773 ! 2774 CALL change_data_structure( .FALSE. ) 2775 ! 2776 CALL stop_clock( 'exx_stress' ) 2777 ! 2778 END FUNCTION exx_stress 2779 ! 2780 ! 2781 !---------------------------------------------------------------------- 2782 SUBROUTINE compute_becpsi( npw_, igk_, q_, evc_exx, becpsi_k ) 2783 !---------------------------------------------------------------------- 2784 !! Calculates beta functions (Kleinman-Bylander projectors), with 2785 !! structure factor, for all atoms, in reciprocal space. 2786 !! FIXME: why so much replicated code? 2787 ! 2788 USE kinds, ONLY : DP 2789 USE ions_base, ONLY : nat, ntyp => nsp, ityp, tau 2790 USE cell_base, ONLY : tpiba, omega 2791 USE constants, ONLY : tpi 2792 USE gvect, ONLY : eigts1, eigts2, eigts3, mill, g 2793 USE wvfct, ONLY : npwx, nbnd 2794 USE us, ONLY : nqx, dq, tab, tab_d2y, spline_ps 2795 USE m_gth, ONLY : mk_ffnl_gth 2796 USE splinelib 2797 USE uspp, ONLY : nkb, nhtol, nhtolm, indv 2798 USE uspp_param, ONLY : upf, lmaxkb, nhm, nh 2799 USE becmod, ONLY : calbec 2800 USE mp_exx, ONLY : ibands, nibands, my_egrp_id 2801 ! 2802 IMPLICIT NONE 2803 ! 2804 INTEGER, INTENT(IN) :: npw_ 2805 !! number of PWs 2806 INTEGER, INTENT(IN) :: igk_(npw_) 2807 !! indices of G in the list of q+G vectors 2808 REAL(DP), INTENT(IN) :: q_(3) 2809 !! q vector (2pi/a units) 2810 COMPLEX(DP), INTENT(IN) :: evc_exx(npwx,nibands(my_egrp_id+1)) 2811 !! wavefunctions from the PW set to exx 2812 COMPLEX(DP), INTENT(OUT) :: becpsi_k(nkb,nibands(my_egrp_id+1)) 2813 !! <beta|psi> for k points 2814 ! 2815 ! ... local variables 2816 ! 2817 COMPLEX(DP) :: vkb_(npwx,1) !beta functions (npw_ <= npwx) 2818 ! 2819 INTEGER :: i0, i1, i2, i3, ig, lm, na, nt, nb, ih, jkb 2820 ! 2821 REAL(DP) :: px, ux, vx, wx, arg 2822 REAL(DP), ALLOCATABLE :: gk(:,:), qg(:), vq(:), ylm(:,:), vkb1(:,:) 2823 ! 2824 COMPLEX(DP) :: phase, pref 2825 COMPLEX(DP), ALLOCATABLE :: sk(:) 2826 ! 2827 REAL(DP), ALLOCATABLE :: xdata(:) 2828 INTEGER :: iq 2829 INTEGER :: istart, iend 2830 ! 2831 istart = ibands(1,my_egrp_id+1) 2832 iend = ibands(nibands(my_egrp_id+1),my_egrp_id+1) 2833 ! 2834 IF (lmaxkb < 0) RETURN 2835 ! 2836 ALLOCATE( vkb1(npw_,nhm) ) 2837 ALLOCATE( sk(npw_) ) 2838 ALLOCATE( qg(npw_) ) 2839 ALLOCATE( vq(npw_) ) 2840 ALLOCATE( ylm(npw_,(lmaxkb+1)**2) ) 2841 ALLOCATE( gk(3,npw_) ) 2842 ! 2843 ! write(*,'(3i4,i5,3f10.5)') size(tab,1), size(tab,2), size(tab,3), size(vq), q_ 2844 ! 2845 DO ig = 1, npw_ 2846 gk(1,ig) = q_(1) + g(1,igk_(ig)) 2847 gk(2,ig) = q_(2) + g(2,igk_(ig)) 2848 gk(3,ig) = q_(3) + g(3,igk_(ig)) 2849 qg(ig) = gk(1,ig)**2 + gk(2,ig)**2 + gk(3,ig)**2 2850 ENDDO 2851 ! 2852 CALL ylmr2( (lmaxkb+1)**2, npw_, gk, qg, ylm ) 2853 ! 2854 ! ... set now qg=|q+G| in atomic units 2855 ! 2856 DO ig = 1, npw_ 2857 qg(ig) = SQRT(qg(ig))*tpiba 2858 ENDDO 2859 ! 2860 IF (spline_ps) THEN 2861 ALLOCATE( xdata(nqx) ) 2862 DO iq = 1, nqx 2863 xdata(iq) = (iq - 1) * dq 2864 ENDDO 2865 ENDIF 2866 ! |beta_lm(q)> = (4pi/omega).Y_lm(q).f_l(q).(i^l).S(q) 2867 jkb = 0 2868 ! 2869 DO nt = 1, ntyp 2870 ! ... calculate beta in G-space using an interpolation table: 2871 ! f_l(q)=\int _0 ^\infty dr r^2 f_l(r) j_l(q.r) 2872 DO nb = 1, upf(nt)%nbeta 2873 IF ( upf(nt)%is_gth ) THEN 2874 CALL mk_ffnl_gth( nt, nb, npw_, omega, qg, vq ) 2875 ELSE 2876 DO ig = 1, npw_ 2877 IF (spline_ps) THEN 2878 vq(ig) = splint(xdata, tab(:,nb,nt), tab_d2y(:,nb,nt), qg(ig)) 2879 ELSE 2880 px = qg (ig) / dq - INT(qg (ig) / dq) 2881 ux = 1.d0 - px 2882 vx = 2.d0 - px 2883 wx = 3.d0 - px 2884 i0 = INT( qg (ig) / dq ) + 1 2885 i1 = i0 + 1 2886 i2 = i0 + 2 2887 i3 = i0 + 3 2888 vq (ig) = tab (i0, nb, nt) * ux * vx * wx / 6.d0 + & 2889 tab (i1, nb, nt) * px * vx * wx / 2.d0 - & 2890 tab (i2, nb, nt) * px * ux * wx / 2.d0 + & 2891 tab (i3, nb, nt) * px * ux * vx / 6.d0 2892 ENDIF 2893 ENDDO 2894 ENDIF 2895 ! 2896 ! ... add spherical harmonic part (Y_lm(q)*f_l(q)) 2897 DO ih = 1, nh(nt) 2898 IF (nb == indv(ih,nt)) THEN 2899 !l = nhtol (ih,nt) 2900 lm = nhtolm(ih,nt) 2901 DO ig = 1, npw_ 2902 vkb1(ig,ih) = ylm(ig,lm) * vq(ig) 2903 ENDDO 2904 ENDIF 2905 ENDDO 2906 ! 2907 ENDDO 2908 ! 2909 ! ... vkb1 contains all betas including angular part for type nt 2910 ! now add the structure factor and factor (-i)^l 2911 ! 2912 DO na = 1, nat 2913 ! ordering: first all betas for atoms of type 1 2914 ! then all betas for atoms of type 2 and so on 2915 IF (ityp(na) == nt) THEN 2916 arg = (q_(1) * tau(1,na) + & 2917 q_(2) * tau(2,na) + & 2918 q_(3) * tau(3,na) ) * tpi 2919 phase = CMPLX(COS(arg), - SIN(arg), KIND=DP) 2920 DO ig = 1, npw_ 2921 sk (ig) = eigts1(mill(1,igk_(ig)), na) * & 2922 eigts2(mill(2,igk_(ig)), na) * & 2923 eigts3(mill(3,igk_(ig)), na) 2924 ENDDO 2925 ! 2926 DO ih = 1, nh (nt) 2927 jkb = jkb + 1 2928 pref = (0.d0, -1.d0)**nhtol(ih,nt) * phase 2929 DO ig = 1, npw_ 2930 vkb_(ig,1) = vkb1(ig,ih) * sk(ig) * pref 2931 ENDDO 2932 ! 2933 DO ig = npw_+1, npwx 2934 vkb_(ig, 1) = (0.0_DP, 0.0_DP) 2935 ENDDO 2936 ! 2937 CALL calbec( npw_, vkb_, evc_exx, becpsi_k(jkb:jkb,:), & 2938 nibands(my_egrp_id+1) ) 2939 ENDDO 2940 ! 2941 ENDIF 2942 ! 2943 ENDDO 2944 ENDDO 2945 ! 2946 DEALLOCATE( gk ) 2947 DEALLOCATE( ylm ) 2948 DEALLOCATE( vq ) 2949 DEALLOCATE( qg ) 2950 DEALLOCATE( sk ) 2951 DEALLOCATE( vkb1 ) 2952 ! 2953 RETURN 2954 ! 2955 END SUBROUTINE compute_becpsi 2956 ! 2957 ! 2958 !----------------------------------------------------------------------------- 2959 SUBROUTINE aceinit( DoLoc, exex ) 2960 !---------------------------------------------------------------------------- 2961 !! ACE Initialization 2962 ! 2963 USE wvfct, ONLY : nbnd, npwx, current_k 2964 USE klist, ONLY : nks, xk, ngk, igk_k 2965 USE uspp, ONLY : nkb, vkb, okvan 2966 USE becmod, ONLY : allocate_bec_type, deallocate_bec_type, & 2967 bec_type, calbec 2968 USE lsda_mod, ONLY : current_spin, lsda, isk 2969 USE io_files, ONLY : nwordwfc, iunwfc 2970 USE buffers, ONLY : get_buffer 2971 USE mp_pools, ONLY : inter_pool_comm 2972 USE mp_bands, ONLY : intra_bgrp_comm 2973 USE mp, ONLY : mp_sum 2974 USE wavefunctions, ONLY : evc 2975 ! 2976 IMPLICIT NONE 2977 ! 2978 LOGICAL, INTENT(IN) :: DoLoc 2979 !! if TRUE calculates exact exchange with SCDM orbitals 2980 REAL(DP), OPTIONAL, INTENT(OUT) :: exex 2981 !! ACE energy 2982 ! 2983 ! ... local variables 2984 ! 2985 REAL(DP) :: ee, eexx 2986 INTEGER :: ik, npw 2987 TYPE(bec_type) :: becpsi 2988 ! 2989 IF (nbndproj < x_nbnd_occ .OR. nbndproj > nbnd) THEN 2990 WRITE( stdout, '(3(A,I4))' ) ' occ = ', x_nbnd_occ, ' proj = ', nbndproj, & 2991 ' tot = ', nbnd 2992 CALL errore( 'aceinit', 'n_proj must be between occ and tot.', 1 ) 2993 ENDIF 2994 ! 2995 IF (.NOT. ALLOCATED(xi)) ALLOCATE( xi(npwx*npol,nbndproj,nks) ) 2996 IF ( okvan ) CALL allocate_bec_type( nkb, nbnd, becpsi ) 2997 ! 2998 eexx = 0.0d0 2999 xi = (0.0d0,0.0d0) 3000 ! 3001 DO ik = 1, nks 3002 npw = ngk(ik) 3003 current_k = ik 3004 IF ( lsda ) current_spin = isk(ik) 3005 IF ( nks > 1 ) CALL get_buffer( evc, nwordwfc, iunwfc, ik ) 3006 IF ( okvan ) THEN 3007 CALL init_us_2( npw, igk_k(1,ik), xk(:,ik), vkb ) 3008 CALL calbec( npw, vkb, evc, becpsi, nbnd ) 3009 ENDIF 3010 IF (gamma_only) THEN 3011 CALL aceinit_gamma( DoLoc, npw, nbnd, evc, xi(1,1,ik), becpsi, ee ) 3012 ELSE 3013 CALL aceinit_k( DoLoc, npw, nbnd, evc, xi(1,1,ik), becpsi, ee ) 3014 ENDIF 3015 eexx = eexx + ee 3016 ENDDO 3017 ! 3018 CALL mp_sum( eexx, inter_pool_comm ) 3019 ! WRITE(stdout,'(/,5X,"ACE energy",f15.8)') eexx 3020 ! 3021 IF (PRESENT(exex)) exex = eexx 3022 IF ( okvan ) CALL deallocate_bec_type( becpsi ) 3023 ! 3024 domat = .FALSE. 3025 ! 3026 END SUBROUTINE aceinit 3027 ! 3028 ! 3029 !--------------------------------------------------------------------------------- 3030 SUBROUTINE aceinit_gamma( DoLoc, nnpw, nbnd, phi, xitmp, becpsi, exxe ) 3031 !------------------------------------------------------------------------------- 3032 !! Compute xi(npw,nbndproj) for the ACE method. 3033 ! 3034 USE becmod, ONLY : bec_type 3035 USE lsda_mod, ONLY : current_spin 3036 USE mp, ONLY : mp_stop 3037 ! 3038 IMPLICIT NONE 3039 ! 3040 LOGICAL, INTENT(IN) :: DoLoc 3041 !! if TRUE calculates exact exchange with SCDM orbitals 3042 INTEGER :: nnpw 3043 !! number of pw 3044 INTEGER :: nbnd 3045 !! number of bands 3046 COMPLEX(DP) :: phi(nnpw,nbnd) 3047 !! wavefunction 3048 COMPLEX(DP) :: xitmp(nnpw,nbndproj) 3049 !! xi(npw,nbndproj) 3050 TYPE(bec_type), INTENT(IN) :: becpsi 3051 !! <beta|psi> 3052 REAL(DP) :: exxe 3053 !! exx energy 3054 ! 3055 ! ... local variables 3056 ! 3057 INTEGER :: nrxxs 3058 REAL(DP), ALLOCATABLE :: mexx(:,:) 3059 REAL(DP), PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0, Pt5=0.50d0 3060 LOGICAL :: domat0 3061 ! 3062 CALL start_clock( 'aceinit' ) 3063 ! 3064 nrxxs = dfftt%nnr * npol 3065 ! 3066 ALLOCATE( mexx(nbndproj,nbndproj) ) 3067 xitmp = (Zero,Zero) 3068 mexx = Zero 3069 ! 3070 IF ( DoLoc ) then 3071 CALL vexx_loc( nnpw, nbndproj, xitmp, mexx ) 3072 CALL MatSymm( 'S', 'L', mexx,nbndproj ) 3073 ELSE 3074 ! |xi> = Vx[phi]|phi> 3075 CALL vexx( nnpw, nnpw, nbndproj, phi, xitmp, becpsi ) 3076 ! mexx = <phi|Vx[phi]|phi> 3077 CALL matcalc( 'exact', .TRUE., 0, nnpw, nbndproj, nbndproj, phi, xitmp, mexx, exxe ) 3078 ! |xi> = -One * Vx[phi]|phi> * rmexx^T 3079 ENDIF 3080 ! 3081 CALL aceupdate( nbndproj, nnpw, xitmp, mexx ) 3082 ! 3083 DEALLOCATE( mexx ) 3084 ! 3085 IF ( local_thr > 0.0d0 ) THEN 3086 domat0 = domat 3087 domat = .TRUE. 3088 CALL vexxace_gamma( nnpw, nbndproj, evc0(1,1,current_spin), exxe ) 3089 evc0(:,:,current_spin) = phi(:,:) 3090 domat = domat0 3091 ENDIF 3092 ! 3093 CALL stop_clock( 'aceinit' ) 3094 ! 3095 END SUBROUTINE aceinit_gamma 3096 ! 3097 ! 3098 !---------------------------------------------------------------------------------- 3099 SUBROUTINE vexxace_gamma( nnpw, nbnd, phi, exxe, vphi ) 3100 !------------------------------------------------------------------------------- 3101 !! Do the ACE potential and (optional) print the ACE matrix representation. 3102 ! 3103 USE wvfct, ONLY : current_k, wg 3104 USE lsda_mod, ONLY : current_spin 3105 ! 3106 IMPLICIT NONE 3107 ! 3108 INTEGER :: nnpw 3109 !! number of plane waves 3110 INTEGER :: nbnd 3111 !! number of bands 3112 COMPLEX(DP) :: phi(nnpw,nbnd) 3113 !! wave function 3114 REAL(DP) :: exxe 3115 !! exx energy 3116 COMPLEX(DP), OPTIONAL :: vphi(nnpw,nbnd) 3117 !! v times phi 3118 ! 3119 ! ... local variables 3120 ! 3121 INTEGER :: i, ik 3122 REAL*8, ALLOCATABLE :: rmexx(:,:) 3123 COMPLEX(DP),ALLOCATABLE :: cmexx(:,:), vv(:,:) 3124 REAL*8, PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0, Pt5=0.50d0 3125 ! 3126 CALL start_clock( 'vexxace' ) 3127 ! 3128 ALLOCATE( vv(nnpw,nbnd) ) 3129 ! 3130 IF (PRESENT(vphi)) THEN 3131 vv = vphi 3132 ELSE 3133 vv = (Zero,Zero) 3134 ENDIF 3135 ! 3136 ! do the ACE potential 3137 ALLOCATE( rmexx(nbndproj,nbnd), cmexx(nbndproj,nbnd) ) 3138 ! 3139 rmexx = Zero 3140 cmexx = (Zero,Zero) 3141 ! <xi|phi> 3142 CALL matcalc( '<xi|phi>', .FALSE. , 0, nnpw, nbndproj, nbnd, xi(1,1,current_k), & 3143 phi, rmexx, exxe ) 3144 ! |vv> = |vphi> + (-One) * |xi> * <xi|phi> 3145 cmexx = (One,Zero)*rmexx 3146 ! 3147 CALL ZGEMM( 'N', 'N', nnpw, nbnd, nbndproj, -(One,Zero), xi(1,1,current_k), & 3148 nnpw, cmexx, nbndproj, (One,Zero), vv, nnpw ) 3149 ! 3150 DEALLOCATE( cmexx, rmexx ) 3151 ! 3152 IF (domat) THEN 3153 ALLOCATE( rmexx(nbnd,nbnd) ) 3154 CALL matcalc( 'ACE', .TRUE., 0, nnpw, nbnd, nbnd, phi, vv, rmexx, exxe ) 3155 DEALLOCATE( rmexx ) 3156#if defined(__DEBUG) 3157 WRITE(stdout,'(4(A,I3),A,I9,A,f12.6)') 'vexxace: nbnd=', nbnd, ' nbndproj=',nbndproj, & 3158 ' k=',current_k,' spin=',current_spin,' npw=', & 3159 nnpw, ' E=',exxe 3160 ELSE 3161 WRITE(stdout,'(4(A,I3),A,I9)') 'vexxace: nbnd=', nbnd, ' nbndproj=',nbndproj, & 3162 ' k=',current_k,' spin=',current_spin,' npw=', & 3163 nnpw 3164#endif 3165 ENDIF 3166 ! 3167 IF (PRESENT(vphi)) vphi = vv 3168 DEALLOCATE( vv ) 3169 ! 3170 CALL stop_clock( 'vexxace' ) 3171 ! 3172 END SUBROUTINE vexxace_gamma 3173 ! 3174 ! 3175 !------------------------------------------------------------------------------------------- 3176 SUBROUTINE aceupdate( nbndproj, nnpw, xitmp, rmexx ) 3177 !---------------------------------------------------------------------------------------- 3178 !! Build the ACE operator from the potential amd matrix (rmexx is assumed symmetric 3179 !! and only the Lower Triangular part is considered). 3180 ! 3181 IMPLICIT NONE 3182 ! 3183 INTEGER :: nbndproj 3184 !! number of bands 3185 INTEGER :: nnpw 3186 !! number of PW 3187 COMPLEX(DP) :: xitmp(nnpw,nbndproj) 3188 !! xi(nnpw,nbndproj) 3189 REAL(DP) :: rmexx(nbndproj,nbndproj) 3190 !! |xi> = -One * Vx[phi]|phi> * rmexx^T 3191 ! 3192 ! ... local variables 3193 ! 3194 COMPLEX(DP), ALLOCATABLE :: cmexx(:,:) 3195 REAL(DP), PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0, Pt5=0.50d0 3196 ! 3197 CALL start_clock( 'aceupdate' ) 3198 ! 3199 ! rmexx = -(Cholesky(rmexx))^-1 3200 rmexx = -rmexx 3201 ! CALL invchol( nbndproj, rmexx ) 3202 CALL MatChol( nbndproj, rmexx ) 3203 CALL MatInv( 'L', nbndproj, rmexx ) 3204 ! 3205 ! |xi> = -One * Vx[phi]|phi> * rmexx^T 3206 ALLOCATE( cmexx(nbndproj,nbndproj) ) 3207 cmexx = (One,Zero)*rmexx 3208 CALL ZTRMM( 'R', 'L', 'C', 'N', nnpw, nbndproj, (One,Zero), cmexx, nbndproj, xitmp, nnpw ) 3209 ! 3210 DEALLOCATE( cmexx ) 3211 ! 3212 CALL stop_clock( 'aceupdate' ) 3213 ! 3214 END SUBROUTINE 3215 ! 3216 ! 3217 !--------------------------------------------------------------------------------------------- 3218 SUBROUTINE aceinit_k( DoLoc, nnpw, nbnd, phi, xitmp, becpsi, exxe ) 3219 !----------------------------------------------------------------------------------------- 3220 !! Compute xi(npw,nbndproj) for the ACE method. 3221 ! 3222 USE becmod, ONLY : bec_type 3223 USE wvfct, ONLY : current_k, npwx 3224 USE klist, ONLY : wk 3225 USE noncollin_module, ONLY : npol 3226 ! 3227 IMPLICIT NONE 3228 ! 3229 LOGICAL, INTENT(IN) :: DoLoc 3230 !! if TRUE calculates exact exchange with SCDM orbitals 3231 INTEGER :: nnpw 3232 !! number of PW 3233 INTEGER :: nbnd 3234 !! number of bands 3235 COMPLEX(DP) :: phi(npwx*npol,nbnd) 3236 !! wave function 3237 COMPLEX(DP) :: xitmp(npwx*npol,nbndproj) 3238 !! xi(nnpw,nbndproj) 3239 TYPE(bec_type), INTENT(IN) :: becpsi 3240 !! <beta|psi> 3241 REAL(DP) :: exxe 3242 !! exx energy 3243 ! 3244 ! ... local variables 3245 ! 3246 COMPLEX(DP), ALLOCATABLE :: mexx(:,:) 3247 REAL(DP) :: exxe0 3248 REAL(DP), PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0, Pt5=0.50d0 3249 INTEGER :: i 3250 LOGICAL :: domat0 3251 ! 3252 CALL start_clock( 'aceinit' ) 3253 ! 3254 IF (nbndproj>nbnd) CALL errore( 'aceinit_k', 'nbndproj greater than nbnd.', 1 ) 3255 IF (nbndproj<=0) CALL errore( 'aceinit_k', 'nbndproj le 0.', 1 ) 3256 ! 3257 ALLOCATE( mexx(nbndproj,nbndproj) ) 3258 xitmp = (Zero,Zero) 3259 mexx = (Zero,Zero) 3260 IF ( DoLoc ) THEN 3261 CALL vexx_loc_k( nnpw, nbndproj, xitmp, mexx, exxe ) 3262 CALL MatSymm_k( 'S', 'L', mexx, nbndproj ) 3263 ELSE 3264 ! |xi> = Vx[phi]|phi> 3265 CALL vexx( npwx, nnpw, nbndproj, phi, xitmp, becpsi ) 3266 ! mexx = <phi|Vx[phi]|phi> 3267 CALL matcalc_k( 'exact', .TRUE., 0, current_k, npwx*npol, nbndproj, nbndproj, & 3268 phi, xitmp, mexx, exxe ) 3269 ENDIF 3270#if defined(__DEBUG) 3271 WRITE( stdout,'(3(A,I3),A,I9,A,f12.6)') 'aceinit_k: nbnd=', nbnd, ' nbndproj=',nbndproj, & 3272 ' k=',current_k,' npw=',nnpw,' Ex(k)=',exxe 3273#endif 3274 ! Skip k-points that have exactly zero weight 3275 IF(wk(current_k)/=0._dp)THEN 3276 ! |xi> = -One * Vx[phi]|phi> * rmexx^T 3277 CALL aceupdate_k( nbndproj, nnpw, xitmp, mexx ) 3278 ENDIF 3279 ! 3280 DEALLOCATE( mexx ) 3281 ! 3282 IF ( DoLoc ) THEN 3283 domat0 = domat 3284 domat = .TRUE. 3285 CALL vexxace_k( nnpw, nbnd, evc0(1,1,current_k), exxe ) 3286 evc0(:,:,current_k) = phi(:,:) 3287 domat = domat0 3288 ENDIF 3289 ! 3290 CALL stop_clock( 'aceinit' ) 3291 ! 3292 END SUBROUTINE aceinit_k 3293 ! 3294 ! 3295 !------------------------------------------------------------------------------ 3296 SUBROUTINE aceupdate_k( nbndproj, nnpw, xitmp, mexx ) 3297 !---------------------------------------------------------------------------- 3298 !! Updates xi(npw,nbndproj) for the ACE method. 3299 ! 3300 USE wvfct, ONLY : npwx 3301 USE noncollin_module, ONLY : noncolin, npol 3302 ! 3303 IMPLICIT NONE 3304 ! 3305 INTEGER :: nbndproj 3306 !! number of bands 3307 INTEGER :: nnpw 3308 !! number of PW 3309 COMPLEX(DP) :: mexx(nbndproj,nbndproj) 3310 !! mexx = -(Cholesky(mexx))^-1 3311 COMPLEX(DP) :: xitmp(npwx*npol,nbndproj) 3312 !! |xi> = -One * Vx[phi]|phi> * mexx^T 3313 ! 3314 CALL start_clock( 'aceupdate' ) 3315 ! 3316 ! mexx = -(Cholesky(mexx))^-1 3317 mexx = -mexx 3318 CALL invchol_k( nbndproj, mexx ) 3319 ! 3320 ! |xi> = -One * Vx[phi]|phi> * mexx^T 3321 CALL ZTRMM( 'R', 'L', 'C', 'N', npwx*npol, nbndproj, (1.0_dp,0.0_dp), mexx,nbndproj, & 3322 xitmp, npwx*npol ) 3323 ! 3324 CALL stop_clock( 'aceupdate' ) 3325 ! 3326 END SUBROUTINE aceupdate_k 3327 ! 3328 ! 3329 !-------------------------------------------------------------------------------------- 3330 SUBROUTINE vexxace_k( nnpw, nbnd, phi, exxe, vphi ) 3331 !----------------------------------------------------------------------------------- 3332 !! Do the ACE potential and (optional) print the ACE matrix representation. 3333 ! 3334 USE becmod, ONLY : calbec 3335 USE wvfct, ONLY : current_k, npwx 3336 USE noncollin_module, ONLY : npol 3337 ! 3338 IMPLICIT NONE 3339 ! 3340 REAL(DP) :: exxe 3341 !! exx energy 3342 INTEGER :: nnpw 3343 !! number of PW 3344 INTEGER :: nbnd 3345 !! number of bands 3346 COMPLEX(DP) :: phi(npwx*npol,nbnd) 3347 !! wave function 3348 COMPLEX(DP), OPTIONAL :: vphi(npwx*npol,nbnd) 3349 !! ACE potential 3350 ! 3351 ! ... local variables 3352 ! 3353 INTEGER :: i 3354 COMPLEX(DP), ALLOCATABLE :: cmexx(:,:), vv(:,:) 3355 REAL*8, PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0, Pt5=0.50d0 3356 ! 3357 CALL start_clock( 'vexxace' ) 3358 ! 3359 ALLOCATE( vv(npwx*npol,nbnd) ) 3360 IF (PRESENT(vphi)) THEN 3361 vv = vphi 3362 ELSE 3363 vv = (Zero, Zero) 3364 ENDIF 3365 ! 3366 ! do the ACE potential! 3367 ALLOCATE( cmexx(nbndproj,nbnd) ) 3368 cmexx = (Zero,Zero) 3369 ! <xi|phi> 3370 CALL matcalc_k( '<xi|phi>', .FALSE., 0, current_k, npwx*npol, nbndproj, nbnd, & 3371 xi(1,1,current_k), phi, cmexx, exxe ) 3372 ! 3373 ! |vv> = |vphi> + (-One) * |xi> * <xi|phi>! 3374 CALL ZGEMM( 'N', 'N', npwx*npol, nbnd, nbndproj, -(One,Zero), xi(1,1,current_k), & 3375 npwx*npol, cmexx, nbndproj, (One,Zero), vv, npwx*npol ) 3376 ! 3377 IF (domat) THEN 3378 ! 3379 IF ( nbndproj /= nbnd) THEN 3380 DEALLOCATE( cmexx ) 3381 ALLOCATE( cmexx(nbnd,nbnd) ) 3382 ENDIF 3383 ! 3384 CALL matcalc_k( 'ACE', .TRUE., 0, current_k, npwx*npol, nbnd, nbnd, phi, vv, cmexx, exxe ) 3385 ! 3386#if defined(__DEBUG) 3387 WRITE(stdout,'(3(A,I3),A,I9,A,f12.6)') 'vexxace_k: nbnd=', nbnd, ' nbndproj=',nbndproj, & 3388 ' k=',current_k,' npw=',nnpw, ' Ex(k)=',exxe 3389 ELSE 3390 WRITE(stdout,'(3(A,I3),A,I9)') 'vexxace_k: nbnd=', nbnd, ' nbndproj=',nbndproj, & 3391 ' k=',current_k,' npw=',nnpw 3392#endif 3393 ENDIF 3394 ! 3395 IF (PRESENT(vphi)) vphi = vv 3396 DEALLOCATE( vv, cmexx ) 3397 ! 3398 CALL stop_clock( 'vexxace' ) 3399 ! 3400 END SUBROUTINE vexxace_k 3401 ! 3402 ! 3403 !--------------------------------------------------------------------------------- 3404 SUBROUTINE vexx_loc( npw, nbnd, hpsi, mexx ) 3405 !--------------------------------------------------------------------------------- 3406 !! Exact exchange with SCDM orbitals. 3407 !! Vx|phi> = Vx|psi> <psi|Vx|psi>^(-1) <psi|Vx|phi>. 3408 !! locmat contains localization integrals. 3409 ! 3410 USE noncollin_module, ONLY : npol 3411 USE cell_base, ONLY : omega, alat 3412 USE wvfct, ONLY : current_k 3413 USE klist, ONLY : xk, nks, nkstot 3414 USE fft_interfaces, ONLY : fwfft, invfft 3415 USE mp, ONLY : mp_stop, mp_barrier, mp_sum 3416 USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp 3417 USE exx_base, ONLY : nqs, xkq_collect, index_xkq, index_xk, & 3418 g2_convolution 3419 ! 3420 IMPLICIT NONE 3421 ! 3422 INTEGER :: npw 3423 !! number of PW 3424 INTEGER :: nbnd 3425 !! number of bands 3426 COMPLEX(DP) :: hpsi(npw,nbnd) 3427 !! hpsi 3428 REAL(DP) :: mexx(nbnd,nbnd) 3429 !! mexx contains in output the exchange matrix 3430 ! 3431 ! ... local variables 3432 ! 3433 INTEGER :: nrxxs, npairs, ntot, NBands 3434 INTEGER :: ig, ir, ik, ikq, iq, ibnd, jbnd, kbnd, NQR 3435 INTEGER :: current_ik 3436 REAL(DP) :: exxe 3437 COMPLEX(DP), ALLOCATABLE :: rhoc(:), vc(:), RESULT(:,:) 3438 REAL(DP), ALLOCATABLE :: fac(:) 3439 REAL(DP) :: xkp(3), xkq(3) 3440 INTEGER, EXTERNAL :: global_kpoint_index 3441 ! 3442 WRITE( stdout, '(5X,A)' ) ' ' 3443 WRITE( stdout, '(5X,A)' ) 'Exact-exchange with localized orbitals' 3444 ! 3445 CALL start_clock( 'vexxloc' ) 3446 ! 3447 WRITE( stdout,'(7X,A,f24.12)' ) 'local_thr =', local_thr 3448 nrxxs = dfftt%nnr 3449 NQR = nrxxs*npol 3450 ! 3451 ! ... exchange projected onto localized orbitals 3452 WRITE( stdout,'(A)' ) 'Allocating exx quantities...' 3453 ALLOCATE( fac(dfftt%ngm) ) 3454 ALLOCATE( rhoc(nrxxs), vc(NQR) ) 3455 ALLOCATE( RESULT(nrxxs,nbnd) ) 3456 WRITE( stdout,'(A)' ) 'Allocations done.' 3457 ! 3458 current_ik = global_kpoint_index( nkstot, current_k ) 3459 xkp = xk(:,current_k) 3460 ! 3461 vc = (0.0d0, 0.0d0) 3462 npairs = 0 3463 ! 3464 DO iq = 1, nqs 3465 ikq = index_xkq(current_ik,iq) 3466 ik = index_xk(ikq) 3467 xkq = xkq_collect(:,ikq) 3468 ! 3469 CALL g2_convolution( dfftt%ngm, gt, xkp, xkq, fac ) 3470 ! 3471 RESULT = (0.0d0, 0.0d0) 3472 ! 3473 DO ibnd = 1, nbnd 3474 ! 3475 IF (x_occupation(ibnd,ikq) > 0.0d0) THEN 3476 ! 3477 DO ir = 1, NQR 3478 rhoc(ir) = locbuff(ir,ibnd,ikq) * locbuff(ir,ibnd,ikq) / omega 3479 ENDDO 3480 ! 3481 CALL fwfft( 'Rho', rhoc, dfftt ) 3482 ! 3483 vc = (0.0d0, 0.0d0) 3484 DO ig = 1, dfftt%ngm 3485 vc(dfftt%nl(ig)) = fac(ig) * rhoc(dfftt%nl(ig)) 3486 vc(dfftt%nlm(ig)) = fac(ig) * rhoc(dfftt%nlm(ig)) 3487 ENDDO 3488 ! 3489 CALL invfft( 'Rho', vc, dfftt ) 3490 ! 3491 DO ir = 1, NQR 3492 RESULT(ir,ibnd) = RESULT(ir,ibnd) + locbuff(ir,ibnd,ikq) * vc(ir) 3493 ENDDO 3494 ! 3495 ENDIF 3496 ! 3497 DO kbnd = 1, ibnd-1 3498 IF ( (locmat(ibnd,kbnd,ikq) > local_thr) .AND. & 3499 ( (x_occupation(ibnd,ikq) > 0.0d0) .OR. & 3500 (x_occupation(kbnd,ikq) > 0.0d0) ) ) THEN 3501 ! 3502 !write(stdout,'(3I4,3f12.6,A)') ikq, ibnd, kbnd, x_occupation(ibnd,ikq), & 3503 ! x_occupation(kbnd,ikq), locmat(ibnd,kbnd,ikq), ' IN ' 3504 ! 3505 DO ir = 1, NQR 3506 rhoc(ir) = locbuff(ir,ibnd,ikq) * locbuff(ir,kbnd,ikq) / omega 3507 ENDDO 3508 ! 3509 npairs = npairs + 1 3510 ! 3511 CALL fwfft( 'Rho', rhoc, dfftt ) 3512 ! 3513 vc = (0.0d0, 0.0d0) 3514 ! 3515 DO ig = 1, dfftt%ngm 3516 vc(dfftt%nl(ig)) = fac(ig) * rhoc(dfftt%nl(ig)) 3517 vc(dfftt%nlm(ig)) = fac(ig) * rhoc(dfftt%nlm(ig)) 3518 ENDDO 3519 ! 3520 CALL invfft( 'Rho', vc, dfftt ) 3521 ! 3522 DO ir = 1, NQR 3523 RESULT(ir,kbnd) = RESULT(ir,kbnd) + x_occupation(ibnd,ikq) * locbuff(ir,ibnd,ikq) * vc(ir) 3524 ENDDO 3525 ! 3526 DO ir = 1, NQR 3527 RESULT(ir,ibnd) = RESULT(ir,ibnd) + x_occupation(kbnd,ikq) * locbuff(ir,kbnd,ikq) * vc(ir) 3528 ENDDO 3529 ! ELSE 3530 ! write(stdout,'(3I4,3f12.6,A)') ikq, ibnd, kbnd, x_occupation(ibnd,ikq), & 3531 ! x_occupation(kbnd,ikq), locmat(ibnd,kbnd,ikq), ' OUT ' 3532 ENDIF 3533 ! 3534 ENDDO 3535 ! 3536 ENDDO 3537 ! 3538 DO jbnd = 1, nbnd 3539 ! 3540 CALL fwfft( 'Wave', RESULT(:,jbnd), dfftt ) 3541 ! 3542 DO ig = 1, npw 3543 hpsi(ig,jbnd) = hpsi(ig,jbnd) - exxalfa*RESULT(dfftt%nl(ig),jbnd) 3544 ENDDO 3545 ! 3546 ENDDO 3547 ! 3548 ENDDO 3549 ! 3550 DEALLOCATE( fac, vc ) 3551 DEALLOCATE( RESULT ) 3552 ! 3553 ! ... localized functions to G-space and exchange matrix onto localized functions 3554 ALLOCATE( RESULT(npw,nbnd) ) 3555 RESULT = (0.0d0,0.0d0) 3556 ! 3557 DO jbnd = 1, nbnd 3558 rhoc(:) = DBLE(locbuff(:,jbnd,ikq)) + (0.0d0,1.0d0)*0.0d0 3559 CALL fwfft( 'Wave' , rhoc, dfftt ) 3560 DO ig = 1, npw 3561 RESULT(ig,jbnd) = rhoc(dfftt%nl(ig)) 3562 ENDDO 3563 ENDDO 3564 ! 3565 DEALLOCATE( rhoc ) 3566 ! 3567 CALL matcalc( 'M1-', .TRUE., 0, npw, nbnd, nbnd, RESULT, hpsi, mexx, exxe ) 3568 ! 3569 DEALLOCATE( RESULT ) 3570 ! 3571 NBands = INT(SUM(x_occupation(:,ikq))) 3572 ntot = NBands * (NBands-1)/2 + NBands * (nbnd-NBands) 3573 WRITE( stdout,'(7X,2(A,I12),A,f12.2)') ' Pairs(full): ', ntot, & 3574 ' Pairs(included): ', npairs, & 3575 ' Pairs(%): ', DBLE(npairs)/DBLE(ntot)*100.0d0 3576 ! 3577 CALL stop_clock( 'vexxloc' ) 3578 ! 3579 END SUBROUTINE vexx_loc 3580 ! 3581 ! 3582 !---------------------------------------------------------------------------------------------- 3583 SUBROUTINE compute_density( DoPrint, Shift, CenterPBC, SpreadPBC, Overlap, PsiI, PsiJ, NQR, & 3584 ibnd, jbnd ) 3585 !------------------------------------------------------------------------------------------- 3586 !! Manipulate density: get pair density, center, spread, absolute overlap. 3587 !! Shift: 3588 !! .FALSE. refer the centers to the cell -L/2 ... +L/2; 3589 !! .TRUE. shift the centers to the cell 0 ... L (presumably the one given in input). 3590 ! 3591 USE constants, ONLY : pi, bohr_radius_angs 3592 USE cell_base, ONLY : alat, omega 3593 USE mp, ONLY : mp_sum 3594 USE mp_bands, ONLY : intra_bgrp_comm 3595 USE fft_types, ONLY : fft_index_to_3d 3596 ! 3597 IMPLICIT NONE 3598 ! 3599 LOGICAL, INTENT(IN) :: DoPrint 3600 !! whether to print or not the quantities 3601 LOGICAL, INTENT(IN) :: Shift 3602 !! .FALSE. Centers with respect to the minimum image cell convention; 3603 !! .TRUE. Centers shifted to the input cell. 3604 REAL(DP), INTENT(OUT) :: CenterPBC(3) 3605 REAL(DP), INTENT(OUT) :: SpreadPBC(3) 3606 REAL(DP), INTENT(OUT) :: Overlap 3607 INTEGER, INTENT(IN) :: NQR 3608 REAL(DP), INTENT(IN) :: PsiI(NQR) 3609 REAL(DP), INTENT(IN) :: PsiJ(NQR) 3610 INTEGER, INTENT(IN) :: ibnd 3611 INTEGER, INTENT(IN) :: jbnd 3612 ! 3613 ! ... local variables 3614 ! 3615 REAL(DP) :: vol, rbuff, TotSpread 3616 INTEGER :: ir, i, j, k 3617 LOGICAL :: offrange 3618 COMPLEX(DP) :: cbuff(3) 3619 REAL(DP), PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0 3620 ! 3621 vol = omega / DBLE(dfftt%nr1 * dfftt%nr2 * dfftt%nr3) 3622 ! 3623 CenterPBC = Zero 3624 SpreadPBC = Zero 3625 Overlap = Zero 3626 cbuff = (Zero,Zero) 3627 rbuff = Zero 3628 ! 3629 DO ir = 1, dfftt%nr1x*dfftt%my_nr2p*dfftt%my_nr3p 3630 ! 3631 ! ... three dimensional indexes 3632 ! 3633 CALL fft_index_to_3d (ir, dfftt, i,j,k, offrange) 3634 IF ( offrange ) CYCLE 3635 ! 3636 rbuff = PsiI(ir) * PsiJ(ir) / omega 3637 Overlap = Overlap + ABS(rbuff)*vol 3638 cbuff(1) = cbuff(1) + rbuff*EXP((Zero,One)*Two*pi*DBLE(i)/DBLE(dfftt%nr1))*vol 3639 cbuff(2) = cbuff(2) + rbuff*EXP((Zero,One)*Two*pi*DBLE(j)/DBLE(dfftt%nr2))*vol 3640 cbuff(3) = cbuff(3) + rbuff*EXP((Zero,One)*Two*pi*DBLE(k)/DBLE(dfftt%nr3))*vol 3641 ENDDO 3642 ! 3643 CALL mp_sum( cbuff, intra_bgrp_comm ) 3644 CALL mp_sum( Overlap, intra_bgrp_comm ) 3645 ! 3646 CenterPBC(1) = alat/Two/pi*AIMAG( LOG(cbuff(1)) ) 3647 CenterPBC(2) = alat/Two/pi*AIMAG( LOG(cbuff(2)) ) 3648 CenterPBC(3) = alat/Two/pi*AIMAG( LOG(cbuff(3)) ) 3649 ! 3650 IF (Shift) THEN 3651 IF (CenterPBC(1) < Zero) CenterPBC(1) = CenterPBC(1) + alat 3652 IF (CenterPBC(2) < Zero) CenterPBC(2) = CenterPBC(2) + alat 3653 IF (CenterPBC(3) < Zero) CenterPBC(3) = CenterPBC(3) + alat 3654 ENDIF 3655 ! 3656 rbuff = DBLE(cbuff(1))**2 + AIMAG(cbuff(1))**2 3657 SpreadPBC(1) = -(alat/Two/pi)**2 * DLOG(rbuff) 3658 rbuff = DBLE(cbuff(2))**2 + AIMAG(cbuff(2))**2 3659 SpreadPBC(2) = -(alat/Two/pi)**2 * DLOG(rbuff) 3660 rbuff = DBLE(cbuff(3))**2 + AIMAG(cbuff(3))**2 3661 SpreadPBC(3) = -(alat/Two/pi)**2 * DLOG(rbuff) 3662 TotSpread = (SpreadPBC(1) + SpreadPBC(2) + SpreadPBC(3))*bohr_radius_angs**2 3663 ! 3664 IF (DoPrint) THEN 3665 WRITE(stdout,'(A,2I4)') 'MOs: ', ibnd, jbnd 3666 WRITE(stdout,'(A,10f12.6)') 'Absolute Overlap: ', Overlap 3667 WRITE(stdout,'(A,10f12.6)') 'Center(PBC)[A]: ', CenterPBC(1)*bohr_radius_angs, & 3668 CenterPBC(2)*bohr_radius_angs, CenterPBC(3)*bohr_radius_angs 3669 WRITE(stdout,'(A,10f12.6)') 'Spread [A**2]: ', SpreadPBC(1)*bohr_radius_angs**2, & 3670 SpreadPBC(2)*bohr_radius_angs**2, SpreadPBC(3)*bohr_radius_angs**2 3671 WRITE(stdout,'(A,10f12.6)') 'Total Spread [A**2]: ', TotSpread 3672 ENDIF 3673 ! 3674 IF (TotSpread < Zero) CALL errore( 'compute_density', 'Negative spread found', 1 ) 3675 ! 3676 END SUBROUTINE compute_density 3677 ! 3678 ! 3679 !------------------------------------------------------------------------------------ 3680 SUBROUTINE compute_density_k( DoPrint, Shift, CenterPBC, SpreadPBC, Overlap, PsiI, & 3681 PsiJ, NQR, ibnd, jbnd ) 3682 !---------------------------------------------------------------------------------- 3683 !! Manipulate density: get pair density, center, spread, absolute overlap. 3684 !! Shift: 3685 !! .FALSE. refer the centers to the cell -L/2 ... +L/2 3686 !! .TRUE. shift the centers to the cell 0 ... L (presumably the 3687 !! one given in input ) 3688 ! 3689 USE constants, ONLY : pi, bohr_radius_angs 3690 USE cell_base, ONLY : alat, omega 3691 USE mp, ONLY : mp_sum 3692 USE mp_bands, ONLY : intra_bgrp_comm 3693 USE fft_types, ONLY : fft_index_to_3d 3694 ! 3695 IMPLICIT NONE 3696 ! 3697 LOGICAL, INTENT(IN) :: DoPrint 3698 !! whether to print or not the quantities 3699 LOGICAL, INTENT(IN) :: Shift 3700 !! .FALSE. Centers with respect to the minimum image cell convention 3701 !! .TRUE. Centers shifted to the input cell 3702 REAL(DP), INTENT(OUT) :: CenterPBC(3) 3703 !! Coordinates of the center 3704 REAL(DP), INTENT(OUT) :: SpreadPBC(3) 3705 REAL(DP), INTENT(OUT) :: Overlap 3706 INTEGER, INTENT(IN) :: NQR 3707 COMPLEX(DP), INTENT(IN) :: PsiI(NQR) 3708 COMPLEX(DP), INTENT(IN) :: PsiJ(NQR) 3709 INTEGER, INTENT(IN) :: ibnd 3710 INTEGER, INTENT(IN) :: jbnd 3711 ! 3712 ! ... local variables 3713 ! 3714 REAL(DP) :: vol, TotSpread, rbuff 3715 INTEGER :: ir, i, j, k 3716 LOGICAL :: offrange 3717 COMPLEX(DP) :: cbuff(3) 3718 REAL(DP), PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0 3719 ! 3720 vol = omega / DBLE(dfftt%nr1 * dfftt%nr2 * dfftt%nr3) 3721 ! 3722 CenterPBC = Zero 3723 SpreadPBC = Zero 3724 Overlap = Zero 3725 cbuff = (Zero, Zero) 3726 rbuff = Zero 3727 ! 3728 DO ir = 1, dfftt%nr1x*dfftt%my_nr2p*dfftt%my_nr3p 3729 ! 3730 ! ... three dimensional indexes 3731 ! 3732 CALL fft_index_to_3d (ir, dfftt, i,j,k, offrange) 3733 IF ( offrange ) CYCLE 3734 ! 3735 rbuff = ABS(PsiI(ir) * CONJG(PsiJ(ir)) / omega ) 3736 Overlap = Overlap + rbuff*vol 3737 cbuff(1) = cbuff(1) + rbuff*EXP((Zero,One)*Two*pi*DBLE(i)/DBLE(dfftt%nr1))*vol 3738 cbuff(2) = cbuff(2) + rbuff*EXP((Zero,One)*Two*pi*DBLE(j)/DBLE(dfftt%nr2))*vol 3739 cbuff(3) = cbuff(3) + rbuff*EXP((Zero,One)*Two*pi*DBLE(k)/DBLE(dfftt%nr3))*vol 3740 ENDDO 3741 ! 3742 CALL mp_sum( cbuff, intra_bgrp_comm ) 3743 CALL mp_sum( Overlap, intra_bgrp_comm ) 3744 ! 3745 CenterPBC(1) = alat/Two/pi*AIMAG(LOG(cbuff(1))) 3746 CenterPBC(2) = alat/Two/pi*AIMAG(LOG(cbuff(2))) 3747 CenterPBC(3) = alat/Two/pi*AIMAG(LOG(cbuff(3))) 3748 ! 3749 IF (Shift) THEN 3750 IF (CenterPBC(1) < Zero) CenterPBC(1) = CenterPBC(1) + alat 3751 IF (CenterPBC(2) < Zero) CenterPBC(2) = CenterPBC(2) + alat 3752 IF (CenterPBC(3) < Zero) CenterPBC(3) = CenterPBC(3) + alat 3753 ENDIF 3754 ! 3755 rbuff = DBLE(cbuff(1))**2 + AIMAG(cbuff(1))**2 3756 SpreadPBC(1) = -(alat/Two/pi)**2 * LOG(rbuff) 3757 rbuff = DBLE(cbuff(2))**2 + AIMAG(cbuff(2))**2 3758 SpreadPBC(2) = -(alat/Two/pi)**2 * LOG(rbuff) 3759 rbuff = DBLE(cbuff(3))**2 + AIMAG(cbuff(3))**2 3760 SpreadPBC(3) = -(alat/Two/pi)**2 * LOG(rbuff) 3761 TotSpread = (SpreadPBC(1) + SpreadPBC(2) + SpreadPBC(3))*bohr_radius_angs**2 3762 ! 3763 IF (DoPrint) THEN 3764 WRITE(stdout,'(A,2I4)') 'MOs: ', ibnd, jbnd 3765 WRITE(stdout,'(A,10f12.6)') 'Absolute Overlap: ', Overlap 3766 WRITE(stdout,'(A,10f12.6)') 'Center(PBC)[A]: ', CenterPBC(1)*bohr_radius_angs, & 3767 CenterPBC(2)*bohr_radius_angs, CenterPBC(3)*bohr_radius_angs 3768 WRITE(stdout,'(A,10f12.6)') 'Spread [A**2]: ', SpreadPBC(1)*bohr_radius_angs**2, & 3769 SpreadPBC(2)*bohr_radius_angs**2, SpreadPBC(3)*bohr_radius_angs**2 3770 WRITE(stdout,'(A,10f12.6)') 'Total Spread [A**2]: ', TotSpread 3771 ENDIF 3772 ! 3773 IF (TotSpread < Zero) CALL errore( 'compute_density_k', 'Negative spread found', 1 ) 3774 ! 3775 END SUBROUTINE compute_density_k 3776 ! 3777 ! 3778 !------------------------------------------------------------------------ 3779 SUBROUTINE vexx_loc_k( npw, NBands, hpsi, mexx, exxe ) 3780 !----------------------------------------------------------------------- 3781 !! Generic, k-point version of \(\texttt{vexx}\). 3782 ! 3783 USE cell_base, ONLY : omega 3784 USE gvect, ONLY : ngm, g 3785 USE wvfct, ONLY : current_k, npwx 3786 USE klist, ONLY : xk, nks, nkstot 3787 USE fft_interfaces, ONLY : fwfft, invfft 3788 USE exx_base, ONLY : index_xkq, nqs, index_xk, xkq_collect, & 3789 g2_convolution 3790 USE exx_band, ONLY : igk_exx 3791 ! 3792 IMPLICIT NONE 3793 ! 3794 INTEGER :: npw 3795 !! number of PW 3796 INTEGER :: NBands 3797 !! number of bands 3798 COMPLEX(DP) :: hpsi(npwx*npol,NBands) 3799 !! h psi 3800 COMPLEX(DP) :: mexx(NBands,NBands) 3801 !! mexx contains in output the exchange matrix 3802 REAL(DP) :: exxe 3803 !! exx energy 3804 ! 3805 ! ... local variables 3806 ! 3807 COMPLEX(DP), ALLOCATABLE :: RESULT(:), RESULT2(:,:) 3808 COMPLEX(DP), ALLOCATABLE :: rhoc(:), vc(:) 3809 REAL(DP), ALLOCATABLE :: fac(:) 3810 INTEGER :: ibnd, jbnd, ik, ikq, iq 3811 INTEGER :: ir, ig, NBin, NBtot 3812 INTEGER :: current_ik, current_jk 3813 INTEGER :: nrxxs 3814 REAL(DP) :: xkp(3) 3815 REAL(DP) :: xkq(3) 3816 ! 3817 INTEGER, EXTERNAL :: global_kpoint_index 3818 ! 3819 CALL start_clock( 'vexxloc' ) 3820 ! 3821 ALLOCATE( fac(dfftt%ngm) ) 3822 ! 3823 nrxxs = dfftt%nnr 3824 ! 3825 ALLOCATE( RESULT(nrxxs) ) 3826 ALLOCATE( rhoc(nrxxs), vc(nrxxs) ) 3827 ! 3828 current_ik = global_kpoint_index ( nkstot, current_k ) 3829 current_jk = index_xkq(current_ik,1) 3830 ! 3831 NBin = 0 3832 NBtot = 0 3833 xkp = xk(:,current_k) 3834 DO jbnd = 1, NBands 3835 RESULT = (0.0_DP, 0.0_DP) 3836 DO iq = 1, nqs 3837 ikq = index_xkq(current_ik,iq) 3838 ik = index_xk(ikq) 3839 xkq = xkq_collect(:,ikq) 3840 CALL g2_convolution( dfftt%ngm, gt, xkp, xkq, fac ) 3841 DO ibnd = 1, NBands 3842 ! IF ( abs(x_occupation(ibnd,ik)) < eps_occ) CYCLE 3843 ! 3844 NBtot = NBtot + 1 3845 IF ((exxmat(ibnd,ikq,jbnd,current_k) > local_thr).AND. & 3846 ((x_occupation(ibnd,ik) > eps_occ))) THEN 3847 NBin = NBin + 1 3848 ! 3849 ! write(stdout,'(4I4,f12.6,A)') ibnd, ikq, jbnd, current_k, exxmat(ibnd,ikq,jbnd,current_k), ' IN ' 3850!$omp parallel do default(shared), private(ir) 3851 DO ir = 1, nrxxs 3852 rhoc(ir)=CONJG(exxbuff(ir,ibnd,ikq))*exxbuff(ir,jbnd,current_jk) / omega 3853 ENDDO 3854!$omp end parallel do 3855 CALL fwfft( 'Rho', rhoc, dfftt ) 3856 vc = (0._DP, 0._DP) 3857!$omp parallel do default(shared), private(ig) 3858 DO ig = 1, dfftt%ngm 3859 vc(dfftt%nl(ig)) = & 3860 fac(ig) * rhoc(dfftt%nl(ig)) * x_occupation(ibnd,ik) / nqs 3861 ENDDO 3862!$omp end parallel do 3863 CALL invfft( 'Rho', vc, dfftt ) 3864!$omp parallel do default(shared), private(ir) 3865 DO ir = 1, nrxxs 3866 RESULT(ir) = RESULT(ir) + vc(ir)*exxbuff(ir,ibnd,ikq) 3867 ENDDO 3868!$omp end parallel do 3869! ELSE 3870! write(stdout,'(4I4,f12.6,A)') ibnd, ikq, jbnd, current_k, exxmat(ibnd,ikq,jbnd,current_k), ' OUT' 3871 ENDIF 3872 ENDDO 3873 ENDDO 3874 ! 3875 CALL fwfft( 'Wave', RESULT, dfftt ) 3876!$omp parallel do default(shared), private(ig) 3877 DO ig = 1, npw 3878 hpsi(ig,jbnd) = hpsi(ig,jbnd) - exxalfa*RESULT(dfftt%nl(igk_exx(ig,current_k))) 3879 ENDDO 3880!$omp end parallel do 3881 ENDDO 3882 ! 3883 DEALLOCATE( RESULT ) 3884 DEALLOCATE( vc, fac ) 3885 ! 3886 ! ... Localized functions to G-space and exchange matrix onto localized functions 3887 ALLOCATE( RESULT2(npwx,NBands) ) 3888 RESULT2 = (0.0d0,0.0d0) 3889 ! 3890 DO jbnd = 1, NBands 3891 rhoc(:) = exxbuff(:,jbnd,current_jk) 3892 CALL fwfft( 'Wave' , rhoc, dfftt ) 3893 DO ig = 1, npw 3894 RESULT2(ig,jbnd) = rhoc(dfftt%nl(igk_exx(ig,current_k))) 3895 ENDDO 3896 ENDDO 3897 ! 3898 DEALLOCATE( rhoc ) 3899 CALL matcalc_k( 'M1-', .TRUE., 0, current_k, npwx*npol, NBands, NBands, RESULT2, hpsi, mexx, exxe ) 3900 DEALLOCATE( RESULT2 ) 3901 ! 3902 WRITE(stdout,'(7X,2(A,I12),A,f12.2)') ' Pairs(full): ', NBtot, & 3903 ' Pairs(included): ', NBin, & 3904 ' Pairs(%): ', DBLE(NBin)/DBLE(NBtot)*100.0d0 3905 ! 3906 CALL stop_clock( 'vexxloc' ) 3907 ! 3908 END SUBROUTINE vexx_loc_k 3909 ! 3910 ! 3911END MODULE exx 3912