! Copyrigh(C) 2005-2018 Quantum ESPRESSO group ! This file is distributed under the terms of the ! GNU General Public License. See the file `License' ! in the root directory of the present distribution, ! or http://www.gnu.org/copyleft/gpl.txt . ! !----------------------------------------------------------------------------- MODULE exx !----------------------------------------------------------------------------- !! Variables and subroutines for calculation of exact-exchange contribution. !! Implements ACE: Lin Lin, J. Chem. Theory Comput. 2016, 12, 2242. !! Contains code for band parallelization over pairs of bands: see T. Barnes, !! T. Kurth, P. Carrier, N. Wichmann, D. Prendergast, P.R.C. Kent, J. Deslippe !! Computer Physics Communications 2017, dx.doi.org/10.1016/j.cpc.2017.01.008. ! USE kinds, ONLY : DP USE noncollin_module, ONLY : noncolin, npol USE io_global, ONLY : ionode, stdout ! USE control_flags, ONLY : gamma_only, tqr USE fft_types, ONLY : fft_type_descriptor USE stick_base, ONLY : sticks_map, sticks_map_deallocate ! IMPLICIT NONE ! SAVE ! ! ... general purpose vars ! REAL(DP):: exxalfa=0._DP !! the parameter multiplying the exact-exchange part REAL(DP), ALLOCATABLE :: x_occupation(:,:) !! the weights of auxiliary functions in the density matrix INTEGER :: x_nbnd_occ !! number of bands of auxiliary functions with at least !! some x_occupation > eps_occ REAL(DP), PARAMETER :: eps_occ = 1.d-8 !! occupation threshold COMPLEX(DP), ALLOCATABLE :: exxbuff(:,:,:) !! Buffers: temporary (complex) buffer for wfc storage REAL(DP), ALLOCATABLE :: locbuff(:,:,:) !! temporary (real) buffer for wfc storage REAL(DP), ALLOCATABLE :: locmat(:,:,:) !! buffer for matrix of localization integrals REAL(DP), ALLOCATABLE :: exxmat(:,:,:,:) !! buffer for matrix of localization integrals (K) ! #if defined(__USE_INTEL_HBM_DIRECTIVES) !DIR$ ATTRIBUTES FASTMEM :: exxbuff #elif defined(__USE_CRAY_HBM_DIRECTIVES) !DIR$ memory(bandwidth) exxbuff #endif ! LOGICAL :: use_ace !! if .TRUE. use Lin Lin's ACE method, if .FALSE. do not use ACE, !! use old algorithm instead COMPLEX(DP), ALLOCATABLE :: xi(:,:,:) !! ACE projectors COMPLEX(DP), ALLOCATABLE :: evc0(:,:,:) !! old wfc (G-space) needed to compute fock3 INTEGER :: nbndproj !! LOGICAL :: domat !! REAL(DP):: local_thr !! threshold for Lin Lin's SCDM localized orbitals: discard !! contribution to V_x if overlap between localized orbitals !! is smaller than "local_thr". ! ! ... energy related variables ! REAL(DP) :: fock0 = 0.0_DP !! sum REAL(DP) :: fock1 = 0.0_DP !! sum REAL(DP) :: fock2 = 0.0_DP !! sum REAL(DP) :: fock3 = 0.0_DP !! sum REAL(DP) :: dexx = 0.0_DP !! fock1 - 0.5*(fock2+fock0) ! ! ... custom fft grid and related G-vectors ! TYPE(fft_type_descriptor) :: dfftt LOGICAL :: exx_fft_initialized = .FALSE. REAL(kind=DP), DIMENSION(:), POINTER :: ggt => null() !! G-vectors in custom gri REAL(kind=DP), DIMENSION(:,:),POINTER :: gt => null() !! G-vectors in custom grid INTEGER :: gstart_t !! gstart_t=2 if ggt(1)=0, =1 otherwise INTEGER :: npwt !! number of plane waves in custom grid (Gamma-only) INTEGER :: ngmt_g !! Total number of G-vectors in custom grid REAL(DP) :: ecutfock !! energy cutoff for custom grid INTEGER :: ibnd_start = 0 !! starting band index used in bgrp parallelization INTEGER :: ibnd_end = 0 !! ending band index used in bgrp parallelization INTEGER :: ibnd_buff_start !! starting buffer index used in bgrp parallelization INTEGER :: ibnd_buff_end !! ending buffer index used in bgrp parallelization ! CONTAINS #define _CX(A) CMPLX(A,0._dp,kind=DP) #define _CY(A) CMPLX(0._dp,-A,kind=DP) ! !------------------------------------------------------------------------ SUBROUTINE exx_fft_create() !------------------------------------------------------------------------ !! Initialise the custom grid that allows to put the wavefunction !! onto the new (smaller) grid for \rho=\psi_{k+q}\psi^*_k and vice versa. !! Set up fft descriptors, including parallel stuff: sticks, planes, etc. ! USE gvecw, ONLY : ecutwfc USE gvect, ONLY : ecutrho, ngm, g, gg, gstart, mill USE cell_base, ONLY : at, bg, tpiba2 USE recvec_subs, ONLY : ggen, ggens USE fft_base, ONLY : smap USE fft_types, ONLY : fft_type_init USE symm_base, ONLY : fft_fact USE mp_exx, ONLY : nproc_egrp, negrp, intra_egrp_comm USE mp_bands, ONLY : nproc_bgrp, intra_bgrp_comm, nyfft ! USE klist, ONLY : nks, xk USE mp_pools, ONLY : inter_pool_comm USE mp, ONLY : mp_max, mp_sum ! USE control_flags, ONLY : tqr USE realus, ONLY : qpointlist, tabxx, tabp USE exx_band, ONLY : smap_exx USE command_line_options, ONLY : nmany_ ! IMPLICIT NONE ! ! ... local variables ! INTEGER :: ik, ngmt INTEGER, ALLOCATABLE :: ig_l2gt(:), millt(:,:) INTEGER, EXTERNAL :: n_plane_waves REAL(DP) :: gkcut, gcutmt LOGICAL :: lpara ! IF ( exx_fft_initialized ) RETURN ! ! Initialise the custom grid that allows us to put the wavefunction ! onto the new (smaller) grid for \rho=\psi_{k+q}\psi^*_k and vice versa ! ! gkcut is such that all |k+G|^2 < gkcut (in units of (2pi/a)^2) ! Note that with k-points, gkcut > ecutwfc/(2pi/a)^2 ! gcutmt is such that |q+G|^2 < gcutmt ! IF ( gamma_only ) THEN gkcut = ecutwfc / tpiba2 gcutmt = ecutfock / tpiba2 ELSE gkcut = 0.0_DP DO ik = 1, nks gkcut = MAX(gkcut, SQRT(SUM(xk(:,ik)**2))) ENDDO CALL mp_max( gkcut, inter_pool_comm ) ! Alternatively, variable "qnorm" earlier computed in "exx_grid_init" ! could be used as follows: ! gkcut = ( SQRT(ecutwfc/tpiba2) + qnorm )**2 gkcut = ( SQRT(ecutwfc/tpiba2) + gkcut )**2 ! ! ... the following instruction may be needed if ecutfock \simeq ecutwfc ! and guarantees that all k+G are included ! gcutmt = MAX(ecutfock/tpiba2, gkcut) ENDIF ! ! ... set up fft descriptors, including parallel stuff: sticks, planes, etc. ! IF (negrp == 1) THEN ! ! ... no band parallelization: exx grid is a subgrid of general grid ! lpara = ( nproc_bgrp > 1 ) CALL fft_type_init( dfftt, smap, "rho", gamma_only, lpara, & intra_bgrp_comm, at, bg, gcutmt, gcutmt/gkcut, & fft_fact=fft_fact, nyfft=nyfft, nmany=nmany_ ) CALL ggens( dfftt, gamma_only, at, g, gg, mill, gcutmt, ngmt, gt, ggt ) gstart_t = gstart npwt = n_plane_waves(ecutwfc/tpiba2, nks, xk, gt, ngmt) ngmt_g = ngmt CALL mp_sum( ngmt_g, intra_bgrp_comm ) ! ELSE ! WRITE( 6, "(5X,'Exchange parallelized over bands (',i4,' band groups)')" ) & negrp lpara = ( nproc_egrp > 1 ) CALL fft_type_init( dfftt, smap_exx, "rho", gamma_only, lpara, & intra_egrp_comm, at, bg, gcutmt, gcutmt/gkcut, & fft_fact=fft_fact, nyfft=nyfft, nmany=nmany_ ) ngmt = dfftt%ngm ngmt_g = ngmt CALL mp_sum( ngmt_g, intra_egrp_comm ) ALLOCATE( gt(3,dfftt%ngm) ) ALLOCATE( ggt(dfftt%ngm) ) ALLOCATE( millt(3,dfftt%ngm) ) ALLOCATE( ig_l2gt(dfftt%ngm) ) ! CALL ggen( dfftt, gamma_only, at, bg, gcutmt, ngmt_g, ngmt, & gt, ggt, millt, ig_l2gt, gstart_t ) ! DEALLOCATE( ig_l2gt ) DEALLOCATE( millt ) npwt = n_plane_waves( ecutwfc/tpiba2, nks, xk, gt, ngmt ) ! ENDIF ! define clock labels (this enables the corresponding fft too) dfftt%rho_clock_label = 'fftc' ; dfftt%wave_clock_label = 'fftcw' ! WRITE( stdout, '(/5x,"EXX grid: ",i8," G-vectors", 5x, & & "FFT dimensions: (",i4,",",i4,",",i4,")")') ngmt_g, & & dfftt%nr1, dfftt%nr2, dfftt%nr3 ! exx_fft_initialized = .TRUE. ! IF (tqr) THEN IF (ecutfock == ecutrho) THEN WRITE( stdout, '(5x,"Real-space augmentation: EXX grid -> DENSE grid")' ) tabxx => tabp ELSE WRITE( stdout, '(5x,"Real-space augmentation: initializing EXX grid")' ) CALL qpointlist( dfftt, tabxx ) ENDIF ENDIF ! RETURN ! END SUBROUTINE exx_fft_create ! ! !------------------------------------------------------------------------ SUBROUTINE exx_gvec_reinit( at_old ) !---------------------------------------------------------------------- !! Re-initialize g-vectors after rescaling. ! USE cell_base, ONLY : bg ! IMPLICIT NONE ! REAL(DP), INTENT(IN) :: at_old(3,3) !! the lattice vectors at the previous step ! ! ... local variables ! INTEGER :: ig REAL(DP) :: gx, gy, gz ! ! ... rescale g-vectors ! CALL cryst_to_cart( dfftt%ngm, gt, at_old, -1 ) CALL cryst_to_cart( dfftt%ngm, gt, bg, +1 ) ! DO ig = 1, dfftt%ngm gx = gt(1,ig) gy = gt(2,ig) gz = gt(3,ig) ggt(ig) = gx*gx + gy*gy + gz*gz ENDDO ! END SUBROUTINE exx_gvec_reinit ! ! !------------------------------------------------------------------------ SUBROUTINE deallocate_exx() !------------------------------------------------------------------------ !! Deallocates exx objects. ! USE becmod, ONLY : deallocate_bec_type, is_allocated_bec_type, bec_type USE us_exx, ONLY : becxx USE exx_base, ONLY : xkq_collect, index_xkq, index_xk, index_sym, rir, & working_pool, exx_grid_initialized ! IMPLICIT NONE ! INTEGER :: ikq ! exx_grid_initialized = .FALSE. ! IF ( ALLOCATED(index_xkq) ) DEALLOCATE( index_xkq ) IF ( ALLOCATED(index_xk ) ) DEALLOCATE( index_xk ) IF ( ALLOCATED(index_sym) ) DEALLOCATE( index_sym ) IF ( ALLOCATED(rir) ) DEALLOCATE( rir ) IF ( ALLOCATED(x_occupation) ) DEALLOCATE( x_occupation ) IF ( ALLOCATED(xkq_collect ) ) DEALLOCATE( xkq_collect ) IF ( ALLOCATED(exxbuff) ) DEALLOCATE( exxbuff ) IF ( ALLOCATED(locbuff) ) DEALLOCATE( locbuff ) IF ( ALLOCATED(locmat) ) DEALLOCATE( locmat ) IF ( ALLOCATED(exxmat) ) DEALLOCATE( exxmat ) IF ( ALLOCATED(xi) ) DEALLOCATE( xi ) IF ( ALLOCATED(evc0) ) DEALLOCATE( evc0 ) ! IF ( ALLOCATED(becxx) ) THEN DO ikq = 1, SIZE(becxx) IF (is_allocated_bec_type(becxx(ikq))) CALL deallocate_bec_type( becxx(ikq) ) ENDDO ! DEALLOCATE( becxx ) ENDIF ! IF ( ALLOCATED(working_pool) ) DEALLOCATE( working_pool ) ! exx_fft_initialized = .FALSE. IF ( ASSOCIATED(gt) ) DEALLOCATE( gt ) IF ( ASSOCIATED(ggt) ) DEALLOCATE( ggt ) ! END SUBROUTINE deallocate_exx ! ! !------------------------------------------------------------------------ SUBROUTINE exxinit( DoLoc ) !------------------------------------------------------------------------ !! This subroutine is run before the first H_psi() of each iteration. !! It saves the wavefunctions for the right density matrix, in real space. ! USE wavefunctions, ONLY : evc USE io_files, ONLY : nwordwfc, iunwfc_exx USE buffers, ONLY : get_buffer USE wvfct, ONLY : nbnd, npwx, wg, current_k USE klist, ONLY : ngk, nks, nkstot, xk, wk, igk_k USE symm_base, ONLY : nsym, s, sr USE mp_pools, ONLY : npool, nproc_pool, me_pool, inter_pool_comm USE mp_exx, ONLY : me_egrp, negrp, init_index_over_band, & my_egrp_id, inter_egrp_comm, & intra_egrp_comm, iexx_start, iexx_end, & all_start, all_end USE mp, ONLY : mp_sum, mp_bcast USE funct, ONLY : get_exx_fraction, start_exx,exx_is_active, & get_screening_parameter, get_gau_parameter USE scatter_mod, ONLY : gather_grid, scatter_grid USE fft_interfaces, ONLY : invfft USE uspp, ONLY : nkb, vkb, okvan USE us_exx, ONLY : rotate_becxx USE paw_variables, ONLY : okpaw USE paw_exx, ONLY : PAW_init_fock_kernel USE mp_orthopools, ONLY : intra_orthopool_comm USE exx_base, ONLY : nkqs, xkq_collect, index_xk, index_sym, & exx_set_symm, rir, working_pool, exxdiv, & erfc_scrlen, gau_scrlen, exx_divergence USE exx_band, ONLY : change_data_structure, nwordwfc_exx, & transform_evc_to_exx, igk_exx, evc_exx ! IMPLICIT NONE ! LOGICAL :: DoLoc !! TRUE: Real Array locbuff(ir, nbnd, nkqs); !! FALSE: Complex Array exxbuff(ir, nbnd/2, nkqs). ! ! ... local variables ! INTEGER :: ik, ibnd, i, j, k, ir, isym, ikq, ig INTEGER :: ibnd_loop_start INTEGER :: ipol, jpol REAL(DP), ALLOCATABLE :: occ(:,:) COMPLEX(DP),ALLOCATABLE :: temppsic(:) #if defined(__USE_INTEL_HBM_DIRECTIVES) !DIR$ ATTRIBUTES FASTMEM :: temppsic #elif defined(__USE_CRAY_HBM_DIRECTIVES) !DIR$ memory(bandwidth) temppsic #endif COMPLEX(DP),ALLOCATABLE :: temppsic_nc(:,:), psic_nc(:,:) COMPLEX(DP),ALLOCATABLE :: psic_exx(:) INTEGER :: nxxs, nrxxs #if defined(__MPI) COMPLEX(DP),ALLOCATABLE :: temppsic_all(:), psic_all(:) COMPLEX(DP), ALLOCATABLE :: temppsic_all_nc(:,:), psic_all_nc(:,:) #endif COMPLEX(DP) :: d_spin(2,2,48) INTEGER :: npw, current_ik INTEGER, EXTERNAL :: global_kpoint_index INTEGER :: ibnd_start_new, ibnd_end_new, max_buff_bands_per_egrp INTEGER :: ibnd_exx, evc_offset ! CALL start_clock ('exxinit') IF ( Doloc ) THEN WRITE(stdout,'(/,5X,"Using localization algorithm with threshold: ",& & D10.2)') local_thr ! IF (.NOT.gamma_only) CALL errore('exxinit','SCDM with K-points NYI',1) IF (okvan .OR. okpaw) CALL errore( 'exxinit','SCDM with USPP/PAW not & &implemented', 1 ) ENDIF IF ( use_ace ) & WRITE(stdout,'(/,5X,"Using ACE for calculation of exact exchange")') ! CALL transform_evc_to_exx( 2 ) ! ! ... prepare the symmetry matrices for the spin part ! IF (noncolin) THEN DO isym = 1, nsym CALL find_u( sr(:,:,isym), d_spin(:,:,isym) ) ENDDO ENDIF ! CALL exx_fft_create() ! ! Note that nxxs is not the same as nrxxs in parallel case nxxs = dfftt%nr1x * dfftt%nr2x * dfftt%nr3x nrxxs = dfftt%nnr #if defined(__MPI) IF (noncolin) THEN ALLOCATE( psic_all_nc(nxxs,npol), temppsic_all_nc(nxxs,npol) ) ELSEIF ( .NOT. gamma_only ) THEN ALLOCATE( psic_all(nxxs), temppsic_all(nxxs) ) ENDIF #endif IF (noncolin) THEN ALLOCATE( temppsic_nc(nrxxs, npol), psic_nc(nrxxs, npol) ) ELSEIF ( .NOT. gamma_only ) THEN ALLOCATE( temppsic(nrxxs) ) ENDIF ! ALLOCATE( psic_exx(nrxxs) ) ! IF (.NOT.exx_is_active()) THEN ! erfc_scrlen = get_screening_parameter() gau_scrlen = get_gau_parameter() exxdiv = exx_divergence() exxalfa = get_exx_fraction() ! CALL start_exx() ENDIF ! IF (.NOT. gamma_only) CALL exx_set_symm( dfftt%nr1, dfftt%nr2, dfftt%nr3, & dfftt%nr1x, dfftt%nr2x, dfftt%nr3x ) ! set occupations of wavefunctions used in the calculation of exchange term IF (.NOT. ALLOCATED(x_occupation)) ALLOCATE( x_occupation(nbnd,nkstot) ) ALLOCATE( occ(nbnd,nks) ) ! DO ik = 1, nks IF (ABS(wk(ik)) > eps_occ) THEN occ(1:nbnd,ik) = wg(1:nbnd,ik) / wk(ik) ELSE occ(1:nbnd,ik) = 0._DP ENDIF ENDDO ! CALL poolcollect( nbnd, nks, occ, nkstot, x_occupation ) ! DEALLOCATE( occ ) ! ! ... find an upper bound to the number of bands with non zero occupation. ! Useful to distribute bands among band groups ! x_nbnd_occ = 0 DO ik = 1, nkstot DO ibnd = MAX(1,x_nbnd_occ), nbnd IF (ABS(x_occupation(ibnd,ik)) > eps_occ) x_nbnd_occ = ibnd ENDDO ENDDO ! IF (nbndproj == 0) nbndproj = nbnd ! CALL divide( inter_egrp_comm, x_nbnd_occ, ibnd_start, ibnd_end ) CALL init_index_over_band( inter_egrp_comm, nbnd, nbnd ) ! ! ... this will cause exxbuff to be calculated for every band ibnd_start_new = iexx_start ibnd_end_new = iexx_end ! IF ( gamma_only ) THEN ibnd_buff_start = (ibnd_start_new+1)/2 ibnd_buff_end = (ibnd_end_new+1)/2 max_buff_bands_per_egrp = MAXVAL((all_end(:)+1)/2-(all_start(:)+1)/2)+1 ELSE ibnd_buff_start = ibnd_start_new ibnd_buff_end = ibnd_end_new max_buff_bands_per_egrp = MAXVAL(all_end(:)-all_start(:))+1 ENDIF ! IF (DoLoc) THEN ! IF (gamma_only) THEN IF (.NOT. ALLOCATED(locbuff)) ALLOCATE( locbuff(nrxxs*npol,nbnd,nks) ) IF (.NOT. ALLOCATED(locmat)) ALLOCATE( locmat(nbnd,nbnd,nks) ) locbuff = 0.0d0 locmat = 0.0d0 ELSE IF (.NOT. ALLOCATED(exxbuff)) ALLOCATE( exxbuff(nrxxs*npol,nbnd,nkqs) ) IF (.NOT. ALLOCATED(exxmat) ) ALLOCATE( exxmat(nbnd,nkqs,nbnd,nks) ) exxbuff = (0.0d0, 0.0d0) exxmat = 0.0d0 ENDIF ! IF (.NOT. ALLOCATED(evc0)) then ALLOCATE( evc0(npwx*npol,nbndproj,nks) ) evc0 = (0.0d0,0.0d0) ENDIF ! ELSE ! IF (.NOT. ALLOCATED(exxbuff)) THEN IF (gamma_only) THEN ALLOCATE( exxbuff(nrxxs*npol,ibnd_buff_start:ibnd_buff_start + & max_buff_bands_per_egrp-1,nkqs) ) ! THIS WORKS as for k ELSE ALLOCATE( exxbuff(nrxxs*npol,ibnd_buff_start:ibnd_buff_start + & max_buff_bands_per_egrp-1,nkqs) ) ENDIF ENDIF ENDIF ! !assign buffer IF(DoLoc) THEN IF(gamma_only) THEN !$omp parallel do collapse(3) default(shared) firstprivate(npol,nrxxs,nkqs, & !$omp ibnd_buff_start,ibnd_buff_end) private(ir,ibnd,ikq,ipol) DO ikq=1,SIZE(locbuff,3) DO ibnd=1, x_nbnd_occ DO ir=1,nrxxs*npol locbuff(ir,ibnd,ikq)=0.0_DP ENDDO ENDDO ENDDO ENDIF ELSE !$omp parallel do collapse(3) default(shared) firstprivate(npol,nrxxs,nkqs, & !$omp ibnd_buff_start,ibnd_buff_end) private(ir,ibnd,ikq,ipol) DO ikq = 1, SIZE(exxbuff,3) DO ibnd = ibnd_buff_start, ibnd_buff_end DO ir = 1, nrxxs*npol exxbuff(ir,ibnd,ikq) = (0.0_DP,0.0_DP) ENDDO ENDDO ENDDO ! the above loops will replaced with the following line soon !CALL threaded_memset(exxbuff, 0.0_DP, nrxxs*npol*SIZE(exxbuff,2)*nkqs*2) ENDIF ! ! ... This is parallelized over pools. Each pool computes only its k-points ! KPOINTS_LOOP : & DO ik = 1, nks ! IF ( nks > 1 ) CALL get_buffer( evc_exx, nwordwfc_exx, iunwfc_exx, ik ) ! ! ik = index of k-point in this pool ! current_ik = index of k-point over all pools ! current_ik = global_kpoint_index( nkstot, ik ) ! IF_GAMMA_ONLY : & IF (gamma_only) THEN ! IF (MOD(iexx_start,2) == 0) THEN ibnd_loop_start = iexx_start-1 ELSE ibnd_loop_start = iexx_start ENDIF ! evc_offset = 0 DO ibnd = ibnd_loop_start, iexx_end, 2 ! psic_exx(:) = ( 0._DP, 0._DP ) ! IF ( ibnd < iexx_end ) THEN IF ( ibnd == ibnd_loop_start .AND. MOD(iexx_start,2) == 0 ) THEN DO ig = 1, npwt psic_exx(dfftt%nl(ig)) = ( 0._DP, 1._DP )*evc_exx(ig,1) psic_exx(dfftt%nlm(ig)) = ( 0._DP, 1._DP )*CONJG(evc_exx(ig,1)) ENDDO evc_offset = -1 ELSE DO ig = 1, npwt psic_exx(dfftt%nl(ig)) = evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+1) & + ( 0._DP, 1._DP ) * evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+2) psic_exx(dfftt%nlm(ig)) = CONJG( evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+1) ) & + ( 0._DP, 1._DP ) * CONJG( evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+2) ) ENDDO ENDIF ELSE DO ig=1,npwt psic_exx(dfftt%nl (ig)) = evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+1) psic_exx(dfftt%nlm(ig)) = CONJG( evc_exx(ig,ibnd-ibnd_loop_start+evc_offset+1) ) ENDDO ENDIF ! CALL invfft( 'Wave', psic_exx, dfftt ) ! IF (DoLoc) THEN locbuff(1:nrxxs,ibnd-ibnd_loop_start+evc_offset+1,ik) = DBLE( psic_exx(1:nrxxs) ) IF (ibnd-ibnd_loop_start+evc_offset+2 <= nbnd) & locbuff(1:nrxxs,ibnd-ibnd_loop_start+evc_offset+2,ik) = AIMAG( psic_exx(1:nrxxs) ) ELSE exxbuff(1:nrxxs,(ibnd+1)/2,current_ik)=psic_exx(1:nrxxs) ENDIF ! ENDDO ! ELSE IF_GAMMA_ONLY ! npw = ngk (ik) IBND_LOOP_K : & DO ibnd = iexx_start, iexx_end ! ibnd_exx = ibnd IF (noncolin) THEN !$omp parallel do default(shared) private(ir) firstprivate(nrxxs) DO ir = 1, nrxxs temppsic_nc(ir,1) = ( 0._DP, 0._DP ) temppsic_nc(ir,2) = ( 0._DP, 0._DP ) ENDDO !$omp parallel do default(shared) private(ig) firstprivate(npw,ik,ibnd_exx) DO ig = 1, npw temppsic_nc(dfftt%nl(igk_exx(ig,ik)),1) = evc_exx(ig,ibnd-iexx_start+1) ENDDO !$omp end parallel do CALL invfft( 'Wave', temppsic_nc(:,1), dfftt ) !$omp parallel do default(shared) private(ig) firstprivate(npw,ik,ibnd_exx,npwx) DO ig = 1, npw temppsic_nc(dfftt%nl(igk_exx(ig,ik)),2) = evc_exx(ig+npwx,ibnd-iexx_start+1) ENDDO !$omp end parallel do CALL invfft( 'Wave', temppsic_nc(:,2), dfftt ) ELSE !$omp parallel do default(shared) private(ir) firstprivate(nrxxs) DO ir = 1, nrxxs temppsic(ir) = ( 0._DP, 0._DP ) ENDDO !$omp parallel do default(shared) private(ig) firstprivate(npw,ik,ibnd_exx) DO ig = 1, npw temppsic(dfftt%nl(igk_exx(ig,ik))) = evc_exx(ig,ibnd-iexx_start+1) ENDDO !$omp end parallel do CALL invfft( 'Wave', temppsic, dfftt ) ENDIF ! DO ikq = 1, nkqs ! IF (index_xk(ikq) /= current_ik) CYCLE isym = ABS(index_sym(ikq) ) ! IF (noncolin) THEN ! noncolinear #if defined(__MPI) DO ipol = 1, npol CALL gather_grid( dfftt, temppsic_nc(:,ipol), temppsic_all_nc(:,ipol) ) ENDDO ! IF ( me_egrp == 0 ) THEN !$omp parallel do default(shared) private(ir) firstprivate(npol,nxxs) DO ir = 1, nxxs !DIR$ UNROLL_AND_JAM (2) DO ipol = 1, npol psic_all_nc(ir,ipol) = (0.0_DP, 0.0_DP) ENDDO ENDDO !$omp end parallel do !$omp parallel do default(shared) private(ir) firstprivate(npol,isym,nxxs) reduction(+:psic_all_nc) DO ir = 1, nxxs !DIR$ UNROLL_AND_JAM (4) DO ipol = 1, npol DO jpol = 1, npol psic_all_nc(ir,ipol) = psic_all_nc(ir,ipol) + & CONJG(d_spin(jpol,ipol,isym)) * & temppsic_all_nc(rir(ir,isym),jpol) ENDDO ENDDO ENDDO !$omp end parallel do ENDIF ! DO ipol = 1, npol CALL scatter_grid( dfftt, psic_all_nc(:,ipol), psic_nc(:,ipol) ) ENDDO #else !$omp parallel do default(shared) private(ir) firstprivate(npol,nxxs) DO ir = 1, nxxs !DIR$ UNROLL_AND_JAM (2) DO ipol = 1, npol psic_nc(ir,ipol) = (0._DP,0._DP) ENDDO ENDDO !$omp end parallel do !$omp parallel do default(shared) private(ipol,jpol,ir) firstprivate(npol,isym,nxxs) reduction(+:psic_nc) DO ir = 1, nxxs !DIR$ UNROLL_AND_JAM (4) DO ipol = 1, npol DO jpol = 1, npol psic_nc(ir,ipol) = psic_nc(ir,ipol) + CONJG(d_spin(jpol,ipol,isym))* & temppsic_nc(rir(ir,isym),jpol) ENDDO ENDDO ENDDO !$omp end parallel do #endif IF (index_sym(ikq) > 0 ) THEN ! sym. op. without time reversal: normal case !$omp parallel do default(shared) private(ir) firstprivate(ibnd,isym,ikq) DO ir = 1, nrxxs exxbuff(ir,ibnd,ikq) = psic_nc(ir,1) exxbuff(ir+nrxxs,ibnd,ikq) = psic_nc(ir,2) ENDDO !$omp end parallel do ELSE ! sym. op. with time reversal: spin 1->2*, 2->-1* !$omp parallel do default(shared) private(ir) firstprivate(ibnd,isym,ikq) DO ir = 1, nrxxs exxbuff(ir,ibnd,ikq) = CONJG(psic_nc(ir,2)) exxbuff(ir+nrxxs,ibnd,ikq) = -CONJG(psic_nc(ir,1)) ENDDO !$omp end parallel do ENDIF ELSE ! noncolinear #if defined(__MPI) CALL gather_grid( dfftt, temppsic, temppsic_all ) IF ( me_egrp == 0 ) THEN !$omp parallel do default(shared) private(ir) firstprivate(isym) DO ir = 1, nxxs psic_all(ir) = temppsic_all(rir(ir,isym)) ENDDO !$omp end parallel do ENDIF CALL scatter_grid( dfftt, psic_all, psic_exx ) #else !$omp parallel do default(shared) private(ir) firstprivate(isym) DO ir = 1, nrxxs psic_exx(ir) = temppsic(rir(ir,isym)) ENDDO !$omp end parallel do #endif !$omp parallel do default(shared) private(ir) firstprivate(isym,ibnd,ikq) DO ir = 1, nrxxs IF (index_sym(ikq) < 0 ) THEN psic_exx(ir) = CONJG(psic_exx(ir)) ENDIF exxbuff(ir,ibnd,ikq) = psic_exx(ir) ENDDO !$omp end parallel do ! ENDIF ! noncolinear ! ENDDO ! ENDDO& IBND_LOOP_K ! ENDIF& IF_GAMMA_ONLY ENDDO& KPOINTS_LOOP ! DEALLOCATE( psic_exx ) IF (noncolin) THEN DEALLOCATE( temppsic_nc, psic_nc ) #if defined(__MPI) DEALLOCATE( temppsic_all_nc, psic_all_nc ) #endif ELSE IF ( .NOT. gamma_only ) THEN DEALLOCATE( temppsic ) #if defined(__MPI) DEALLOCATE( temppsic_all, psic_all ) #endif ENDIF ! ! Each wavefunction in exxbuff is computed by a single pool, collect among ! pools in a smart way (i.e. without doing all-to-all sum and bcast) ! See also the initialization of working_pool in exx_mp_init ! Note that in Gamma-only LSDA can be parallelized over two pools, and there ! is no need to communicate anything: each pools deals with its own spin ! IF ( .NOT. gamma_only ) THEN DO ikq = 1, nkqs CALL mp_bcast( exxbuff(:,:,ikq), working_pool(ikq), intra_orthopool_comm ) ENDDO ENDIF ! ! For US/PAW only: compute for the entire ! de-symmetrized k+q grid by rotating the ones from the irreducible wedge ! IF (okvan) CALL rotate_becxx( nkqs, index_xk, index_sym, xkq_collect ) ! ! Initialize 4-wavefunctions one-center Fock integrals ! \int \psi_a(r)\phi_a(r)\phi_b(r')\psi_b(r')/|r-r'| ! IF (okpaw) CALL PAW_init_fock_kernel() ! CALL change_data_structure( .FALSE. ) ! CALL stop_clock( 'exxinit' ) ! END SUBROUTINE exxinit ! ! !----------------------------------------------------------------------- SUBROUTINE vexx( lda, n, m, psi, hpsi, becpsi ) !----------------------------------------------------------------------- !! Wrapper routine computing V_x\psi, V_x = exchange potential. !! Calls generic version vexx_k or Gamma-specific one vexx_gamma. ! USE becmod, ONLY : bec_type USE uspp, ONLY : okvan USE paw_variables, ONLY : okpaw USE us_exx, ONLY : becxx USE mp_exx, ONLY : negrp, inter_egrp_comm, init_index_over_band USE wvfct, ONLY : nbnd USE exx_band, ONLY : transform_psi_to_exx, transform_hpsi_to_local, & psi_exx, hpsi_exx, igk_exx ! IMPLICIT NONE ! INTEGER :: lda !! input: leading dimension of arrays psi and hpsi INTEGER :: n !! input: true dimension of psi and hpsi INTEGER :: m !! input: number of states psi COMPLEX(DP) :: psi(lda*npol,m) !! input: m wavefunctions COMPLEX(DP) :: hpsi(lda*npol,m) !! output: V_x*psi TYPE(bec_type), OPTIONAL :: becpsi !! input: , optional but needed for US and PAW case ! INTEGER :: i ! IF ((okvan.OR.okpaw) .AND. .NOT. PRESENT(becpsi)) & CALL errore( 'vexx','becpsi needed for US/PAW case', 1 ) CALL start_clock( 'vexx' ) ! IF (negrp > 1) THEN CALL init_index_over_band( inter_egrp_comm, nbnd, m ) ! ! ... transform psi to the EXX data structure CALL transform_psi_to_exx( lda, n, m, psi ) ENDIF ! ! ... calculate the EXX contribution to hpsi ! IF ( gamma_only ) THEN IF (negrp == 1)THEN CALL vexx_gamma( lda, n, m, psi, hpsi, becpsi ) ELSE CALL vexx_gamma( lda, n, m, psi_exx, hpsi_exx, becpsi ) ENDIF ELSE IF (negrp.eq.1)THEN CALL vexx_k( lda, n, m, psi, hpsi, becpsi ) ELSE CALL vexx_k( lda, n, m, psi_exx, hpsi_exx, becpsi ) ENDIF ENDIF ! IF (negrp > 1) THEN ! ! ... transform hpsi to the local data structure ! CALL transform_hpsi_to_local(lda,n,m,hpsi) ! ENDIF ! CALL stop_clock( 'vexx' ) ! END SUBROUTINE vexx ! ! !----------------------------------------------------------------------- SUBROUTINE vexx_gamma( lda, n, m, psi, hpsi, becpsi ) !----------------------------------------------------------------------- !! Gamma-specific version of vexx. ! USE constants, ONLY : fpi, e2, pi USE cell_base, ONLY : omega USE gvect, ONLY : ngm, g USE wvfct, ONLY : npwx, current_k, nbnd USE klist, ONLY : xk, nks, nkstot, igk_k USE fft_interfaces, ONLY : fwfft, invfft USE becmod, ONLY : bec_type USE mp_exx, ONLY : inter_egrp_comm, my_egrp_id, & intra_egrp_comm, me_egrp, & negrp, max_pairs, egrp_pairs, ibands, nibands, & iexx_istart, iexx_iend, & all_start, all_end, iexx_start, jblock USE mp, ONLY : mp_sum, mp_barrier, mp_circular_shift_left USE uspp, ONLY : nkb, okvan USE paw_variables, ONLY : okpaw USE us_exx, ONLY : bexg_merge, becxx, addusxx_g, addusxx_r, & newdxx_g, newdxx_r, add_nlxx_pot, & qvan_init, qvan_clean USE paw_exx, ONLY : PAW_newdxx USE exx_base, ONLY : nqs, index_xkq, index_xk, xkq_collect, & coulomb_fac, g2_convolution_all USE exx_band, ONLY : result_sum, igk_exx ! IMPLICIT NONE ! INTEGER :: lda !! input: leading dimension of arrays psi and hpsi INTEGER :: n !! input: true dimension of psi and hpsi INTEGER :: m !! input: number of states psi COMPLEX(DP) :: psi(lda*npol,m) !! input: m wavefunctions COMPLEX(DP) :: hpsi(lda*npol,m) !! output: V_x*psi TYPE(bec_type), OPTIONAL :: becpsi ! or call a calbec(...psi) instead !! input: , optional but needed for US and PAW case ! ! ... local variables ! COMPLEX(DP), ALLOCATABLE :: result(:,:) REAL(DP), ALLOCATABLE :: temppsic_dble (:) REAL(DP), ALLOCATABLE :: temppsic_aimag(:) ! COMPLEX(DP), ALLOCATABLE :: vc(:,:), deexx(:,:) REAL(DP), ALLOCATABLE :: fac(:) INTEGER :: ibnd, ik, im , ikq, iq, ipol INTEGER :: ir, ig INTEGER :: current_ik INTEGER :: ibnd_loop_start INTEGER :: nrxxs REAL(DP) :: x1, x2, xkp(3) REAL(DP) :: xkq(3) INTEGER, EXTERNAL :: global_kpoint_index INTEGER :: ialloc COMPLEX(DP), ALLOCATABLE :: big_result(:,:) INTEGER :: iproc, nproc_egrp, ii, ipair INTEGER :: jbnd, jstart, jend ! ... scratch space for fft of psi and rho COMPLEX(DP), ALLOCATABLE :: psi_rhoc_work(:) INTEGER :: jblock_start, jblock_end INTEGER :: iegrp, wegrp INTEGER :: exxbuff_index INTEGER :: ending_im ! ialloc = nibands(my_egrp_id+1) ! ALLOCATE( fac(dfftt%ngm) ) ! nrxxs = dfftt%nnr ! !ALLOCATE( result(nrxxs), temppsic_DBLE(nrxxs), temppsic_aimag(nrxxs) ) ALLOCATE( result(nrxxs,ialloc), temppsic_DBLE(nrxxs) ) ALLOCATE( temppsic_aimag(nrxxs) ) ALLOCATE( psi_rhoc_work(nrxxs) ) ! ALLOCATE( vc(nrxxs,ialloc) ) IF (okvan) ALLOCATE( deexx(nkb,ialloc) ) ! current_ik = global_kpoint_index( nkstot, current_k ) xkp = xk(:,current_k) ! ALLOCATE( big_result(n,m) ) big_result = 0.0_DP result = 0.0_DP ! DO ii = 1, nibands(my_egrp_id+1) IF (okvan) deexx(:,ii) = 0.0_DP ENDDO ! ! Here the loops start ! INTERNAL_LOOP_ON_Q : & DO iq = 1, nqs ! ikq = index_xkq(current_ik,iq) ik = index_xk(ikq) xkq = xkq_collect(:,ikq) ! ! calculate the 1/|r-r'| (actually, k+q+g) factor and place it in fac CALL g2_convolution_all( dfftt%ngm, gt, xkp, xkq, iq, current_k ) IF ( okvan .AND..NOT.tqr ) CALL qvan_init( dfftt%ngm, xkq, xkp ) ! DO iegrp = 1, negrp ! ! compute the id of group whose data is currently worked on wegrp = MOD(iegrp+my_egrp_id-1, negrp)+1 ! jblock_start = all_start(wegrp) jblock_end = all_end(wegrp) ! LOOP_ON_PSI_BANDS : & DO ii = 1, nibands(my_egrp_id+1) ! ibnd = ibands(ii,my_egrp_id+1) ! IF (ibnd==0 .OR. ibnd>m) CYCLE ! IF (MOD(ii,2) == 1) THEN ! psi_rhoc_work = (0._DP,0._DP) ! IF ((ii+1) <= MIN(m,nibands(my_egrp_id+1))) THEN ! deal with double bands !$omp parallel do default(shared), private(ig) DO ig = 1, npwt psi_rhoc_work( dfftt%nl(ig) ) = psi(ig, ii) + (0._DP,1._DP) * psi(ig, ii+1) psi_rhoc_work( dfftt%nlm(ig) ) = CONJG(psi(ig, ii) - (0._DP,1._DP) * psi(ig, ii+1)) ENDDO !$omp end parallel do ENDIF ! IF ( ii == MIN(m,nibands(my_egrp_id+1)) ) THEN ! deal with a single last band !$omp parallel do default(shared), private(ig) DO ig = 1, npwt psi_rhoc_work( dfftt%nl(ig) ) = psi(ig,ii) psi_rhoc_work( dfftt%nlm(ig) ) = CONJG(psi(ig,ii)) ENDDO !$omp end parallel do ENDIF ! CALL invfft( 'Wave', psi_rhoc_work, dfftt ) !$omp parallel do default(shared), private(ir) DO ir = 1, nrxxs temppsic_DBLE(ir) = DBLE( psi_rhoc_work(ir) ) temppsic_aimag(ir) = AIMAG( psi_rhoc_work(ir) ) ENDDO !$omp end parallel do ! ENDIF ! ! !determine which j-bands to calculate jstart = 0 jend = 0 DO ipair = 1, max_pairs IF (egrp_pairs(1,ipair,my_egrp_id+1) == ibnd) THEN IF (jstart == 0)THEN jstart = egrp_pairs(2,ipair,my_egrp_id+1) jend = jstart ELSE jend = egrp_pairs(2,ipair,my_egrp_id+1) ENDIF ENDIF ENDDO ! jstart = MAX(jstart,jblock_start) jend = MIN(jend,jblock_end) ! IF (MOD(jstart,2) ==0 ) THEN ibnd_loop_start = jstart-1 ELSE ibnd_loop_start = jstart ENDIF ! IBND_LOOP_GAM : & DO jbnd = ibnd_loop_start, jend, 2 !for each band of psi ! exxbuff_index = (jbnd+1)/2-(all_start(wegrp)+1)/2+(iexx_start+1)/2 ! IF ( jbnd < jstart ) THEN x1 = 0.0_DP ELSE x1 = x_occupation(jbnd,ik) ENDIF IF ( jbnd == jend) THEN x2 = 0.0_DP ELSE x2 = x_occupation(jbnd+1,ik) ENDIF IF ( ABS(x1) < eps_occ .AND. ABS(x2) < eps_occ ) CYCLE ! ! ... calculate rho in real space. Gamma tricks are used. ! temppsic is real; tempphic contains one band in the real part, ! another one in the imaginary part; the same applies to rhoc ! IF ( MOD(ii,2) == 0 ) THEN !$omp parallel do default(shared), private(ir) DO ir = 1, nrxxs psi_rhoc_work(ir) = exxbuff(ir,exxbuff_index,ikq) * temppsic_aimag(ir) / omega ENDDO !$omp end parallel do ELSE !$omp parallel do default(shared), private(ir) DO ir = 1, nrxxs psi_rhoc_work(ir) = exxbuff(ir,exxbuff_index,ikq) * temppsic_DBLE(ir) / omega ENDDO !$omp end parallel do ENDIF ! ! ... bring rho to G-space ! ! >>> add augmentation in REAL SPACE here IF (okvan .AND. tqr) THEN IF (jbnd >= jstart) & CALL addusxx_r( psi_rhoc_work, & _CX(becxx(ikq)%r(:,jbnd)), _CX(becpsi%r(:,ibnd))) IF (jbnd < jend) & CALL addusxx_r( psi_rhoc_work, & _CY(becxx(ikq)%r(:,jbnd+1)),_CX(becpsi%r(:,ibnd))) ENDIF ! CALL fwfft( 'Rho', psi_rhoc_work, dfftt ) ! >>>> add augmentation in G SPACE here IF (okvan .AND. .NOT. tqr) THEN ! ... contribution from one band added to real (in real space) part of rhoc IF (jbnd >= jstart) & CALL addusxx_g( dfftt, psi_rhoc_work, xkq, xkp, 'r', & becphi_r=becxx(ikq)%r(:,jbnd), becpsi_r=becpsi%r(:,ibnd) ) ! ... contribution from following band added to imaginary (in real space) part of rhoc IF (jbnd < jend) & CALL addusxx_g( dfftt, psi_rhoc_work, xkq, xkp, 'i', & becphi_r=becxx(ikq)%r(:,jbnd+1), becpsi_r=becpsi%r(:,ibnd) ) ENDIF ! >>>> charge density done ! vc(:,ii) = 0._DP ! !$omp parallel do default(shared), private(ig) DO ig = 1, dfftt%ngm ! vc(dfftt%nl(ig),ii) = coulomb_fac(ig,iq,current_k) * psi_rhoc_work(dfftt%nl(ig)) vc(dfftt%nlm(ig),ii) = coulomb_fac(ig,iq,current_k) * psi_rhoc_work(dfftt%nlm(ig)) ! ENDDO !$omp end parallel do ! ! >>>> compute = jstart) & CALL newdxx_g( dfftt, vc(:,ii), xkq, xkp, 'r', deexx(:,ii), & becphi_r=x1*becxx(ikq)%r(:,jbnd) ) IF (jbnd>>> compute = jstart) & CALL newdxx_r( dfftt,vc(:,ii), _CX(x1*becxx(ikq)%r(:,jbnd)), deexx(:,ii) ) IF (jbnd < jend) & CALL newdxx_r( dfftt,vc(:,ii), _CY(x2*becxx(ikq)%r(:,jbnd+1)), deexx(:,ii) ) ENDIF ! IF (okpaw) THEN IF (jbnd >= jstart) & CALL PAW_newdxx( x1/nqs, _CX(becxx(ikq)%r(:,jbnd)),& _CX(becpsi%r(:,ibnd)), deexx(:,ii) ) IF (jbnd < jend) & CALL PAW_newdxx( x2/nqs, _CX(becxx(ikq)%r(:,jbnd+1)),& _CX(becpsi%r(:,ibnd)), deexx(:,ii) ) ENDIF ! ! ... accumulates over bands and k points ! !$omp parallel do default(shared), private(ir) DO ir = 1, nrxxs result(ir,ii) = result(ir,ii) & + x1* DBLE(vc(ir,ii))* DBLE(exxbuff(ir,exxbuff_index,ikq)) & + x2*AIMAG(vc(ir,ii))*AIMAG(exxbuff(ir,exxbuff_index,ikq)) ENDDO !$omp end parallel do ! ENDDO & IBND_LOOP_GAM ! ENDDO & LOOP_ON_PSI_BANDS ! ! get the next nbnd/negrp data IF (negrp > 1) CALL mp_circular_shift_left( exxbuff(:,:,ikq), me_egrp, inter_egrp_comm ) ! ENDDO ! iegrp IF (okvan .AND. .NOT.tqr) CALL qvan_clean() ENDDO & INTERNAL_LOOP_ON_Q ! DO ii = 1, nibands(my_egrp_id+1) ! ibnd = ibands(ii,my_egrp_id+1) ! IF (ibnd == 0 .OR. ibnd>m) CYCLE ! IF (okvan) THEN CALL mp_sum( deexx(:,ii), intra_egrp_comm ) ENDIF ! ! ... brings back result in G-space ! CALL fwfft( 'Wave' , result(:,ii), dfftt ) ! ! ... communicate result DO ig = 1, n big_result(ig,ibnd) = big_result(ig,ibnd) - exxalfa*result(dfftt%nl(igk_exx(ig,current_k)),ii) ENDDO ! ! ... add non-local \sum_I |beta_I> \alpha_Ii (the sum on i is outside) IF (okvan) CALL add_nlxx_pot( lda, big_result(:,ibnd), xkp, n, & igk_exx(1,current_k), deexx(:,ii), eps_occ, exxalfa ) ENDDO ! CALL result_sum( n*npol, m, big_result ) ! IF (iexx_istart(my_egrp_id+1) > 0) THEN IF (negrp == 1) THEN ending_im = m ELSE ending_im = iexx_iend(my_egrp_id+1) - iexx_istart(my_egrp_id+1) + 1 END IF DO im = 1, ending_im !$omp parallel do default(shared), private(ig) firstprivate(im,n) DO ig = 1, n hpsi(ig,im) = hpsi(ig,im) + big_result(ig,im+iexx_istart(my_egrp_id+1)-1) ENDDO !$omp end parallel do ENDDO ENDIF ! DEALLOCATE( big_result ) DEALLOCATE( result, temppsic_dble, temppsic_aimag ) DEALLOCATE( psi_rhoc_work ) DEALLOCATE( vc, fac ) IF (okvan) DEALLOCATE( deexx ) ! END SUBROUTINE vexx_gamma ! ! !----------------------------------------------------------------------- SUBROUTINE vexx_k( lda, n, m, psi, hpsi, becpsi ) !----------------------------------------------------------------------- !! Generic, k-point version of vexx. ! USE constants, ONLY : fpi, e2, pi USE cell_base, ONLY : omega USE gvect, ONLY : ngm, g USE wvfct, ONLY : npwx, current_k, nbnd USE klist, ONLY : xk, nks, nkstot USE fft_interfaces, ONLY : fwfft, invfft USE becmod, ONLY : bec_type USE mp_exx, ONLY : inter_egrp_comm, my_egrp_id, negrp, & intra_egrp_comm, me_egrp, & max_pairs, egrp_pairs, ibands, nibands, & max_ibands, iexx_istart, iexx_iend, & all_start, all_end, iexx_start, jblock USE mp, ONLY : mp_sum, mp_barrier, mp_circular_shift_left USE uspp, ONLY : nkb, okvan USE paw_variables, ONLY : okpaw USE us_exx, ONLY : bexg_merge, becxx, addusxx_g, addusxx_r, & newdxx_g, newdxx_r, add_nlxx_pot, & qvan_init, qvan_clean USE paw_exx, ONLY : PAW_newdxx USE exx_base, ONLY : nqs, xkq_collect, index_xkq, index_xk, & coulomb_fac, g2_convolution_all USE exx_band, ONLY : result_sum, igk_exx USE io_global, ONLY : stdout ! ! IMPLICIT NONE ! INTEGER :: lda !! input: leading dimension of arrays psi and hpsi INTEGER :: n !! input: true dimension of psi and hpsi INTEGER :: m !! input: number of states psi COMPLEX(DP) :: psi(lda*npol,max_ibands) !! input: m wavefunctions COMPLEX(DP) :: hpsi(lda*npol,max_ibands) !! output: V_x*psi TYPE(bec_type), OPTIONAL :: becpsi ! or call a calbec(...psi) instead !! input: , optional but needed for US and PAW case ! ! ... local variables ! COMPLEX(DP),ALLOCATABLE :: temppsic(:,:), result(:,:) #if defined(__USE_INTEL_HBM_DIRECTIVES) !DIR$ ATTRIBUTES FASTMEM :: result #elif defined(__USE_CRAY_HBM_DIRECTIVES) !DIR$ memory(bandwidth) result #endif COMPLEX(DP),ALLOCATABLE :: temppsic_nc(:,:,:),result_nc(:,:,:) INTEGER :: request_send, request_recv ! COMPLEX(DP),ALLOCATABLE :: deexx(:,:) COMPLEX(DP),ALLOCATABLE,TARGET :: rhoc(:,:), vc(:,:) #if defined(__USE_MANY_FFT) COMPLEX(DP),POINTER :: prhoc(:), pvc(:) #endif #if defined(__USE_INTEL_HBM_DIRECTIVES) !DIR$ ATTRIBUTES FASTMEM :: rhoc, vc #elif defined(__USE_CRAY_HBM_DIRECTIVES) !DIR$ memory(bandwidth) rhoc, vc #endif REAL(DP), ALLOCATABLE :: fac(:), facb(:) INTEGER :: ibnd, ik, im , ikq, iq, ipol INTEGER :: ir, ig, ir_start, ir_end INTEGER :: irt, nrt, nblock INTEGER :: current_ik INTEGER :: ibnd_loop_start INTEGER :: nrxxs REAL(DP) :: x1, x2, xkp(3), omega_inv, nqs_inv REAL(DP) :: xkq(3) INTEGER, EXTERNAL :: global_kpoint_index DOUBLE PRECISION :: max, tempx COMPLEX(DP), ALLOCATABLE :: big_result(:,:) INTEGER :: ir_out, ipair, jbnd INTEGER :: ii, jstart, jend, jcount, jind INTEGER :: ialloc, ending_im INTEGER :: ijt, njt, jblock_start, jblock_end INTEGER :: iegrp, wegrp ! ialloc = nibands(my_egrp_id+1) ! ALLOCATE( fac(dfftt%ngm) ) nrxxs= dfftt%nnr ALLOCATE( facb(nrxxs) ) ! IF (noncolin) THEN ALLOCATE( temppsic_nc(nrxxs,npol,ialloc), result_nc(nrxxs,npol,ialloc) ) ELSE ALLOCATE( temppsic(nrxxs,ialloc), result(nrxxs,ialloc) ) ENDIF ! IF (okvan) ALLOCATE( deexx(nkb,ialloc) ) ! current_ik = global_kpoint_index( nkstot, current_k ) xkp = xk(:,current_k) ! ALLOCATE( big_result(n*npol,m) ) big_result = 0.0_DP ! ! allocate arrays for rhoc and vc ALLOCATE( rhoc(nrxxs,jblock), vc(nrxxs,jblock) ) #if defined(__USE_MANY_FFT) prhoc(1:nrxxs*jblock) => rhoc(:,:) pvc(1:nrxxs*jblock) => vc(:,:) #endif ! DO ii = 1, nibands(my_egrp_id+1) ! ibnd = ibands(ii,my_egrp_id+1) ! IF (ibnd==0 .OR. ibnd>m) CYCLE IF (okvan) deexx(:,ii) = 0._DP ! IF (noncolin) THEN temppsic_nc(:,:,ii) = 0._DP ELSE !$omp parallel do default(shared), private(ir) firstprivate(nrxxs) DO ir = 1, nrxxs temppsic(ir,ii) = 0._DP ENDDO ENDIF ! IF (noncolin) THEN ! !$omp parallel do default(shared), private(ig) DO ig = 1, n temppsic_nc(dfftt%nl(igk_exx(ig,current_k)),1,ii) = psi(ig,ii) temppsic_nc(dfftt%nl(igk_exx(ig,current_k)),2,ii) = psi(npwx+ig,ii) ENDDO !$omp end parallel do ! CALL invfft( 'Wave', temppsic_nc(:,1,ii), dfftt ) CALL invfft( 'Wave', temppsic_nc(:,2,ii), dfftt ) ! ELSE ! !$omp parallel do default(shared), private(ig) DO ig = 1, n temppsic( dfftt%nl(igk_exx(ig,current_k)), ii ) = psi(ig,ii) ENDDO !$omp end parallel do ! CALL invfft( 'Wave', temppsic(:,ii), dfftt ) ! ENDIF ! IF (noncolin) THEN !$omp parallel do default(shared) firstprivate(nrxxs) private(ir) DO ir = 1, nrxxs result_nc(ir,1,ii) = 0.0_DP result_nc(ir,2,ii) = 0.0_DP ENDDO ELSE !$omp parallel do default(shared) firstprivate(nrxxs) private(ir) DO ir = 1, nrxxs result(ir,ii) = 0.0_DP ENDDO ENDIF ! ENDDO ! !precompute these guys omega_inv = 1.0 / omega nqs_inv = 1.0 / nqs ! !------------------------------------------------------------------------! ! Beginning of main loop !------------------------------------------------------------------------! DO iq = 1, nqs ! ikq = index_xkq(current_ik,iq) ik = index_xk(ikq) xkq = xkq_collect(:,ikq) ! ! calculate the 1/|r-r'| (actually, k+q+g) factor and place it in fac CALL g2_convolution_all( dfftt%ngm, gt, xkp, xkq, iq, current_k ) ! ! JRD - below not threaded facb = 0D0 DO ig = 1, dfftt%ngm facb(dfftt%nl(ig)) = coulomb_fac(ig,iq,current_k) ENDDO ! IF ( okvan .AND..NOT.tqr ) CALL qvan_init( dfftt%ngm, xkq, xkp ) ! DO iegrp = 1, negrp ! ! compute the id of group whose data is currently worked on wegrp = MOD(iegrp+my_egrp_id-1, negrp)+1 njt = (all_end(wegrp)-all_start(wegrp)+jblock)/jblock ! DO ijt = 1, njt ! jblock_start = (ijt - 1) * jblock + all_start(wegrp) jblock_end = MIN(jblock_start+jblock-1,all_end(wegrp)) ! DO ii = 1, nibands(my_egrp_id+1) ! ibnd = ibands(ii,my_egrp_id+1) ! IF (ibnd==0 .OR. ibnd>m) CYCLE ! !determine which j-bands to calculate jstart = 0 jend = 0 ! DO ipair = 1, max_pairs IF (egrp_pairs(1,ipair,my_egrp_id+1) == ibnd) THEN IF (jstart == 0) THEN jstart = egrp_pairs(2,ipair,my_egrp_id+1) jend = jstart ELSE jend = egrp_pairs(2,ipair,my_egrp_id+1) ENDIF ENDIF ENDDO ! jstart = MAX( jstart, jblock_start ) jend = MIN( jend, jblock_end ) ! !how many iters jcount = jend-jstart+1 IF (jcount <= 0) CYCLE ! !----------------------------------------------------------------------! !INNER LOOP START !----------------------------------------------------------------------! ! nblock = 2048 nrt = (nrxxs+nblock-1)/nblock ! !$omp parallel do collapse(2) private(ir_start,ir_end) DO irt = 1, nrt DO jbnd = jstart, jend ir_start = (irt - 1) * nblock + 1 ir_end = MIN(ir_start+nblock-1, nrxxs) IF (noncolin) THEN DO ir = ir_start, ir_end rhoc(ir,jbnd-jstart+1) = ( CONJG(exxbuff(ir,jbnd-all_start(wegrp)+ & iexx_start,ikq))*temppsic_nc(ir,1,ii) + & CONJG(exxbuff(nrxxs+ir,jbnd-all_start(wegrp)+ & iexx_start,ikq))*temppsic_nc(ir,2,ii) )/omega ENDDO ELSE !DIR$ vector nontemporal (rhoc) DO ir = ir_start, ir_end rhoc(ir,jbnd-jstart+1) = CONJG(exxbuff(ir,jbnd-all_start(wegrp)+ & iexx_start,ikq))*temppsic(ir,ii)*omega_inv ENDDO ENDIF ENDDO ENDDO !$omp end parallel do ! ! ... add augmentation in REAL space HERE IF (okvan .AND. tqr) THEN ! augment the "charge" in real space DO jbnd = jstart, jend CALL addusxx_r(rhoc(:,jbnd-jstart+1), becxx(ikq)%k(:,jbnd), becpsi%k(:,ibnd)) ENDDO ENDIF ! ! ... brings it to G-space #if defined(__USE_MANY_FFT) CALL fwfft( 'Rho', prhoc, dfftt, howmany=jcount ) #else DO jbnd=jstart, jend CALL fwfft( 'Rho', rhoc(:,jbnd-jstart+1), dfftt ) ENDDO #endif ! ! ... add augmentation in G space HERE IF (okvan .AND. .NOT. tqr) THEN DO jbnd = jstart, jend CALL addusxx_g( dfftt, rhoc(:,jbnd-jstart+1), xkq, xkp, & 'c', becphi_c=becxx(ikq)%k(:,jbnd),becpsi_c=becpsi%k(:,ibnd) ) ENDDO ENDIF ! ... charge done ! !call start_collection() !$omp parallel do collapse(2) private(ir_start,ir_end) DO irt = 1, nrt DO jbnd = jstart, jend ir_start = (irt - 1) * nblock + 1 ir_end = MIN(ir_start+nblock-1,nrxxs) !DIR$ vector nontemporal (vc) DO ir = ir_start, ir_end vc(ir,jbnd-jstart+1) = facb(ir) * rhoc(ir,jbnd-jstart+1)*& x_occupation(jbnd,ik) * nqs_inv ENDDO ENDDO ENDDO !$omp end parallel do !call stop_collection() ! ! Add ultrasoft contribution (RECIPROCAL SPACE) ! compute alpha_I,j,k+q = \sum_J \int V_i,j,k,q Q_I,J(r) d3r IF (okvan .AND. .NOT. tqr) THEN DO jbnd=jstart, jend CALL newdxx_g( dfftt, vc(:,jbnd-jstart+1), xkq, xkp, 'c',& deexx(:,ii), becphi_c=becxx(ikq)%k(:,jbnd) ) ENDDO ENDIF ! !brings back v in real space #if defined(__USE_MANY_FFT) !fft many CALL invfft( 'Rho', pvc, dfftt, howmany=jcount ) #else DO jbnd = jstart, jend CALL invfft( 'Rho', vc(:,jbnd-jstart+1), dfftt ) ENDDO #endif ! ! Add ultrasoft contribution (REAL SPACE) IF (okvan .AND. tqr) THEN DO jbnd = jstart, jend CALL newdxx_r( dfftt, vc(:,jbnd-jstart+1), becxx(ikq)%k(:,jbnd),deexx(:,ii) ) ENDDO ENDIF ! ! ... Add PAW one-center contribution IF (okpaw) THEN DO jbnd = jstart, jend CALL PAW_newdxx( x_occupation(jbnd,ik)/nqs, becxx(ikq)%k(:,jbnd), & becpsi%k(:,ibnd), deexx(:,ii) ) ENDDO ENDIF ! ! ... accumulates over bands and k points ! !call start_collection() !$omp parallel do private(ir_start,ir_end) DO irt = 1, nrt DO jbnd = jstart, jend ir_start = (irt - 1) * nblock + 1 ir_end = MIN(ir_start+nblock-1, nrxxs) IF (noncolin) THEN DO ir = ir_start, ir_end result_nc(ir,1,ii) = result_nc(ir,1,ii) + vc(ir,jbnd-jstart+1) * & exxbuff(ir,jbnd-all_start(wegrp)+iexx_start,ikq) result_nc(ir,2,ii) = result_nc(ir,2,ii) + vc(ir,jbnd-jstart+1) * & exxbuff(ir+nrxxs,jbnd-all_start(wegrp)+iexx_start,ikq) ENDDO ELSE !!dir$ vector nontemporal (result) DO ir = ir_start, ir_end result(ir,ii) = result(ir,ii) + vc(ir,jbnd-jstart+1)* & exxbuff(ir,jbnd-all_start(wegrp)+iexx_start,ikq) ENDDO ENDIF ENDDO ENDDO !$omp end parallel do !call stop_collection() ! !----------------------------------------------------------------------! !INNER LOOP END !----------------------------------------------------------------------! ! ENDDO !I-LOOP ENDDO !IJT ! ! get the next nbnd/negrp data IF (negrp > 1) CALL mp_circular_shift_left( exxbuff(:,:,ikq), me_egrp, inter_egrp_comm ) ! ENDDO !iegrp ! IF ( okvan .AND..NOT.tqr ) CALL qvan_clean() ! ENDDO ! ! ! DO ii = 1, nibands(my_egrp_id+1) ! ibnd = ibands(ii,my_egrp_id+1) ! IF (ibnd==0 .OR. ibnd>m) CYCLE ! IF (okvan) THEN CALL mp_sum( deexx(:,ii), intra_egrp_comm ) ENDIF ! IF (noncolin) THEN !brings back result in G-space CALL fwfft( 'Wave', result_nc(:,1,ii), dfftt ) CALL fwfft( 'Wave', result_nc(:,2,ii), dfftt ) ! DO ig = 1, n big_result(ig,ibnd) = big_result(ig,ibnd) - exxalfa* & result_nc(dfftt%nl(igk_exx(ig,current_k)),1,ii) big_result(n+ig,ibnd) = big_result(n+ig,ibnd) - exxalfa* & result_nc(dfftt%nl(igk_exx(ig,current_k)),2,ii) ENDDO ELSE ! CALL fwfft( 'Wave', result(:,ii), dfftt ) ! DO ig = 1, n big_result(ig,ibnd) = big_result(ig,ibnd) - exxalfa* & result(dfftt%nl(igk_exx(ig,current_k)),ii) ENDDO ENDIF ! ! add non-local \sum_I |beta_I> \alpha_Ii (the sum on i is outside) IF (okvan) CALL add_nlxx_pot( lda, big_result(:,ibnd), xkp, n, igk_exx(:,current_k), & deexx(:,ii), eps_occ, exxalfa ) ! ENDDO ! !deallocate temporary arrays DEALLOCATE( rhoc, vc ) ! !sum result CALL result_sum( n*npol, m, big_result ) ! IF (iexx_istart(my_egrp_id+1) > 0) THEN ! IF (negrp == 1) THEN ending_im = m ELSE ending_im = iexx_iend(my_egrp_id+1) - iexx_istart(my_egrp_id+1) + 1 ENDIF ! IF (noncolin) THEN DO im = 1, ending_im !$omp parallel do default(shared), private(ig) firstprivate(im,n) DO ig = 1, n hpsi(ig,im) = hpsi(ig,im) + big_result(ig,im+iexx_istart(my_egrp_id+1)-1) ENDDO !$omp end parallel do !$omp parallel do default(shared), private(ig) firstprivate(im,n) DO ig = 1, n hpsi(lda+ig,im) = hpsi(lda+ig,im) + big_result(n+ig,im+iexx_istart(my_egrp_id+1)-1) ENDDO !$omp end parallel do ENDDO ELSE DO im = 1, ending_im !$omp parallel do default(shared), private(ig) firstprivate(im,n) DO ig = 1, n hpsi(ig,im) = hpsi(ig,im) + big_result(ig,im+iexx_istart(my_egrp_id+1)-1) ENDDO !$omp end parallel do ENDDO ENDIF ENDIF ! IF (noncolin) THEN DEALLOCATE( temppsic_nc, result_nc ) ELSE DEALLOCATE( temppsic, result ) ENDIF ! DEALLOCATE( big_result ) DEALLOCATE( fac, facb ) IF (okvan) DEALLOCATE( deexx ) ! END SUBROUTINE vexx_k ! ! !----------------------------------------------------------------------- FUNCTION exxenergy() !----------------------------------------------------------------------- !! NB: This function is meant to give the SAME RESULT as exxenergy2. !! It is worth keeping it in the repository because in spite of being !! slower it is a simple driver using vexx potential routine so it is !! good, from time to time, to replace exxenergy2 with it to check that !! everything is ok and energy and potential are consistent as they should. ! USE io_files, ONLY : iunwfc_exx, nwordwfc USE buffers, ONLY : get_buffer USE wvfct, ONLY : nbnd, npwx, wg, current_k USE gvect, ONLY : gstart USE wavefunctions, ONLY : evc USE lsda_mod, ONLY : lsda, current_spin, isk USE klist, ONLY : ngk, nks, xk USE mp_pools, ONLY : inter_pool_comm USE mp_exx, ONLY : intra_egrp_comm, intra_egrp_comm, & negrp USE mp, ONLY : mp_sum USE becmod, ONLY : bec_type, allocate_bec_type, & deallocate_bec_type, calbec USE uspp, ONLY : okvan,nkb,vkb USE exx_band, ONLY : nwordwfc_exx, igk_exx ! IMPLICIT NONE ! TYPE(bec_type) :: becpsi REAL(DP) :: exxenergy, energy INTEGER :: npw, ibnd, ik COMPLEX(DP) :: vxpsi(npwx*npol,nbnd), psi(npwx*npol,nbnd) ! exxenergy = 0._DP ! CALL start_clock( 'exxenergy' ) ! IF (okvan) CALL allocate_bec_type( nkb, nbnd, becpsi ) energy = 0._dp ! DO ik = 1, nks npw = ngk(ik) ! setup variables for usage by vexx (same logic as for H_psi) current_k = ik IF ( lsda ) current_spin = isk(ik) ! end setup IF ( nks > 1 ) THEN CALL get_buffer( psi, nwordwfc_exx, iunwfc_exx, ik ) ELSE psi(1:npwx*npol,1:nbnd) = evc(1:npwx*npol,1:nbnd) ENDIF ! IF (okvan) THEN ! prepare the |beta> function at k+q CALL init_us_2( npw, igk_exx(1,ik), xk(:,ik), vkb ) ! compute at this k+q point, for all band and all projectors CALL calbec( npw, vkb, psi, becpsi, nbnd ) ENDIF ! vxpsi(:,:) = (0._dp, 0._dp) CALL vexx( npwx, npw, nbnd, psi, vxpsi, becpsi ) ! DO ibnd = 1, nbnd energy = energy + DBLE(wg(ibnd,ik) * dot_product(psi(1:npw,ibnd),vxpsi(1:npw,ibnd))) IF (noncolin) energy = energy + & DBLE(wg(ibnd,ik) * dot_product(psi(npwx+1:npwx+npw,ibnd),vxpsi(npwx+1:npwx+npw,ibnd))) ! ENDDO IF (gamma_only .AND. gstart == 2) THEN DO ibnd = 1, nbnd energy = energy - & DBLE(0.5_dp * wg(ibnd,ik) * CONJG(psi(1,ibnd)) * vxpsi(1,ibnd)) ENDDO ENDIF ENDDO ! IF (gamma_only) energy = 2 * energy ! CALL mp_sum( energy, intra_egrp_comm ) CALL mp_sum( energy, inter_pool_comm ) IF (okvan) CALL deallocate_bec_type( becpsi ) ! exxenergy = energy ! CALL stop_clock( 'exxenergy' ) ! END FUNCTION exxenergy ! ! !----------------------------------------------------------------------- FUNCTION exxenergy2() !----------------------------------------------------------------------- !! Wrapper to \(\texttt{exxenergy2_gamma}\) and \(\texttt{exxenergy2_k}\). ! IMPLICIT NONE ! REAL(DP) :: exxenergy2 ! CALL start_clock( 'exxenergy' ) ! IF ( gamma_only ) THEN exxenergy2 = exxenergy2_gamma() ELSE exxenergy2 = exxenergy2_k() ENDIF ! CALL stop_clock( 'exxenergy' ) ! END FUNCTION exxenergy2 ! ! !----------------------------------------------------------------------- FUNCTION exxenergy2_gamma() !----------------------------------------------------------------------- ! USE constants, ONLY : fpi, e2, pi USE io_files, ONLY : iunwfc_exx, nwordwfc USE buffers, ONLY : get_buffer USE cell_base, ONLY : alat, omega, bg, at, tpiba USE symm_base, ONLY : nsym, s USE gvect, ONLY : ngm, gstart, g USE wvfct, ONLY : nbnd, npwx, wg USE wavefunctions, ONLY : evc USE klist, ONLY : xk, ngk, nks, nkstot USE lsda_mod, ONLY : lsda, current_spin, isk USE mp_pools, ONLY : inter_pool_comm USE mp_bands, ONLY : intra_bgrp_comm USE mp_exx, ONLY : inter_egrp_comm, my_egrp_id, negrp, & intra_egrp_comm, me_egrp, & max_pairs, egrp_pairs, ibands, nibands, & iexx_istart, iexx_iend, & all_start, all_end, iexx_start, & init_index_over_band, jblock USE mp, ONLY : mp_sum, mp_circular_shift_left USE fft_interfaces, ONLY : fwfft, invfft USE gvect, ONLY : ecutrho USE klist, ONLY : wk USE uspp, ONLY : okvan,nkb,vkb USE becmod, ONLY : bec_type, allocate_bec_type, & deallocate_bec_type, calbec USE paw_variables, ONLY : okpaw USE paw_exx, ONLY : PAW_xx_energy USE us_exx, ONLY : bexg_merge, becxx, addusxx_g, & addusxx_r, qvan_init, qvan_clean USE exx_base, ONLY : nqs, xkq_collect, index_xkq, index_xk, & coulomb_fac, g2_convolution_all USE exx_band, ONLY : igk_exx, change_data_structure, & transform_evc_to_exx, nwordwfc_exx, & evc_exx ! IMPLICIT NONE ! REAL(DP) :: exxenergy2_gamma ! ! ... local variables ! REAL(DP) :: energy COMPLEX(DP), ALLOCATABLE :: temppsic(:) COMPLEX(DP), ALLOCATABLE :: rhoc(:) REAL(DP), ALLOCATABLE :: fac(:) COMPLEX(DP), ALLOCATABLE :: vkb_exx(:,:) INTEGER :: jbnd, ibnd, ik, ikk, ig, ikq, iq, ir INTEGER :: nrxxs, current_ik, ibnd_loop_start REAL(DP) :: x1, x2 REAL(DP) :: xkq(3), xkp(3), vc INTEGER, EXTERNAL :: global_kpoint_index ! TYPE(bec_type) :: becpsi COMPLEX(DP), ALLOCATABLE :: psi_t(:), prod_tot(:) REAL(DP),ALLOCATABLE :: temppsic_dble (:) REAL(DP),ALLOCATABLE :: temppsic_aimag(:) INTEGER :: npw INTEGER :: istart, iend, ipair, ii, ialloc INTEGER :: ijt, njt, jblock_start, jblock_end INTEGER :: exxbuff_index INTEGER :: calbec_start, calbec_end INTEGER :: intra_bgrp_comm_ INTEGER :: iegrp, wegrp ! CALL init_index_over_band( inter_egrp_comm, nbnd, nbnd ) ! CALL transform_evc_to_exx( 0 ) ! ialloc = nibands(my_egrp_id+1) ! nrxxs = dfftt%nnr ALLOCATE( fac(dfftt%ngm) ) ! ALLOCATE( temppsic(nrxxs), temppsic_DBLE(nrxxs), temppsic_aimag(nrxxs) ) ALLOCATE( rhoc(nrxxs) ) ALLOCATE( vkb_exx(npwx,nkb) ) ! energy = 0.0_DP ! CALL allocate_bec_type( nkb, nbnd, becpsi ) ! IKK_LOOP : & DO ikk = 1, nks current_ik = global_kpoint_index( nkstot, ikk ) xkp = xk(:,ikk) ! IF ( lsda ) current_spin = isk(ikk) npw = ngk (ikk) IF ( nks > 1 ) CALL get_buffer( evc_exx, nwordwfc_exx, iunwfc_exx, ikk ) ! ! ... prepare the |beta> function at k+q CALL init_us_2( npw, igk_exx(:,ikk), xkp, vkb_exx ) ! ! ... compute at this k+q point, for all band and all projectors calbec_start = ibands(1,my_egrp_id+1) calbec_end = ibands(nibands(my_egrp_id+1),my_egrp_id+1) ! intra_bgrp_comm_ = intra_bgrp_comm intra_bgrp_comm = intra_egrp_comm ! CALL calbec( npw, vkb_exx, evc_exx, becpsi, nibands(my_egrp_id+1) ) ! intra_bgrp_comm = intra_bgrp_comm_ ! IQ_LOOP : & DO iq = 1,nqs ! ikq = index_xkq(current_ik,iq) ik = index_xk(ikq) ! xkq = xkq_collect(:,ikq) ! CALL g2_convolution_all( dfftt%ngm, gt, xkp, xkq, iq, current_ik ) ! fac = coulomb_fac(:,iq,current_ik) fac(gstart_t:) = 2 * coulomb_fac(gstart_t:,iq,current_ik) ! IF ( okvan .AND. .NOT.tqr ) CALL qvan_init( dfftt%ngm, xkq, xkp ) ! DO iegrp = 1, negrp ! ! ... compute the id of group whose data is currently worked on wegrp = MOD(iegrp+my_egrp_id-1, negrp)+1 ! jblock_start = all_start(wegrp) jblock_end = all_end(wegrp) ! JBND_LOOP : & DO ii = 1, nibands(my_egrp_id+1) ! jbnd = ibands(ii,my_egrp_id+1) ! IF (jbnd==0 .OR. jbnd>nbnd) CYCLE ! IF ( MOD(ii,2)==1 ) THEN ! temppsic = (0._DP,0._DP) ! IF ( (ii+1) <= nibands(my_egrp_id+1) ) THEN ! deal with double bands !$omp parallel do default(shared), private(ig) DO ig = 1, npwt temppsic( dfftt%nl(ig) ) = & evc_exx(ig,ii) + (0._DP,1._DP) * evc_exx(ig,ii+1) temppsic( dfftt%nlm(ig) ) = & CONJG(evc_exx(ig,ii) - (0._DP,1._DP) * evc_exx(ig,ii+1)) ENDDO !$omp end parallel do ENDIF ! IF (ii == nibands(my_egrp_id+1)) THEN ! deal with a single last band !$omp parallel do default(shared), private(ig) DO ig = 1, npwt temppsic( dfftt%nl(ig) ) = evc_exx(ig,ii) temppsic( dfftt%nlm(ig) ) = CONJG(evc_exx(ig,ii)) ENDDO !$omp end parallel do ENDIF ! CALL invfft( 'Wave', temppsic, dfftt ) !$omp parallel do default(shared), private(ir) DO ir = 1, nrxxs temppsic_DBLE(ir) = DBLE( temppsic(ir) ) temppsic_aimag(ir) = AIMAG( temppsic(ir) ) ENDDO !$omp end parallel do ! ENDIF ! !determine which j-bands to calculate istart = 0 iend = 0 ! DO ipair = 1, max_pairs IF (egrp_pairs(1,ipair,my_egrp_id+1) == jbnd) THEN IF (istart == 0) THEN istart = egrp_pairs(2,ipair,my_egrp_id+1) iend = istart ELSE iend = egrp_pairs(2,ipair,my_egrp_id+1) ENDIF ENDIF ENDDO ! istart = MAX(istart,jblock_start) iend = MIN(iend,jblock_end) ! IF (MOD(istart,2) == 0) THEN ibnd_loop_start = istart-1 ELSE ibnd_loop_start = istart ENDIF ! IBND_LOOP_GAM : & DO ibnd = ibnd_loop_start, iend, 2 !for each band of psi ! exxbuff_index = (ibnd+1)/2-(all_start(wegrp)+1)/2+(iexx_start+1)/2 ! IF ( ibnd < istart ) THEN x1 = 0.0_DP ELSE x1 = x_occupation(ibnd,ik) ENDIF ! IF ( ibnd < iend ) THEN x2 = x_occupation(ibnd+1,ik) ELSE x2 = 0.0_DP ENDIF ! calculate rho in real space. Gamma tricks are used. ! temppsic is real; tempphic contains band 1 in the real part, ! band 2 in the imaginary part; the same applies to rhoc ! IF ( MOD(ii,2) == 0 ) THEN rhoc = 0.0_DP !$omp parallel do default(shared), private(ir) DO ir = 1, nrxxs rhoc(ir) = exxbuff(ir,exxbuff_index,ikq) * temppsic_aimag(ir) / omega ENDDO !$omp end parallel do ELSE !$omp parallel do default(shared), private(ir) DO ir = 1, nrxxs rhoc(ir) = exxbuff(ir,exxbuff_index,ikq) * temppsic_DBLE(ir) / omega ENDDO !$omp end parallel do ENDIF ! IF (okvan .AND. tqr) THEN IF (ibnd >= istart) & CALL addusxx_r( rhoc,_CX(becxx(ikq)%r(:,ibnd)), & _CX(becpsi%r(:,jbnd)) ) IF (ibnd= istart) & CALL addusxx_g( dfftt, rhoc, xkq, xkp, 'r', & becphi_r=becxx(ikq)%r(:,ibnd), becpsi_r=becpsi%r(:,jbnd-calbec_start+1) ) IF (ibnd < iend) & CALL addusxx_g( dfftt, rhoc, xkq, xkp, 'i', & becphi_r=becxx(ikq)%r(:,ibnd+1), becpsi_r=becpsi%r(:,jbnd-calbec_start+1) ) ENDIF ! vc = 0.0_DP !$omp parallel do default(shared), private(ig), reduction(+:vc) DO ig = 1, dfftt%ngm ! ! The real part of rhoc contains the contribution from band ibnd ! The imaginary part contains the contribution from band ibnd+1 ! vc = vc + fac(ig) * ( x1 * & ABS( rhoc(dfftt%nl(ig)) + CONJG(rhoc(dfftt%nlm(ig))) )**2 & +x2 * & ABS( rhoc(dfftt%nl(ig)) - CONJG(rhoc(dfftt%nlm(ig))) )**2 ) ENDDO !$omp end parallel do ! vc = vc * omega * 0.25_DP / nqs energy = energy - exxalfa * vc * wg(jbnd,ikk) ! IF (okpaw) THEN IF (ibnd >= ibnd_start) & energy = energy + exxalfa*wg(jbnd,ikk)*& x1 * PAW_xx_energy(_CX(becxx(ikq)%r(:,ibnd)),_CX(becpsi%r(:,jbnd)) ) IF (ibnd < ibnd_end) & energy = energy + exxalfa*wg(jbnd,ikk)*& x2 * PAW_xx_energy(_CX(becxx(ikq)%r(:,ibnd+1)), _CX(becpsi%r(:,jbnd)) ) ENDIF ! ENDDO & IBND_LOOP_GAM ENDDO & JBND_LOOP ! ! get the next nbnd/negrp data IF (negrp > 1) CALL mp_circular_shift_left( exxbuff(:,:,ikq), me_egrp, inter_egrp_comm ) ! ENDDO ! iegrp IF ( okvan .AND. .NOT.tqr ) CALL qvan_clean( ) ! ENDDO & IQ_LOOP ENDDO & IKK_LOOP ! DEALLOCATE( temppsic, temppsic_dble, temppsic_aimag ) ! DEALLOCATE( rhoc, fac ) CALL deallocate_bec_type( becpsi ) ! CALL mp_sum( energy, inter_egrp_comm ) CALL mp_sum( energy, intra_egrp_comm ) CALL mp_sum( energy, inter_pool_comm ) ! exxenergy2_gamma = energy ! CALL change_data_structure( .FALSE. ) ! END FUNCTION exxenergy2_gamma ! ! !----------------------------------------------------------------------- FUNCTION exxenergy2_k() !----------------------------------------------------------------------- ! USE constants, ONLY : fpi, e2, pi USE io_files, ONLY : iunwfc_exx, nwordwfc USE buffers, ONLY : get_buffer USE cell_base, ONLY : alat, omega, bg, at, tpiba USE symm_base, ONLY : nsym, s USE gvect, ONLY : ngm, gstart, g USE wvfct, ONLY : nbnd, npwx, wg USE wavefunctions, ONLY : evc USE klist, ONLY : xk, ngk, nks, nkstot USE lsda_mod, ONLY : lsda, current_spin, isk USE mp_pools, ONLY : inter_pool_comm USE mp_exx, ONLY : inter_egrp_comm, my_egrp_id, negrp, & intra_egrp_comm, me_egrp, & max_pairs, egrp_pairs, ibands, nibands, & iexx_istart, iexx_iend, & all_start, all_end, iexx_start, & init_index_over_band, jblock USE mp_bands, ONLY : intra_bgrp_comm USE mp, ONLY : mp_sum, mp_circular_shift_left USE fft_interfaces, ONLY : fwfft, invfft USE gvect, ONLY : ecutrho USE klist, ONLY : wk USE uspp, ONLY : okvan,nkb,vkb USE becmod, ONLY : bec_type, allocate_bec_type, & deallocate_bec_type, calbec USE paw_variables, ONLY : okpaw USE paw_exx, ONLY : PAW_xx_energy USE us_exx, ONLY : bexg_merge, becxx, addusxx_g, & addusxx_r, qvan_init, qvan_clean USE exx_base, ONLY : nqs, xkq_collect, index_xkq, index_xk, & coulomb_fac, g2_convolution_all USE exx_band, ONLY : change_data_structure, & transform_evc_to_exx, nwordwfc_exx, & igk_exx, evc_exx ! IMPLICIT NONE ! REAL(DP) :: exxenergy2_k ! ! ... local variables ! REAL(DP) :: energy COMPLEX(DP), ALLOCATABLE :: temppsic(:,:) COMPLEX(DP), ALLOCATABLE :: temppsic_nc(:,:,:) COMPLEX(DP), ALLOCATABLE,TARGET :: rhoc(:,:) #if defined(__USE_MANY_FFT) COMPLEX(DP), POINTER :: prhoc(:) #endif REAL(DP), ALLOCATABLE :: fac(:) INTEGER :: npw, jbnd, ibnd, ibnd_inner_start, ibnd_inner_end, ibnd_inner_count, & ik, ikk, ig, ikq, iq, ir INTEGER :: h_ibnd, nrxxs, current_ik, ibnd_loop_start, nblock, nrt, irt, & ir_start, ir_end REAL(DP) :: x1, x2 REAL(DP) :: xkq(3), xkp(3), vc, omega_inv INTEGER, EXTERNAL :: global_kpoint_index ! TYPE(bec_type) :: becpsi COMPLEX(DP), ALLOCATABLE :: psi_t(:), prod_tot(:) INTEGER :: intra_bgrp_comm_ INTEGER :: ii, ialloc, jstart, jend, ipair INTEGER :: ijt, njt, jblock_start, jblock_end INTEGER :: iegrp, wegrp ! CALL init_index_over_band( inter_egrp_comm, nbnd, nbnd ) ! CALL transform_evc_to_exx( 0 ) ! ialloc = nibands(my_egrp_id+1) ! nrxxs = dfftt%nnr ALLOCATE( fac(dfftt%ngm) ) ! IF (noncolin) THEN ALLOCATE( temppsic_nc(nrxxs,npol,ialloc) ) ELSE ALLOCATE( temppsic(nrxxs,ialloc) ) ENDIF ! energy = 0.0_DP ! CALL allocate_bec_type( nkb, nbnd, becpsi ) ! !precompute that stuff omega_inv = 1.0/omega ! IKK_LOOP : & DO ikk = 1, nks ! current_ik = global_kpoint_index ( nkstot, ikk ) xkp = xk(:,ikk) ! IF ( lsda ) current_spin = isk(ikk) npw = ngk(ikk) IF ( nks > 1 ) CALL get_buffer( evc_exx, nwordwfc_exx, iunwfc_exx, ikk ) ! ! compute at this k+q point, for all band and all projectors intra_bgrp_comm_ = intra_bgrp_comm intra_bgrp_comm = intra_egrp_comm ! IF (okvan .OR. okpaw) THEN CALL compute_becpsi( npw, igk_exx(:,ikk), xkp, evc_exx, & becpsi%k(:,ibands(1,my_egrp_id+1)) ) ENDIF ! intra_bgrp_comm = intra_bgrp_comm_ ! ! ... precompute temppsic ! IF (noncolin) THEN temppsic_nc = 0.0_DP ELSE temppsic = 0.0_DP ENDIF ! DO ii = 1, nibands(my_egrp_id+1) ! jbnd = ibands(ii,my_egrp_id+1) ! IF (jbnd == 0 .OR. jbnd > nbnd) CYCLE ! !IF ( abs(wg(jbnd,ikk)) < eps_occ) CYCLE ! IF (noncolin) THEN ! !$omp parallel do default(shared), private(ig) DO ig = 1, npw temppsic_nc(dfftt%nl(igk_exx(ig,ikk)),1,ii) = evc_exx(ig,ii) temppsic_nc(dfftt%nl(igk_exx(ig,ikk)),2,ii) = evc_exx(npwx+ig,ii) ENDDO !$omp end parallel do ! CALL invfft( 'Wave', temppsic_nc(:,1,ii), dfftt ) CALL invfft( 'Wave', temppsic_nc(:,2,ii), dfftt ) ! ELSE !$omp parallel do default(shared), private(ig) DO ig = 1, npw temppsic(dfftt%nl(igk_exx(ig,ikk)),ii) = evc_exx(ig,ii) ENDDO !$omp end parallel do ! CALL invfft( 'Wave', temppsic(:,ii), dfftt ) ! ENDIF ENDDO ! IQ_LOOP : & DO iq = 1,nqs ! ikq = index_xkq(current_ik,iq) ik = index_xk(ikq) ! xkq = xkq_collect(:,ikq) ! CALL g2_convolution_all( dfftt%ngm, gt, xkp, xkq, iq, ikk ) IF ( okvan .AND..NOT.tqr ) CALL qvan_init( dfftt%ngm, xkq, xkp ) ! DO iegrp = 1, negrp ! ! ... compute the id of group whose data is currently worked on wegrp = MOD(iegrp+my_egrp_id-1, negrp)+1 njt = (all_end(wegrp)-all_start(wegrp)+jblock)/jblock ! IJT_LOOP : & DO ijt = 1, njt ! jblock_start = (ijt - 1) * jblock + all_start(wegrp) jblock_end = MIN(jblock_start+jblock-1,all_end(wegrp)) ! JBND_LOOP : & DO ii = 1, nibands(my_egrp_id+1) ! jbnd = ibands(ii,my_egrp_id+1) ! IF (jbnd==0 .OR. jbnd>nbnd) CYCLE ! !determine which j-bands to calculate jstart = 0 jend = 0 ! DO ipair = 1, max_pairs IF (egrp_pairs(1,ipair,my_egrp_id+1) == jbnd) THEN IF (jstart == 0) THEN jstart = egrp_pairs(2,ipair,my_egrp_id+1) jend = jstart ELSE jend = egrp_pairs(2,ipair,my_egrp_id+1) ENDIF ENDIF ENDDO ! !these variables prepare for inner band parallelism jstart = MAX(jstart,jblock_start) jend = MIN(jend,jblock_end) ibnd_inner_start = jstart ibnd_inner_end = jend ibnd_inner_count = jend-jstart+1 ! !allocate arrays ALLOCATE( rhoc(nrxxs,ibnd_inner_count) ) #if defined(__USE_MANY_FFT) prhoc(1:nrxxs*ibnd_inner_count) => rhoc #endif !calculate rho in real space nblock = 2048 nrt = (nrxxs+nblock-1) / nblock !$omp parallel do collapse(2) private(ir_start,ir_end) DO irt = 1, nrt DO ibnd = ibnd_inner_start, ibnd_inner_end ir_start = (irt - 1) * nblock + 1 ir_end = MIN(ir_start+nblock-1,nrxxs) IF (noncolin) THEN DO ir = ir_start, ir_end rhoc(ir,ibnd-ibnd_inner_start+1) = & ( CONJG(exxbuff(ir,ibnd-all_start(wegrp)+iexx_start,ikq)) * & temppsic_nc(ir,1,ii) + & CONJG(exxbuff(nrxxs+ir,ibnd-all_start(wegrp)+iexx_start,ikq)) * & temppsic_nc(ir,2,ii) ) * omega_inv ENDDO ELSE DO ir = ir_start, ir_end rhoc(ir,ibnd-ibnd_inner_start+1) = omega_inv * & CONJG(exxbuff(ir,ibnd-all_start(wegrp)+iexx_start,ikq)) * & temppsic(ir,ii) ENDDO ENDIF ENDDO ENDDO !$omp end parallel do ! ! augment the "charge" in real space IF (okvan .AND. tqr) THEN !$omp parallel do default(shared) private(ibnd) firstprivate(ibnd_inner_start,ibnd_inner_end) DO ibnd = ibnd_inner_start, ibnd_inner_end CALL addusxx_r( rhoc(:,ibnd-ibnd_inner_start+1), & becxx(ikq)%k(:,ibnd), becpsi%k(:,jbnd)) ENDDO !$omp end parallel do ENDIF ! ! bring rhoc to G-space #if defined(__USE_MANY_FFT) CALL fwfft ('Rho', prhoc, dfftt, howmany=ibnd_inner_count) #else DO ibnd = ibnd_inner_start, ibnd_inner_end CALL fwfft('Rho', rhoc(:,ibnd-ibnd_inner_start+1), dfftt) ENDDO #endif ! augment the "charge" in G space IF (okvan .AND. .NOT. tqr) THEN DO ibnd = ibnd_inner_start, ibnd_inner_end CALL addusxx_g(dfftt, rhoc(:,ibnd-ibnd_inner_start+1), & xkq, xkp, 'c', becphi_c=becxx(ikq)%k(:,ibnd), & becpsi_c=becpsi%k(:,jbnd)) ENDDO ENDIF ! !$omp parallel do reduction(+:energy) private(vc) DO ibnd = ibnd_inner_start, ibnd_inner_end vc=0.0_DP DO ig=1,dfftt%ngm vc = vc + coulomb_fac(ig,iq,ikk) * & DBLE(rhoc(dfftt%nl(ig),ibnd-ibnd_inner_start+1) *& CONJG(rhoc(dfftt%nl(ig),ibnd-ibnd_inner_start+1))) ENDDO vc = vc * omega * x_occupation(ibnd,ik) / nqs energy = energy - exxalfa * vc * wg(jbnd,ikk) ! IF (okpaw) THEN energy = energy +exxalfa*x_occupation(ibnd,ik)/nqs*wg(jbnd,ikk) & *PAW_xx_energy(becxx(ikq)%k(:,ibnd), becpsi%k(:,jbnd)) ENDIF ENDDO !$omp end parallel do ! !deallocate memory DEALLOCATE( rhoc ) ENDDO & JBND_LOOP ! ENDDO& IJT_LOOP ! get the next nbnd/negrp data IF (negrp > 1) call mp_circular_shift_left( exxbuff(:,:,ikq), me_egrp, inter_egrp_comm ) ! END DO !iegrp ! IF ( okvan .AND. .NOT.tqr ) CALL qvan_clean() ENDDO & IQ_LOOP ! ENDDO & IKK_LOOP ! IF (noncolin) THEN DEALLOCATE( temppsic_nc ) ELSE DEALLOCATE( temppsic ) ENDIF ! DEALLOCATE( fac ) ! CALL deallocate_bec_type( becpsi ) ! CALL mp_sum( energy, inter_egrp_comm ) CALL mp_sum( energy, intra_egrp_comm ) CALL mp_sum( energy, inter_pool_comm ) ! exxenergy2_k = energy CALL change_data_structure( .FALSE. ) ! END FUNCTION exxenergy2_k ! ! !----------------------------------------------------------------------- FUNCTION exx_stress() !----------------------------------------------------------------------- !! This is Eq.(10) of PRB 73, 125120 (2006). ! USE constants, ONLY : fpi, e2, pi, tpi USE io_files, ONLY : iunwfc_exx, nwordwfc USE buffers, ONLY : get_buffer USE cell_base, ONLY : alat, omega, bg, at, tpiba USE symm_base, ONLY : nsym, s USE wvfct, ONLY : nbnd, npwx, wg, current_k USE wavefunctions, ONLY : evc USE klist, ONLY : xk, ngk, nks USE lsda_mod, ONLY : lsda, current_spin, isk USE gvect, ONLY : g USE mp_pools, ONLY : npool, inter_pool_comm USE mp_exx, ONLY : inter_egrp_comm, intra_egrp_comm, & ibands, nibands, my_egrp_id, jblock, & egrp_pairs, max_pairs, negrp, me_egrp, & all_start, all_end, iexx_start USE mp, ONLY : mp_sum, mp_circular_shift_left USE fft_base, ONLY : dffts USE fft_interfaces, ONLY : fwfft, invfft USE uspp, ONLY : okvan ! USE exx_base, ONLY : nq1, nq2, nq3, nqs, eps, exxdiv, & x_gamma_extrapolation, on_double_grid, & grid_factor, yukawa, erfc_scrlen, & use_coulomb_vcut_ws, use_coulomb_vcut_spheric, & gau_scrlen, vcut, index_xkq, index_xk, index_sym USE exx_band, ONLY : change_data_structure, transform_evc_to_exx, & g_exx, igk_exx, nwordwfc_exx, evc_exx USE coulomb_vcut_module, ONLY : vcut_get, vcut_spheric_get ! IMPLICIT NONE ! ! ... local variables ! REAL(DP) :: exx_stress(3,3), exx_stress_(3,3) ! COMPLEX(DP),ALLOCATABLE :: tempphic(:), temppsic(:), result(:) COMPLEX(DP),ALLOCATABLE :: tempphic_nc(:,:), temppsic_nc(:,:), & result_nc(:,:) COMPLEX(DP),ALLOCATABLE :: rhoc(:) REAL(DP), ALLOCATABLE :: fac(:), fac_tens(:,:,:), fac_stress(:) INTEGER :: npw, jbnd, ibnd, ik, ikk, ig, ir, ikq, iq, isym INTEGER :: nqi, iqi, beta, nrxxs, ngm INTEGER :: ibnd_loop_start REAL(DP) :: x1, x2 REAL(DP) :: qq, xk_cryst(3), sxk(3), xkq(3), vc(3,3), x, q(3) REAL(DP) :: delta(3,3) INTEGER :: jstart, jend, ii, ipair, jblock_start, jblock_end INTEGER :: iegrp, wegrp INTEGER :: exxbuff_index ! CALL start_clock( 'exx_stress' ) ! CALL transform_evc_to_exx( 0 ) ! IF (npool>1) CALL errore( 'exx_stress', 'stress not available with pools', 1 ) IF (noncolin) CALL errore( 'exx_stress', 'noncolinear stress not implemented', 1 ) IF (okvan) CALL infomsg( 'exx_stress', 'USPP stress not tested' ) ! nrxxs = dfftt%nnr ngm = dfftt%ngm delta = RESHAPE( (/1._dp,0._dp,0._dp, 0._dp,1._dp,0._dp, 0._dp,0._dp,1._dp/), (/3,3/)) exx_stress_ = 0._dp ! ALLOCATE( tempphic(nrxxs), temppsic(nrxxs), rhoc(nrxxs), fac(ngm) ) ALLOCATE( fac_tens(3,3,ngm), fac_stress(ngm) ) ! nqi = nqs ! ! ... loop over k-points DO ikk = 1, nks current_k = ikk IF (lsda) current_spin = isk(ikk) npw = ngk(ikk) ! IF (nks > 1) CALL get_buffer( evc_exx, nwordwfc_exx, iunwfc_exx, ikk ) ! DO iqi = 1, nqi ! iq = iqi ! ikq = index_xkq(current_k,iq) ik = index_xk(ikq) isym = ABS(index_sym(ikq)) ! ! FIXME: use cryst_to_cart and company as above.. xk_cryst(:) = at(1,:)*xk(1,ik)+at(2,:)*xk(2,ik)+at(3,:)*xk(3,ik) IF (index_sym(ikq) < 0) xk_cryst = -xk_cryst sxk(:) = s(:,1,isym)*xk_cryst(1) + & s(:,2,isym)*xk_cryst(2) + & s(:,3,isym)*xk_cryst(3) xkq(:) = bg(:,1)*sxk(1) + bg(:,2)*sxk(2) + bg(:,3)*sxk(3) ! !CALL start_clock ('exxen2_ngmloop') ! !$omp parallel do default(shared), private(ig, beta, q, qq, on_double_grid, x) DO ig = 1, ngm IF (negrp == 1) THEN q(1) = xk(1,current_k) - xkq(1) + g(1,ig) q(2) = xk(2,current_k) - xkq(2) + g(2,ig) q(3) = xk(3,current_k) - xkq(3) + g(3,ig) ELSE q(1) = xk(1,current_k) - xkq(1) + g_exx(1,ig) q(2) = xk(2,current_k) - xkq(2) + g_exx(2,ig) q(3) = xk(3,current_k) - xkq(3) + g_exx(3,ig) ENDIF ! q = q * tpiba qq = ( q(1)*q(1) + q(2)*q(2) + q(3)*q(3) ) ! DO beta = 1, 3 fac_tens(1:3,beta,ig) = q(1:3)*q(beta) ENDDO ! IF (x_gamma_extrapolation) THEN on_double_grid = .TRUE. x= 0.5d0/tpiba*(q(1)*at(1,1)+q(2)*at(2,1)+q(3)*at(3,1))*nq1 on_double_grid = on_double_grid .AND. (ABS(x-NINT(x)) 1.d-8) fac(ig) = 2.d0 * fac(ig) ! ELSEIF ( use_coulomb_vcut_spheric ) THEN fac(ig) = vcut_spheric_get(vcut, q) fac_stress(ig) = 0._dp ! not implemented IF (gamma_only .AND. qq > 1.d-8) fac(ig) = 2.d0 * fac(ig) ! ELSEIF (gau_scrlen > 0) THEN fac(ig) = e2*((pi/gau_scrlen)**(1.5d0))* & EXP(-qq/4.d0/gau_scrlen) * grid_factor fac_stress(ig) = e2*2.d0/4.d0/gau_scrlen * & EXP(-qq/4.d0/gau_scrlen) * & ((pi/gau_scrlen)**(1.5d0))*grid_factor IF (gamma_only) fac(ig) = 2.d0 * fac(ig) IF (gamma_only) fac_stress(ig) = 2.d0 * fac_stress(ig) IF (on_double_grid) fac(ig) = 0._dp IF (on_double_grid) fac_stress(ig) = 0._dp ! ELSEIF (qq > 1.d-8) THEN IF ( erfc_scrlen > 0 ) THEN fac(ig)=e2*fpi/qq*(1._dp-EXP(-qq/4.d0/erfc_scrlen**2)) * grid_factor fac_stress(ig) = -e2*fpi * 2.d0/qq**2 * ( & (1._dp+qq/4.d0/erfc_scrlen**2)*EXP(-qq/4.d0/erfc_scrlen**2) - 1._dp) * & grid_factor ELSE fac(ig)=e2*fpi/( qq + yukawa ) * grid_factor fac_stress(ig) = 2.d0 * e2*fpi/(qq+yukawa)**2 * grid_factor ENDIF ! IF (gamma_only) fac(ig) = 2.d0 * fac(ig) IF (gamma_only) fac_stress(ig) = 2.d0 * fac_stress(ig) IF (on_double_grid) fac(ig) = 0._dp IF (on_double_grid) fac_stress(ig) = 0._dp ! ELSE ! fac(ig) = -exxdiv ! or rather something else (see f.gygi) fac_stress(ig) = 0._dp ! or -exxdiv_stress (not yet implemented) IF ( yukawa> 0._dp .AND. .NOT. x_gamma_extrapolation) THEN fac(ig) = fac(ig) + e2*fpi/( qq + yukawa ) fac_stress(ig) = 2.d0 * e2*fpi/(qq+yukawa)**2 ENDIF IF (erfc_scrlen > 0._dp .AND. .NOT. x_gamma_extrapolation) THEN fac(ig) = e2*fpi / (4.d0*erfc_scrlen**2) fac_stress(ig) = e2*fpi / (8.d0*erfc_scrlen**4) ENDIF ! ENDIF ! ENDDO !$omp end parallel do !CALL stop_clock ('exxen2_ngmloop') DO iegrp = 1, negrp ! ! compute the id of group whose data is currently worked on wegrp = MOD(iegrp+my_egrp_id-1, negrp)+1 ! jblock_start = all_start(wegrp) jblock_end = all_end(wegrp) ! ! loop over bands DO ii = 1, nibands(my_egrp_id+1) ! jbnd = ibands(ii,my_egrp_id+1) ! IF (jbnd==0 .OR. jbnd>nbnd) CYCLE ! !determine which j-bands to calculate jstart = 0 jend = 0 DO ipair=1, max_pairs IF (egrp_pairs(1,ipair,my_egrp_id+1).eq.jbnd)THEN IF (jstart == 0)THEN jstart = egrp_pairs(2,ipair,my_egrp_id+1) jend = jstart ELSE jend = egrp_pairs(2,ipair,my_egrp_id+1) ENDIF ENDIF ENDDO ! jstart = MAX(jstart,jblock_start) jend = MIN(jend,jblock_end) ! temppsic(:) = ( 0._dp, 0._dp ) !$omp parallel do default(shared), private(ig) DO ig = 1, npw temppsic(dfftt%nl(igk_exx(ig,ikk))) = evc_exx(ig,ii) ENDDO !$omp end parallel do ! IF (gamma_only) THEN !$omp parallel do default(shared), private(ig) DO ig = 1, npw temppsic(dfftt%nlm(igk_exx(ig,ikk))) = CONJG(evc_exx(ig,ii)) ENDDO !$omp end parallel do ENDIF ! CALL invfft( 'Wave', temppsic, dfftt ) ! IF (gamma_only) THEN ! IF (MOD(jstart,2) == 0) THEN ibnd_loop_start = jstart-1 ELSE ibnd_loop_start = jstart ENDIF ! DO ibnd = ibnd_loop_start, jend, 2 !for each band of psi ! exxbuff_index = (ibnd+1)/2-(all_start(wegrp)+1)/2+(iexx_start+1)/2 ! IF ( ibnd < jstart ) THEN x1 = 0._dp ELSE x1 = x_occupation(ibnd,ik) ENDIF ! IF ( ibnd == jend) THEN x2 = 0._dp ELSE x2 = x_occupation(ibnd+1,ik) ENDIF ! IF ( ABS(x1) < eps_occ .AND. ABS(x2) < eps_occ ) CYCLE ! ! calculate rho in real space !$omp parallel do default(shared), private(ir) DO ir = 1, nrxxs tempphic(ir) = exxbuff(ir,exxbuff_index,ikq) rhoc(ir) = CONJG(tempphic(ir))*temppsic(ir) / omega ENDDO !$omp end parallel do ! bring it to G-space CALL fwfft( 'Rho', rhoc, dfftt ) ! vc = 0._dp !$omp parallel do default(shared), private(ig), reduction(+:vc) DO ig = 1, ngm ! vc(:,:) = vc(:,:) + x1 * 0.25_dp * & ABS( rhoc(dfftt%nl(ig)) + & CONJG(rhoc(dfftt%nlm(ig))))**2 * & (fac_tens(:,:,ig)*fac_stress(ig)/2.d0 - delta(:,:)*fac(ig)) vc(:,:) = vc(:,:) + x2 * 0.25_dp * & ABS( rhoc(dfftt%nl(ig)) - & CONJG(rhoc(dfftt%nlm(ig))))**2 * & (fac_tens(:,:,ig)*fac_stress(ig)/2.d0 - delta(:,:)*fac(ig)) ENDDO !$omp end parallel do vc = vc / nqs / 4.d0 exx_stress_ = exx_stress_ + exxalfa * vc * wg(jbnd,ikk) ENDDO ! ELSE ! DO ibnd = jstart, jend !for each band of psi ! IF ( ABS(x_occupation(ibnd,ik)) < 1.d-6) CYCLE ! ! calculate rho in real space !$omp parallel do default(shared), private(ir) DO ir = 1, nrxxs tempphic(ir) = exxbuff(ir,ibnd-all_start(wegrp)+iexx_start,ikq) rhoc(ir) = CONJG(tempphic(ir))*temppsic(ir) / omega ENDDO !$omp end parallel do ! ! bring it to G-space CALL fwfft( 'Rho', rhoc, dfftt ) ! vc = 0._dp !$omp parallel do default(shared), private(ig), reduction(+:vc) DO ig = 1, ngm vc(:,:) = vc(:,:) + rhoc(dfftt%nl(ig)) * & CONJG(rhoc(dfftt%nl(ig))) * & (fac_tens(:,:,ig)*fac_stress(ig)/2.d0 - & delta(:,:)*fac(ig)) ENDDO !$omp end parallel do ! vc = vc * x_occupation(ibnd,ik) / nqs / 4.d0 exx_stress_ = exx_stress_ + exxalfa * vc * wg(jbnd,ikk) ! ENDDO ! ENDIF ! gamma or k-points ! ENDDO ! jbnd ! ! get the next nbnd/negrp data IF (negrp > 1) CALL mp_circular_shift_left( exxbuff(:,:,ikq), me_egrp, & inter_egrp_comm ) ! ENDDO ! iegrp ! ENDDO ! iqi ! ENDDO ! ikk ! DEALLOCATE( tempphic, temppsic, rhoc, fac, fac_tens, fac_stress ) ! CALL mp_sum( exx_stress_, intra_egrp_comm ) CALL mp_sum( exx_stress_, inter_egrp_comm ) CALL mp_sum( exx_stress_, inter_pool_comm ) ! exx_stress = exx_stress_ ! CALL change_data_structure( .FALSE. ) ! CALL stop_clock( 'exx_stress' ) ! END FUNCTION exx_stress ! ! !---------------------------------------------------------------------- SUBROUTINE compute_becpsi( npw_, igk_, q_, evc_exx, becpsi_k ) !---------------------------------------------------------------------- !! Calculates beta functions (Kleinman-Bylander projectors), with !! structure factor, for all atoms, in reciprocal space. !! FIXME: why so much replicated code? ! USE kinds, ONLY : DP USE ions_base, ONLY : nat, ntyp => nsp, ityp, tau USE cell_base, ONLY : tpiba, omega USE constants, ONLY : tpi USE gvect, ONLY : eigts1, eigts2, eigts3, mill, g USE wvfct, ONLY : npwx, nbnd USE us, ONLY : nqx, dq, tab, tab_d2y, spline_ps USE m_gth, ONLY : mk_ffnl_gth USE splinelib USE uspp, ONLY : nkb, nhtol, nhtolm, indv USE uspp_param, ONLY : upf, lmaxkb, nhm, nh USE becmod, ONLY : calbec USE mp_exx, ONLY : ibands, nibands, my_egrp_id ! IMPLICIT NONE ! INTEGER, INTENT(IN) :: npw_ !! number of PWs INTEGER, INTENT(IN) :: igk_(npw_) !! indices of G in the list of q+G vectors REAL(DP), INTENT(IN) :: q_(3) !! q vector (2pi/a units) COMPLEX(DP), INTENT(IN) :: evc_exx(npwx,nibands(my_egrp_id+1)) !! wavefunctions from the PW set to exx COMPLEX(DP), INTENT(OUT) :: becpsi_k(nkb,nibands(my_egrp_id+1)) !! for k points ! ! ... local variables ! COMPLEX(DP) :: vkb_(npwx,1) !beta functions (npw_ <= npwx) ! INTEGER :: i0, i1, i2, i3, ig, lm, na, nt, nb, ih, jkb ! REAL(DP) :: px, ux, vx, wx, arg REAL(DP), ALLOCATABLE :: gk(:,:), qg(:), vq(:), ylm(:,:), vkb1(:,:) ! COMPLEX(DP) :: phase, pref COMPLEX(DP), ALLOCATABLE :: sk(:) ! REAL(DP), ALLOCATABLE :: xdata(:) INTEGER :: iq INTEGER :: istart, iend ! istart = ibands(1,my_egrp_id+1) iend = ibands(nibands(my_egrp_id+1),my_egrp_id+1) ! IF (lmaxkb < 0) RETURN ! ALLOCATE( vkb1(npw_,nhm) ) ALLOCATE( sk(npw_) ) ALLOCATE( qg(npw_) ) ALLOCATE( vq(npw_) ) ALLOCATE( ylm(npw_,(lmaxkb+1)**2) ) ALLOCATE( gk(3,npw_) ) ! ! write(*,'(3i4,i5,3f10.5)') size(tab,1), size(tab,2), size(tab,3), size(vq), q_ ! DO ig = 1, npw_ gk(1,ig) = q_(1) + g(1,igk_(ig)) gk(2,ig) = q_(2) + g(2,igk_(ig)) gk(3,ig) = q_(3) + g(3,igk_(ig)) qg(ig) = gk(1,ig)**2 + gk(2,ig)**2 + gk(3,ig)**2 ENDDO ! CALL ylmr2( (lmaxkb+1)**2, npw_, gk, qg, ylm ) ! ! ... set now qg=|q+G| in atomic units ! DO ig = 1, npw_ qg(ig) = SQRT(qg(ig))*tpiba ENDDO ! IF (spline_ps) THEN ALLOCATE( xdata(nqx) ) DO iq = 1, nqx xdata(iq) = (iq - 1) * dq ENDDO ENDIF ! |beta_lm(q)> = (4pi/omega).Y_lm(q).f_l(q).(i^l).S(q) jkb = 0 ! DO nt = 1, ntyp ! ... calculate beta in G-space using an interpolation table: ! f_l(q)=\int _0 ^\infty dr r^2 f_l(r) j_l(q.r) DO nb = 1, upf(nt)%nbeta IF ( upf(nt)%is_gth ) THEN CALL mk_ffnl_gth( nt, nb, npw_, omega, qg, vq ) ELSE DO ig = 1, npw_ IF (spline_ps) THEN vq(ig) = splint(xdata, tab(:,nb,nt), tab_d2y(:,nb,nt), qg(ig)) ELSE px = qg (ig) / dq - INT(qg (ig) / dq) ux = 1.d0 - px vx = 2.d0 - px wx = 3.d0 - px i0 = INT( qg (ig) / dq ) + 1 i1 = i0 + 1 i2 = i0 + 2 i3 = i0 + 3 vq (ig) = tab (i0, nb, nt) * ux * vx * wx / 6.d0 + & tab (i1, nb, nt) * px * vx * wx / 2.d0 - & tab (i2, nb, nt) * px * ux * wx / 2.d0 + & tab (i3, nb, nt) * px * ux * vx / 6.d0 ENDIF ENDDO ENDIF ! ! ... add spherical harmonic part (Y_lm(q)*f_l(q)) DO ih = 1, nh(nt) IF (nb == indv(ih,nt)) THEN !l = nhtol (ih,nt) lm = nhtolm(ih,nt) DO ig = 1, npw_ vkb1(ig,ih) = ylm(ig,lm) * vq(ig) ENDDO ENDIF ENDDO ! ENDDO ! ! ... vkb1 contains all betas including angular part for type nt ! now add the structure factor and factor (-i)^l ! DO na = 1, nat ! ordering: first all betas for atoms of type 1 ! then all betas for atoms of type 2 and so on IF (ityp(na) == nt) THEN arg = (q_(1) * tau(1,na) + & q_(2) * tau(2,na) + & q_(3) * tau(3,na) ) * tpi phase = CMPLX(COS(arg), - SIN(arg), KIND=DP) DO ig = 1, npw_ sk (ig) = eigts1(mill(1,igk_(ig)), na) * & eigts2(mill(2,igk_(ig)), na) * & eigts3(mill(3,igk_(ig)), na) ENDDO ! DO ih = 1, nh (nt) jkb = jkb + 1 pref = (0.d0, -1.d0)**nhtol(ih,nt) * phase DO ig = 1, npw_ vkb_(ig,1) = vkb1(ig,ih) * sk(ig) * pref ENDDO ! DO ig = npw_+1, npwx vkb_(ig, 1) = (0.0_DP, 0.0_DP) ENDDO ! CALL calbec( npw_, vkb_, evc_exx, becpsi_k(jkb:jkb,:), & nibands(my_egrp_id+1) ) ENDDO ! ENDIF ! ENDDO ENDDO ! DEALLOCATE( gk ) DEALLOCATE( ylm ) DEALLOCATE( vq ) DEALLOCATE( qg ) DEALLOCATE( sk ) DEALLOCATE( vkb1 ) ! RETURN ! END SUBROUTINE compute_becpsi ! ! !----------------------------------------------------------------------------- SUBROUTINE aceinit( DoLoc, exex ) !---------------------------------------------------------------------------- !! ACE Initialization ! USE wvfct, ONLY : nbnd, npwx, current_k USE klist, ONLY : nks, xk, ngk, igk_k USE uspp, ONLY : nkb, vkb, okvan USE becmod, ONLY : allocate_bec_type, deallocate_bec_type, & bec_type, calbec USE lsda_mod, ONLY : current_spin, lsda, isk USE io_files, ONLY : nwordwfc, iunwfc USE buffers, ONLY : get_buffer USE mp_pools, ONLY : inter_pool_comm USE mp_bands, ONLY : intra_bgrp_comm USE mp, ONLY : mp_sum USE wavefunctions, ONLY : evc ! IMPLICIT NONE ! LOGICAL, INTENT(IN) :: DoLoc !! if TRUE calculates exact exchange with SCDM orbitals REAL(DP), OPTIONAL, INTENT(OUT) :: exex !! ACE energy ! ! ... local variables ! REAL(DP) :: ee, eexx INTEGER :: ik, npw TYPE(bec_type) :: becpsi ! IF (nbndproj < x_nbnd_occ .OR. nbndproj > nbnd) THEN WRITE( stdout, '(3(A,I4))' ) ' occ = ', x_nbnd_occ, ' proj = ', nbndproj, & ' tot = ', nbnd CALL errore( 'aceinit', 'n_proj must be between occ and tot.', 1 ) ENDIF ! IF (.NOT. ALLOCATED(xi)) ALLOCATE( xi(npwx*npol,nbndproj,nks) ) IF ( okvan ) CALL allocate_bec_type( nkb, nbnd, becpsi ) ! eexx = 0.0d0 xi = (0.0d0,0.0d0) ! DO ik = 1, nks npw = ngk(ik) current_k = ik IF ( lsda ) current_spin = isk(ik) IF ( nks > 1 ) CALL get_buffer( evc, nwordwfc, iunwfc, ik ) IF ( okvan ) THEN CALL init_us_2( npw, igk_k(1,ik), xk(:,ik), vkb ) CALL calbec( npw, vkb, evc, becpsi, nbnd ) ENDIF IF (gamma_only) THEN CALL aceinit_gamma( DoLoc, npw, nbnd, evc, xi(1,1,ik), becpsi, ee ) ELSE CALL aceinit_k( DoLoc, npw, nbnd, evc, xi(1,1,ik), becpsi, ee ) ENDIF eexx = eexx + ee ENDDO ! CALL mp_sum( eexx, inter_pool_comm ) ! WRITE(stdout,'(/,5X,"ACE energy",f15.8)') eexx ! IF (PRESENT(exex)) exex = eexx IF ( okvan ) CALL deallocate_bec_type( becpsi ) ! domat = .FALSE. ! END SUBROUTINE aceinit ! ! !--------------------------------------------------------------------------------- SUBROUTINE aceinit_gamma( DoLoc, nnpw, nbnd, phi, xitmp, becpsi, exxe ) !------------------------------------------------------------------------------- !! Compute xi(npw,nbndproj) for the ACE method. ! USE becmod, ONLY : bec_type USE lsda_mod, ONLY : current_spin USE mp, ONLY : mp_stop ! IMPLICIT NONE ! LOGICAL, INTENT(IN) :: DoLoc !! if TRUE calculates exact exchange with SCDM orbitals INTEGER :: nnpw !! number of pw INTEGER :: nbnd !! number of bands COMPLEX(DP) :: phi(nnpw,nbnd) !! wavefunction COMPLEX(DP) :: xitmp(nnpw,nbndproj) !! xi(npw,nbndproj) TYPE(bec_type), INTENT(IN) :: becpsi !! REAL(DP) :: exxe !! exx energy ! ! ... local variables ! INTEGER :: nrxxs REAL(DP), ALLOCATABLE :: mexx(:,:) REAL(DP), PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0, Pt5=0.50d0 LOGICAL :: domat0 ! CALL start_clock( 'aceinit' ) ! nrxxs = dfftt%nnr * npol ! ALLOCATE( mexx(nbndproj,nbndproj) ) xitmp = (Zero,Zero) mexx = Zero ! IF ( DoLoc ) then CALL vexx_loc( nnpw, nbndproj, xitmp, mexx ) CALL MatSymm( 'S', 'L', mexx,nbndproj ) ELSE ! |xi> = Vx[phi]|phi> CALL vexx( nnpw, nnpw, nbndproj, phi, xitmp, becpsi ) ! mexx = CALL matcalc( 'exact', .TRUE., 0, nnpw, nbndproj, nbndproj, phi, xitmp, mexx, exxe ) ! |xi> = -One * Vx[phi]|phi> * rmexx^T ENDIF ! CALL aceupdate( nbndproj, nnpw, xitmp, mexx ) ! DEALLOCATE( mexx ) ! IF ( local_thr > 0.0d0 ) THEN domat0 = domat domat = .TRUE. CALL vexxace_gamma( nnpw, nbndproj, evc0(1,1,current_spin), exxe ) evc0(:,:,current_spin) = phi(:,:) domat = domat0 ENDIF ! CALL stop_clock( 'aceinit' ) ! END SUBROUTINE aceinit_gamma ! ! !---------------------------------------------------------------------------------- SUBROUTINE vexxace_gamma( nnpw, nbnd, phi, exxe, vphi ) !------------------------------------------------------------------------------- !! Do the ACE potential and (optional) print the ACE matrix representation. ! USE wvfct, ONLY : current_k, wg USE lsda_mod, ONLY : current_spin ! IMPLICIT NONE ! INTEGER :: nnpw !! number of plane waves INTEGER :: nbnd !! number of bands COMPLEX(DP) :: phi(nnpw,nbnd) !! wave function REAL(DP) :: exxe !! exx energy COMPLEX(DP), OPTIONAL :: vphi(nnpw,nbnd) !! v times phi ! ! ... local variables ! INTEGER :: i, ik REAL*8, ALLOCATABLE :: rmexx(:,:) COMPLEX(DP),ALLOCATABLE :: cmexx(:,:), vv(:,:) REAL*8, PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0, Pt5=0.50d0 ! CALL start_clock( 'vexxace' ) ! ALLOCATE( vv(nnpw,nbnd) ) ! IF (PRESENT(vphi)) THEN vv = vphi ELSE vv = (Zero,Zero) ENDIF ! ! do the ACE potential ALLOCATE( rmexx(nbndproj,nbnd), cmexx(nbndproj,nbnd) ) ! rmexx = Zero cmexx = (Zero,Zero) ! CALL matcalc( '', .FALSE. , 0, nnpw, nbndproj, nbnd, xi(1,1,current_k), & phi, rmexx, exxe ) ! |vv> = |vphi> + (-One) * |xi> * cmexx = (One,Zero)*rmexx ! CALL ZGEMM( 'N', 'N', nnpw, nbnd, nbndproj, -(One,Zero), xi(1,1,current_k), & nnpw, cmexx, nbndproj, (One,Zero), vv, nnpw ) ! DEALLOCATE( cmexx, rmexx ) ! IF (domat) THEN ALLOCATE( rmexx(nbnd,nbnd) ) CALL matcalc( 'ACE', .TRUE., 0, nnpw, nbnd, nbnd, phi, vv, rmexx, exxe ) DEALLOCATE( rmexx ) #if defined(__DEBUG) WRITE(stdout,'(4(A,I3),A,I9,A,f12.6)') 'vexxace: nbnd=', nbnd, ' nbndproj=',nbndproj, & ' k=',current_k,' spin=',current_spin,' npw=', & nnpw, ' E=',exxe ELSE WRITE(stdout,'(4(A,I3),A,I9)') 'vexxace: nbnd=', nbnd, ' nbndproj=',nbndproj, & ' k=',current_k,' spin=',current_spin,' npw=', & nnpw #endif ENDIF ! IF (PRESENT(vphi)) vphi = vv DEALLOCATE( vv ) ! CALL stop_clock( 'vexxace' ) ! END SUBROUTINE vexxace_gamma ! ! !------------------------------------------------------------------------------------------- SUBROUTINE aceupdate( nbndproj, nnpw, xitmp, rmexx ) !---------------------------------------------------------------------------------------- !! Build the ACE operator from the potential amd matrix (rmexx is assumed symmetric !! and only the Lower Triangular part is considered). ! IMPLICIT NONE ! INTEGER :: nbndproj !! number of bands INTEGER :: nnpw !! number of PW COMPLEX(DP) :: xitmp(nnpw,nbndproj) !! xi(nnpw,nbndproj) REAL(DP) :: rmexx(nbndproj,nbndproj) !! |xi> = -One * Vx[phi]|phi> * rmexx^T ! ! ... local variables ! COMPLEX(DP), ALLOCATABLE :: cmexx(:,:) REAL(DP), PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0, Pt5=0.50d0 ! CALL start_clock( 'aceupdate' ) ! ! rmexx = -(Cholesky(rmexx))^-1 rmexx = -rmexx ! CALL invchol( nbndproj, rmexx ) CALL MatChol( nbndproj, rmexx ) CALL MatInv( 'L', nbndproj, rmexx ) ! ! |xi> = -One * Vx[phi]|phi> * rmexx^T ALLOCATE( cmexx(nbndproj,nbndproj) ) cmexx = (One,Zero)*rmexx CALL ZTRMM( 'R', 'L', 'C', 'N', nnpw, nbndproj, (One,Zero), cmexx, nbndproj, xitmp, nnpw ) ! DEALLOCATE( cmexx ) ! CALL stop_clock( 'aceupdate' ) ! END SUBROUTINE ! ! !--------------------------------------------------------------------------------------------- SUBROUTINE aceinit_k( DoLoc, nnpw, nbnd, phi, xitmp, becpsi, exxe ) !----------------------------------------------------------------------------------------- !! Compute xi(npw,nbndproj) for the ACE method. ! USE becmod, ONLY : bec_type USE wvfct, ONLY : current_k, npwx USE klist, ONLY : wk USE noncollin_module, ONLY : npol ! IMPLICIT NONE ! LOGICAL, INTENT(IN) :: DoLoc !! if TRUE calculates exact exchange with SCDM orbitals INTEGER :: nnpw !! number of PW INTEGER :: nbnd !! number of bands COMPLEX(DP) :: phi(npwx*npol,nbnd) !! wave function COMPLEX(DP) :: xitmp(npwx*npol,nbndproj) !! xi(nnpw,nbndproj) TYPE(bec_type), INTENT(IN) :: becpsi !! REAL(DP) :: exxe !! exx energy ! ! ... local variables ! COMPLEX(DP), ALLOCATABLE :: mexx(:,:) REAL(DP) :: exxe0 REAL(DP), PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0, Pt5=0.50d0 INTEGER :: i LOGICAL :: domat0 ! CALL start_clock( 'aceinit' ) ! IF (nbndproj>nbnd) CALL errore( 'aceinit_k', 'nbndproj greater than nbnd.', 1 ) IF (nbndproj<=0) CALL errore( 'aceinit_k', 'nbndproj le 0.', 1 ) ! ALLOCATE( mexx(nbndproj,nbndproj) ) xitmp = (Zero,Zero) mexx = (Zero,Zero) IF ( DoLoc ) THEN CALL vexx_loc_k( nnpw, nbndproj, xitmp, mexx, exxe ) CALL MatSymm_k( 'S', 'L', mexx, nbndproj ) ELSE ! |xi> = Vx[phi]|phi> CALL vexx( npwx, nnpw, nbndproj, phi, xitmp, becpsi ) ! mexx = CALL matcalc_k( 'exact', .TRUE., 0, current_k, npwx*npol, nbndproj, nbndproj, & phi, xitmp, mexx, exxe ) ENDIF #if defined(__DEBUG) WRITE( stdout,'(3(A,I3),A,I9,A,f12.6)') 'aceinit_k: nbnd=', nbnd, ' nbndproj=',nbndproj, & ' k=',current_k,' npw=',nnpw,' Ex(k)=',exxe #endif ! Skip k-points that have exactly zero weight IF(wk(current_k)/=0._dp)THEN ! |xi> = -One * Vx[phi]|phi> * rmexx^T CALL aceupdate_k( nbndproj, nnpw, xitmp, mexx ) ENDIF ! DEALLOCATE( mexx ) ! IF ( DoLoc ) THEN domat0 = domat domat = .TRUE. CALL vexxace_k( nnpw, nbnd, evc0(1,1,current_k), exxe ) evc0(:,:,current_k) = phi(:,:) domat = domat0 ENDIF ! CALL stop_clock( 'aceinit' ) ! END SUBROUTINE aceinit_k ! ! !------------------------------------------------------------------------------ SUBROUTINE aceupdate_k( nbndproj, nnpw, xitmp, mexx ) !---------------------------------------------------------------------------- !! Updates xi(npw,nbndproj) for the ACE method. ! USE wvfct, ONLY : npwx USE noncollin_module, ONLY : noncolin, npol ! IMPLICIT NONE ! INTEGER :: nbndproj !! number of bands INTEGER :: nnpw !! number of PW COMPLEX(DP) :: mexx(nbndproj,nbndproj) !! mexx = -(Cholesky(mexx))^-1 COMPLEX(DP) :: xitmp(npwx*npol,nbndproj) !! |xi> = -One * Vx[phi]|phi> * mexx^T ! CALL start_clock( 'aceupdate' ) ! ! mexx = -(Cholesky(mexx))^-1 mexx = -mexx CALL invchol_k( nbndproj, mexx ) ! ! |xi> = -One * Vx[phi]|phi> * mexx^T CALL ZTRMM( 'R', 'L', 'C', 'N', npwx*npol, nbndproj, (1.0_dp,0.0_dp), mexx,nbndproj, & xitmp, npwx*npol ) ! CALL stop_clock( 'aceupdate' ) ! END SUBROUTINE aceupdate_k ! ! !-------------------------------------------------------------------------------------- SUBROUTINE vexxace_k( nnpw, nbnd, phi, exxe, vphi ) !----------------------------------------------------------------------------------- !! Do the ACE potential and (optional) print the ACE matrix representation. ! USE becmod, ONLY : calbec USE wvfct, ONLY : current_k, npwx USE noncollin_module, ONLY : npol ! IMPLICIT NONE ! REAL(DP) :: exxe !! exx energy INTEGER :: nnpw !! number of PW INTEGER :: nbnd !! number of bands COMPLEX(DP) :: phi(npwx*npol,nbnd) !! wave function COMPLEX(DP), OPTIONAL :: vphi(npwx*npol,nbnd) !! ACE potential ! ! ... local variables ! INTEGER :: i COMPLEX(DP), ALLOCATABLE :: cmexx(:,:), vv(:,:) REAL*8, PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0, Pt5=0.50d0 ! CALL start_clock( 'vexxace' ) ! ALLOCATE( vv(npwx*npol,nbnd) ) IF (PRESENT(vphi)) THEN vv = vphi ELSE vv = (Zero, Zero) ENDIF ! ! do the ACE potential! ALLOCATE( cmexx(nbndproj,nbnd) ) cmexx = (Zero,Zero) ! CALL matcalc_k( '', .FALSE., 0, current_k, npwx*npol, nbndproj, nbnd, & xi(1,1,current_k), phi, cmexx, exxe ) ! ! |vv> = |vphi> + (-One) * |xi> * ! CALL ZGEMM( 'N', 'N', npwx*npol, nbnd, nbndproj, -(One,Zero), xi(1,1,current_k), & npwx*npol, cmexx, nbndproj, (One,Zero), vv, npwx*npol ) ! IF (domat) THEN ! IF ( nbndproj /= nbnd) THEN DEALLOCATE( cmexx ) ALLOCATE( cmexx(nbnd,nbnd) ) ENDIF ! CALL matcalc_k( 'ACE', .TRUE., 0, current_k, npwx*npol, nbnd, nbnd, phi, vv, cmexx, exxe ) ! #if defined(__DEBUG) WRITE(stdout,'(3(A,I3),A,I9,A,f12.6)') 'vexxace_k: nbnd=', nbnd, ' nbndproj=',nbndproj, & ' k=',current_k,' npw=',nnpw, ' Ex(k)=',exxe ELSE WRITE(stdout,'(3(A,I3),A,I9)') 'vexxace_k: nbnd=', nbnd, ' nbndproj=',nbndproj, & ' k=',current_k,' npw=',nnpw #endif ENDIF ! IF (PRESENT(vphi)) vphi = vv DEALLOCATE( vv, cmexx ) ! CALL stop_clock( 'vexxace' ) ! END SUBROUTINE vexxace_k ! ! !--------------------------------------------------------------------------------- SUBROUTINE vexx_loc( npw, nbnd, hpsi, mexx ) !--------------------------------------------------------------------------------- !! Exact exchange with SCDM orbitals. !! Vx|phi> = Vx|psi> ^(-1) . !! locmat contains localization integrals. ! USE noncollin_module, ONLY : npol USE cell_base, ONLY : omega, alat USE wvfct, ONLY : current_k USE klist, ONLY : xk, nks, nkstot USE fft_interfaces, ONLY : fwfft, invfft USE mp, ONLY : mp_stop, mp_barrier, mp_sum USE mp_bands, ONLY : intra_bgrp_comm, me_bgrp, nproc_bgrp USE exx_base, ONLY : nqs, xkq_collect, index_xkq, index_xk, & g2_convolution ! IMPLICIT NONE ! INTEGER :: npw !! number of PW INTEGER :: nbnd !! number of bands COMPLEX(DP) :: hpsi(npw,nbnd) !! hpsi REAL(DP) :: mexx(nbnd,nbnd) !! mexx contains in output the exchange matrix ! ! ... local variables ! INTEGER :: nrxxs, npairs, ntot, NBands INTEGER :: ig, ir, ik, ikq, iq, ibnd, jbnd, kbnd, NQR INTEGER :: current_ik REAL(DP) :: exxe COMPLEX(DP), ALLOCATABLE :: rhoc(:), vc(:), RESULT(:,:) REAL(DP), ALLOCATABLE :: fac(:) REAL(DP) :: xkp(3), xkq(3) INTEGER, EXTERNAL :: global_kpoint_index ! WRITE( stdout, '(5X,A)' ) ' ' WRITE( stdout, '(5X,A)' ) 'Exact-exchange with localized orbitals' ! CALL start_clock( 'vexxloc' ) ! WRITE( stdout,'(7X,A,f24.12)' ) 'local_thr =', local_thr nrxxs = dfftt%nnr NQR = nrxxs*npol ! ! ... exchange projected onto localized orbitals WRITE( stdout,'(A)' ) 'Allocating exx quantities...' ALLOCATE( fac(dfftt%ngm) ) ALLOCATE( rhoc(nrxxs), vc(NQR) ) ALLOCATE( RESULT(nrxxs,nbnd) ) WRITE( stdout,'(A)' ) 'Allocations done.' ! current_ik = global_kpoint_index( nkstot, current_k ) xkp = xk(:,current_k) ! vc = (0.0d0, 0.0d0) npairs = 0 ! DO iq = 1, nqs ikq = index_xkq(current_ik,iq) ik = index_xk(ikq) xkq = xkq_collect(:,ikq) ! CALL g2_convolution( dfftt%ngm, gt, xkp, xkq, fac ) ! RESULT = (0.0d0, 0.0d0) ! DO ibnd = 1, nbnd ! IF (x_occupation(ibnd,ikq) > 0.0d0) THEN ! DO ir = 1, NQR rhoc(ir) = locbuff(ir,ibnd,ikq) * locbuff(ir,ibnd,ikq) / omega ENDDO ! CALL fwfft( 'Rho', rhoc, dfftt ) ! vc = (0.0d0, 0.0d0) DO ig = 1, dfftt%ngm vc(dfftt%nl(ig)) = fac(ig) * rhoc(dfftt%nl(ig)) vc(dfftt%nlm(ig)) = fac(ig) * rhoc(dfftt%nlm(ig)) ENDDO ! CALL invfft( 'Rho', vc, dfftt ) ! DO ir = 1, NQR RESULT(ir,ibnd) = RESULT(ir,ibnd) + locbuff(ir,ibnd,ikq) * vc(ir) ENDDO ! ENDIF ! DO kbnd = 1, ibnd-1 IF ( (locmat(ibnd,kbnd,ikq) > local_thr) .AND. & ( (x_occupation(ibnd,ikq) > 0.0d0) .OR. & (x_occupation(kbnd,ikq) > 0.0d0) ) ) THEN ! !write(stdout,'(3I4,3f12.6,A)') ikq, ibnd, kbnd, x_occupation(ibnd,ikq), & ! x_occupation(kbnd,ikq), locmat(ibnd,kbnd,ikq), ' IN ' ! DO ir = 1, NQR rhoc(ir) = locbuff(ir,ibnd,ikq) * locbuff(ir,kbnd,ikq) / omega ENDDO ! npairs = npairs + 1 ! CALL fwfft( 'Rho', rhoc, dfftt ) ! vc = (0.0d0, 0.0d0) ! DO ig = 1, dfftt%ngm vc(dfftt%nl(ig)) = fac(ig) * rhoc(dfftt%nl(ig)) vc(dfftt%nlm(ig)) = fac(ig) * rhoc(dfftt%nlm(ig)) ENDDO ! CALL invfft( 'Rho', vc, dfftt ) ! DO ir = 1, NQR RESULT(ir,kbnd) = RESULT(ir,kbnd) + x_occupation(ibnd,ikq) * locbuff(ir,ibnd,ikq) * vc(ir) ENDDO ! DO ir = 1, NQR RESULT(ir,ibnd) = RESULT(ir,ibnd) + x_occupation(kbnd,ikq) * locbuff(ir,kbnd,ikq) * vc(ir) ENDDO ! ELSE ! write(stdout,'(3I4,3f12.6,A)') ikq, ibnd, kbnd, x_occupation(ibnd,ikq), & ! x_occupation(kbnd,ikq), locmat(ibnd,kbnd,ikq), ' OUT ' ENDIF ! ENDDO ! ENDDO ! DO jbnd = 1, nbnd ! CALL fwfft( 'Wave', RESULT(:,jbnd), dfftt ) ! DO ig = 1, npw hpsi(ig,jbnd) = hpsi(ig,jbnd) - exxalfa*RESULT(dfftt%nl(ig),jbnd) ENDDO ! ENDDO ! ENDDO ! DEALLOCATE( fac, vc ) DEALLOCATE( RESULT ) ! ! ... localized functions to G-space and exchange matrix onto localized functions ALLOCATE( RESULT(npw,nbnd) ) RESULT = (0.0d0,0.0d0) ! DO jbnd = 1, nbnd rhoc(:) = DBLE(locbuff(:,jbnd,ikq)) + (0.0d0,1.0d0)*0.0d0 CALL fwfft( 'Wave' , rhoc, dfftt ) DO ig = 1, npw RESULT(ig,jbnd) = rhoc(dfftt%nl(ig)) ENDDO ENDDO ! DEALLOCATE( rhoc ) ! CALL matcalc( 'M1-', .TRUE., 0, npw, nbnd, nbnd, RESULT, hpsi, mexx, exxe ) ! DEALLOCATE( RESULT ) ! NBands = INT(SUM(x_occupation(:,ikq))) ntot = NBands * (NBands-1)/2 + NBands * (nbnd-NBands) WRITE( stdout,'(7X,2(A,I12),A,f12.2)') ' Pairs(full): ', ntot, & ' Pairs(included): ', npairs, & ' Pairs(%): ', DBLE(npairs)/DBLE(ntot)*100.0d0 ! CALL stop_clock( 'vexxloc' ) ! END SUBROUTINE vexx_loc ! ! !---------------------------------------------------------------------------------------------- SUBROUTINE compute_density( DoPrint, Shift, CenterPBC, SpreadPBC, Overlap, PsiI, PsiJ, NQR, & ibnd, jbnd ) !------------------------------------------------------------------------------------------- !! Manipulate density: get pair density, center, spread, absolute overlap. !! Shift: !! .FALSE. refer the centers to the cell -L/2 ... +L/2; !! .TRUE. shift the centers to the cell 0 ... L (presumably the one given in input). ! USE constants, ONLY : pi, bohr_radius_angs USE cell_base, ONLY : alat, omega USE mp, ONLY : mp_sum USE mp_bands, ONLY : intra_bgrp_comm USE fft_types, ONLY : fft_index_to_3d ! IMPLICIT NONE ! LOGICAL, INTENT(IN) :: DoPrint !! whether to print or not the quantities LOGICAL, INTENT(IN) :: Shift !! .FALSE. Centers with respect to the minimum image cell convention; !! .TRUE. Centers shifted to the input cell. REAL(DP), INTENT(OUT) :: CenterPBC(3) REAL(DP), INTENT(OUT) :: SpreadPBC(3) REAL(DP), INTENT(OUT) :: Overlap INTEGER, INTENT(IN) :: NQR REAL(DP), INTENT(IN) :: PsiI(NQR) REAL(DP), INTENT(IN) :: PsiJ(NQR) INTEGER, INTENT(IN) :: ibnd INTEGER, INTENT(IN) :: jbnd ! ! ... local variables ! REAL(DP) :: vol, rbuff, TotSpread INTEGER :: ir, i, j, k LOGICAL :: offrange COMPLEX(DP) :: cbuff(3) REAL(DP), PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0 ! vol = omega / DBLE(dfftt%nr1 * dfftt%nr2 * dfftt%nr3) ! CenterPBC = Zero SpreadPBC = Zero Overlap = Zero cbuff = (Zero,Zero) rbuff = Zero ! DO ir = 1, dfftt%nr1x*dfftt%my_nr2p*dfftt%my_nr3p ! ! ... three dimensional indexes ! CALL fft_index_to_3d (ir, dfftt, i,j,k, offrange) IF ( offrange ) CYCLE ! rbuff = PsiI(ir) * PsiJ(ir) / omega Overlap = Overlap + ABS(rbuff)*vol cbuff(1) = cbuff(1) + rbuff*EXP((Zero,One)*Two*pi*DBLE(i)/DBLE(dfftt%nr1))*vol cbuff(2) = cbuff(2) + rbuff*EXP((Zero,One)*Two*pi*DBLE(j)/DBLE(dfftt%nr2))*vol cbuff(3) = cbuff(3) + rbuff*EXP((Zero,One)*Two*pi*DBLE(k)/DBLE(dfftt%nr3))*vol ENDDO ! CALL mp_sum( cbuff, intra_bgrp_comm ) CALL mp_sum( Overlap, intra_bgrp_comm ) ! CenterPBC(1) = alat/Two/pi*AIMAG( LOG(cbuff(1)) ) CenterPBC(2) = alat/Two/pi*AIMAG( LOG(cbuff(2)) ) CenterPBC(3) = alat/Two/pi*AIMAG( LOG(cbuff(3)) ) ! IF (Shift) THEN IF (CenterPBC(1) < Zero) CenterPBC(1) = CenterPBC(1) + alat IF (CenterPBC(2) < Zero) CenterPBC(2) = CenterPBC(2) + alat IF (CenterPBC(3) < Zero) CenterPBC(3) = CenterPBC(3) + alat ENDIF ! rbuff = DBLE(cbuff(1))**2 + AIMAG(cbuff(1))**2 SpreadPBC(1) = -(alat/Two/pi)**2 * DLOG(rbuff) rbuff = DBLE(cbuff(2))**2 + AIMAG(cbuff(2))**2 SpreadPBC(2) = -(alat/Two/pi)**2 * DLOG(rbuff) rbuff = DBLE(cbuff(3))**2 + AIMAG(cbuff(3))**2 SpreadPBC(3) = -(alat/Two/pi)**2 * DLOG(rbuff) TotSpread = (SpreadPBC(1) + SpreadPBC(2) + SpreadPBC(3))*bohr_radius_angs**2 ! IF (DoPrint) THEN WRITE(stdout,'(A,2I4)') 'MOs: ', ibnd, jbnd WRITE(stdout,'(A,10f12.6)') 'Absolute Overlap: ', Overlap WRITE(stdout,'(A,10f12.6)') 'Center(PBC)[A]: ', CenterPBC(1)*bohr_radius_angs, & CenterPBC(2)*bohr_radius_angs, CenterPBC(3)*bohr_radius_angs WRITE(stdout,'(A,10f12.6)') 'Spread [A**2]: ', SpreadPBC(1)*bohr_radius_angs**2, & SpreadPBC(2)*bohr_radius_angs**2, SpreadPBC(3)*bohr_radius_angs**2 WRITE(stdout,'(A,10f12.6)') 'Total Spread [A**2]: ', TotSpread ENDIF ! IF (TotSpread < Zero) CALL errore( 'compute_density', 'Negative spread found', 1 ) ! END SUBROUTINE compute_density ! ! !------------------------------------------------------------------------------------ SUBROUTINE compute_density_k( DoPrint, Shift, CenterPBC, SpreadPBC, Overlap, PsiI, & PsiJ, NQR, ibnd, jbnd ) !---------------------------------------------------------------------------------- !! Manipulate density: get pair density, center, spread, absolute overlap. !! Shift: !! .FALSE. refer the centers to the cell -L/2 ... +L/2 !! .TRUE. shift the centers to the cell 0 ... L (presumably the !! one given in input ) ! USE constants, ONLY : pi, bohr_radius_angs USE cell_base, ONLY : alat, omega USE mp, ONLY : mp_sum USE mp_bands, ONLY : intra_bgrp_comm USE fft_types, ONLY : fft_index_to_3d ! IMPLICIT NONE ! LOGICAL, INTENT(IN) :: DoPrint !! whether to print or not the quantities LOGICAL, INTENT(IN) :: Shift !! .FALSE. Centers with respect to the minimum image cell convention !! .TRUE. Centers shifted to the input cell REAL(DP), INTENT(OUT) :: CenterPBC(3) !! Coordinates of the center REAL(DP), INTENT(OUT) :: SpreadPBC(3) REAL(DP), INTENT(OUT) :: Overlap INTEGER, INTENT(IN) :: NQR COMPLEX(DP), INTENT(IN) :: PsiI(NQR) COMPLEX(DP), INTENT(IN) :: PsiJ(NQR) INTEGER, INTENT(IN) :: ibnd INTEGER, INTENT(IN) :: jbnd ! ! ... local variables ! REAL(DP) :: vol, TotSpread, rbuff INTEGER :: ir, i, j, k LOGICAL :: offrange COMPLEX(DP) :: cbuff(3) REAL(DP), PARAMETER :: Zero=0.0d0, One=1.0d0, Two=2.0d0 ! vol = omega / DBLE(dfftt%nr1 * dfftt%nr2 * dfftt%nr3) ! CenterPBC = Zero SpreadPBC = Zero Overlap = Zero cbuff = (Zero, Zero) rbuff = Zero ! DO ir = 1, dfftt%nr1x*dfftt%my_nr2p*dfftt%my_nr3p ! ! ... three dimensional indexes ! CALL fft_index_to_3d (ir, dfftt, i,j,k, offrange) IF ( offrange ) CYCLE ! rbuff = ABS(PsiI(ir) * CONJG(PsiJ(ir)) / omega ) Overlap = Overlap + rbuff*vol cbuff(1) = cbuff(1) + rbuff*EXP((Zero,One)*Two*pi*DBLE(i)/DBLE(dfftt%nr1))*vol cbuff(2) = cbuff(2) + rbuff*EXP((Zero,One)*Two*pi*DBLE(j)/DBLE(dfftt%nr2))*vol cbuff(3) = cbuff(3) + rbuff*EXP((Zero,One)*Two*pi*DBLE(k)/DBLE(dfftt%nr3))*vol ENDDO ! CALL mp_sum( cbuff, intra_bgrp_comm ) CALL mp_sum( Overlap, intra_bgrp_comm ) ! CenterPBC(1) = alat/Two/pi*AIMAG(LOG(cbuff(1))) CenterPBC(2) = alat/Two/pi*AIMAG(LOG(cbuff(2))) CenterPBC(3) = alat/Two/pi*AIMAG(LOG(cbuff(3))) ! IF (Shift) THEN IF (CenterPBC(1) < Zero) CenterPBC(1) = CenterPBC(1) + alat IF (CenterPBC(2) < Zero) CenterPBC(2) = CenterPBC(2) + alat IF (CenterPBC(3) < Zero) CenterPBC(3) = CenterPBC(3) + alat ENDIF ! rbuff = DBLE(cbuff(1))**2 + AIMAG(cbuff(1))**2 SpreadPBC(1) = -(alat/Two/pi)**2 * LOG(rbuff) rbuff = DBLE(cbuff(2))**2 + AIMAG(cbuff(2))**2 SpreadPBC(2) = -(alat/Two/pi)**2 * LOG(rbuff) rbuff = DBLE(cbuff(3))**2 + AIMAG(cbuff(3))**2 SpreadPBC(3) = -(alat/Two/pi)**2 * LOG(rbuff) TotSpread = (SpreadPBC(1) + SpreadPBC(2) + SpreadPBC(3))*bohr_radius_angs**2 ! IF (DoPrint) THEN WRITE(stdout,'(A,2I4)') 'MOs: ', ibnd, jbnd WRITE(stdout,'(A,10f12.6)') 'Absolute Overlap: ', Overlap WRITE(stdout,'(A,10f12.6)') 'Center(PBC)[A]: ', CenterPBC(1)*bohr_radius_angs, & CenterPBC(2)*bohr_radius_angs, CenterPBC(3)*bohr_radius_angs WRITE(stdout,'(A,10f12.6)') 'Spread [A**2]: ', SpreadPBC(1)*bohr_radius_angs**2, & SpreadPBC(2)*bohr_radius_angs**2, SpreadPBC(3)*bohr_radius_angs**2 WRITE(stdout,'(A,10f12.6)') 'Total Spread [A**2]: ', TotSpread ENDIF ! IF (TotSpread < Zero) CALL errore( 'compute_density_k', 'Negative spread found', 1 ) ! END SUBROUTINE compute_density_k ! ! !------------------------------------------------------------------------ SUBROUTINE vexx_loc_k( npw, NBands, hpsi, mexx, exxe ) !----------------------------------------------------------------------- !! Generic, k-point version of \(\texttt{vexx}\). ! USE cell_base, ONLY : omega USE gvect, ONLY : ngm, g USE wvfct, ONLY : current_k, npwx USE klist, ONLY : xk, nks, nkstot USE fft_interfaces, ONLY : fwfft, invfft USE exx_base, ONLY : index_xkq, nqs, index_xk, xkq_collect, & g2_convolution USE exx_band, ONLY : igk_exx ! IMPLICIT NONE ! INTEGER :: npw !! number of PW INTEGER :: NBands !! number of bands COMPLEX(DP) :: hpsi(npwx*npol,NBands) !! h psi COMPLEX(DP) :: mexx(NBands,NBands) !! mexx contains in output the exchange matrix REAL(DP) :: exxe !! exx energy ! ! ... local variables ! COMPLEX(DP), ALLOCATABLE :: RESULT(:), RESULT2(:,:) COMPLEX(DP), ALLOCATABLE :: rhoc(:), vc(:) REAL(DP), ALLOCATABLE :: fac(:) INTEGER :: ibnd, jbnd, ik, ikq, iq INTEGER :: ir, ig, NBin, NBtot INTEGER :: current_ik, current_jk INTEGER :: nrxxs REAL(DP) :: xkp(3) REAL(DP) :: xkq(3) ! INTEGER, EXTERNAL :: global_kpoint_index ! CALL start_clock( 'vexxloc' ) ! ALLOCATE( fac(dfftt%ngm) ) ! nrxxs = dfftt%nnr ! ALLOCATE( RESULT(nrxxs) ) ALLOCATE( rhoc(nrxxs), vc(nrxxs) ) ! current_ik = global_kpoint_index ( nkstot, current_k ) current_jk = index_xkq(current_ik,1) ! NBin = 0 NBtot = 0 xkp = xk(:,current_k) DO jbnd = 1, NBands RESULT = (0.0_DP, 0.0_DP) DO iq = 1, nqs ikq = index_xkq(current_ik,iq) ik = index_xk(ikq) xkq = xkq_collect(:,ikq) CALL g2_convolution( dfftt%ngm, gt, xkp, xkq, fac ) DO ibnd = 1, NBands ! IF ( abs(x_occupation(ibnd,ik)) < eps_occ) CYCLE ! NBtot = NBtot + 1 IF ((exxmat(ibnd,ikq,jbnd,current_k) > local_thr).AND. & ((x_occupation(ibnd,ik) > eps_occ))) THEN NBin = NBin + 1 ! ! write(stdout,'(4I4,f12.6,A)') ibnd, ikq, jbnd, current_k, exxmat(ibnd,ikq,jbnd,current_k), ' IN ' !$omp parallel do default(shared), private(ir) DO ir = 1, nrxxs rhoc(ir)=CONJG(exxbuff(ir,ibnd,ikq))*exxbuff(ir,jbnd,current_jk) / omega ENDDO !$omp end parallel do CALL fwfft( 'Rho', rhoc, dfftt ) vc = (0._DP, 0._DP) !$omp parallel do default(shared), private(ig) DO ig = 1, dfftt%ngm vc(dfftt%nl(ig)) = & fac(ig) * rhoc(dfftt%nl(ig)) * x_occupation(ibnd,ik) / nqs ENDDO !$omp end parallel do CALL invfft( 'Rho', vc, dfftt ) !$omp parallel do default(shared), private(ir) DO ir = 1, nrxxs RESULT(ir) = RESULT(ir) + vc(ir)*exxbuff(ir,ibnd,ikq) ENDDO !$omp end parallel do ! ELSE ! write(stdout,'(4I4,f12.6,A)') ibnd, ikq, jbnd, current_k, exxmat(ibnd,ikq,jbnd,current_k), ' OUT' ENDIF ENDDO ENDDO ! CALL fwfft( 'Wave', RESULT, dfftt ) !$omp parallel do default(shared), private(ig) DO ig = 1, npw hpsi(ig,jbnd) = hpsi(ig,jbnd) - exxalfa*RESULT(dfftt%nl(igk_exx(ig,current_k))) ENDDO !$omp end parallel do ENDDO ! DEALLOCATE( RESULT ) DEALLOCATE( vc, fac ) ! ! ... Localized functions to G-space and exchange matrix onto localized functions ALLOCATE( RESULT2(npwx,NBands) ) RESULT2 = (0.0d0,0.0d0) ! DO jbnd = 1, NBands rhoc(:) = exxbuff(:,jbnd,current_jk) CALL fwfft( 'Wave' , rhoc, dfftt ) DO ig = 1, npw RESULT2(ig,jbnd) = rhoc(dfftt%nl(igk_exx(ig,current_k))) ENDDO ENDDO ! DEALLOCATE( rhoc ) CALL matcalc_k( 'M1-', .TRUE., 0, current_k, npwx*npol, NBands, NBands, RESULT2, hpsi, mexx, exxe ) DEALLOCATE( RESULT2 ) ! WRITE(stdout,'(7X,2(A,I12),A,f12.2)') ' Pairs(full): ', NBtot, & ' Pairs(included): ', NBin, & ' Pairs(%): ', DBLE(NBin)/DBLE(NBtot)*100.0d0 ! CALL stop_clock( 'vexxloc' ) ! END SUBROUTINE vexx_loc_k ! ! END MODULE exx