1!=========================================================================== 2! 3! Routines() 4! 5! (1) mtxel() Originally By (?) Last Modified 5/6/2012 (FHJ) 6! 7! Compute matrix elements (gme) for valence state iv with all 8! conduction bands and for all G-vectors. 9! 10! <c,k,ispin|exp{-i(q+G).r}|v,k+q,ispin> = [M_{vc}(k,q,G)]^* 11! 12! On exit, 13! pol%eden(band,spin) = 1/(e_val-e_cond) = energy denominators 14! pol%gme(band,g-vector,spin) = plane wave matrix elements 15! pol%isrtx orders the |G(i)|^2 i=1,pol%nmtx 16! vwfn%isort orders |qk+g|^2 (in vwfn type) 17! 18! energies are apparently assumed in Rydbergs. 19! 20!=========================================================================== 21 22#include "f_defs.h" 23 24module mtxel_m 25 26 use global_m 27 use fftw_m 28 use misc_m 29 use lin_denominator_m 30 use timing_m, only: timing => epsilon_timing 31 32 implicit none 33 34 private 35 36 public :: mtxel_init_FFT_cond, mtxel_free_FFT_cond, mtxel 37 38contains 39 40!> Precalculates the FFTs for all conduction bands 41subroutine mtxel_init_FFT_cond(gvec,pol,cwfn,kp) 42 type (gspace), intent(in) :: gvec 43 type (polarizability), intent(inout) :: pol 44 type (conduction_wfns), intent(inout) :: cwfn 45 type (kpoints), intent(inout) :: kp 46 47 real(DP) :: scale 48 integer, dimension(3) :: Nfft 49 integer :: j, offset_g, jsp, ict 50 51 PUSH_SUB(mtxel_init_FFT_cond) 52 53 if (kp%nspinor>1) then 54 call die('Epsilon on Steroids only supports one spin at the moment.') 55 endif 56 57 if( peinf%inode == 0 ) call timing%start(timing%opt_fft) 58 if( peinf%inode == 0 ) call timing%start(timing%opt_fft_fft) 59 call setup_FFT_sizes(pol%FFTgrid,Nfft,scale) 60 SAFE_ALLOCATE(cwfn%wfn_fft, (Nfft(1),Nfft(2),Nfft(3),peinf%ncownactual)) 61 62 jsp = 1 63! JRD XXX We should we be doing a many_fft call here or what is the point of allocating a bigger array 64 do j=1,peinf%ncownactual 65 offset_g = (j-1)*cwfn%ngc 66 67 call put_into_fftbox(cwfn%ngc,cwfn%zc(offset_g+1:,jsp),gvec%components,cwfn%isort,cwfn%wfn_fft(:,:,:,j),Nfft) 68 69 call do_FFT(cwfn%wfn_fft(:,:,:,j),Nfft,1) 70 enddo 71 72 if ( peinf%inode == 0 ) call timing%stop(timing%opt_fft) 73 if ( peinf%inode == 0 ) call timing%stop(timing%opt_fft_fft) 74 75 POP_SUB(mtxel_init_FFT_cond) 76 return 77 78end subroutine mtxel_init_FFT_cond 79 80!> Frees ffts_cond buffer 81subroutine mtxel_free_FFT_cond(cwfn) 82 type (conduction_wfns), intent(inout) :: cwfn 83 84 PUSH_SUB(mtxel_free_FFT_cond) 85 86 SAFE_DEALLOCATE_P(cwfn%wfn_fft) 87 88 POP_SUB(mtxel_free_FFT_cond) 89 return 90 91end subroutine mtxel_free_FFT_cond 92 93subroutine mtxel(iv,gvec,vwfn,cwfn,pol,ispin,irk,kp,kpq,rank_mtxel,kfact) 94 integer, intent(in) :: iv 95 type (gspace), intent(in) :: gvec 96 type (valence_wfns), intent(in) :: vwfn 97 type (conduction_wfns), intent(in) :: cwfn 98 type (polarizability), intent(inout) :: pol 99 type (kpoints), intent(inout) :: kp,kpq 100 integer, intent(in) :: ispin,irk 101 integer, intent(in) :: rank_mtxel 102 real(DP), intent(in) :: kfact 103 104 integer :: ic_loc, ic, ic_FE, offset_g, jsp, ig, iv_loc 105 integer :: freq_idx 106 integer :: ia, ib, ict 107 108 integer, dimension(3) :: Nfft 109 real(DP) :: scale, eden 110 complex(DPC), dimension(:,:,:), allocatable :: fftbox1,fftbox2 111 SCALAR, dimension(:), allocatable :: tmparray 112 logical :: keep_transition 113 114 PUSH_SUB(mtxel) 115 116 if(peinf%inode.eq.0) call timing%start(timing%mtxel_fft) 117 118 ! FHJ: notation: 119 ! iv_loc -> local valence band index 120 ! iv -> global valence band index (iv==1 := lowest-energy band) 121 ! ic_loc -> local conduction band index 122 ! ic -> global conduction band (ic==nv+1 := lowest-energy unocc. band) 123 ! ic_FE -> global conduction band, but starting at FE 124 ! (ic_FE==1 := first cond band). Rarely used. 125 126 iv_loc = peinf%indexv(iv) 127 128 if(pol%nfreq_group .gt. 1 .and. pol%gcomm .eq. 0) then 129 freq_idx = 1 130 else 131 freq_idx = rank_mtxel+1 132 endif 133 134!-------------------------- 135! Use FFTs to calculate matrix elements 136 137! Compute size of FFT box we need 138 139 call setup_FFT_sizes(pol%FFTgrid,Nfft,scale) 140! Allocate FFT boxes 141 SAFE_ALLOCATE(fftbox2, (Nfft(1),Nfft(2),Nfft(3))) 142 143! Put the data for valence band iv into FFT box 1 and do the FFT 144 145 if (pol%os_opt_ffts/=2) then 146 SAFE_ALLOCATE(fftbox1, (Nfft(1),Nfft(2),Nfft(3))) 147 endif 148 149! JRD XXX This needs to be threaded 150!$OMP PARALLEL DO collapse(2) 151 do ic_loc = 1, peinf%ncownactual 152 do ig = 1, pol%nmtx 153 pol%gme(ig, ic_loc, iv_loc, ispin, irk, freq_idx) = ZERO 154 enddo 155 enddo 156 157 SAFE_ALLOCATE(tmparray, (pol%nmtx)) 158 159 do jsp=ispin,ispin*kp%nspinor 160 161 if (pol%os_opt_ffts/=2) then 162 call put_into_fftbox(vwfn%ngv,vwfn%zv(:,jsp),gvec%components,vwfn%isort,fftbox1,Nfft) 163 call do_FFT(fftbox1,Nfft,1) 164 ! We need the complex conjugate of u_{vk+q)(r) for the cross correlation 165 call conjg_fftbox(fftbox1,Nfft) 166 endif 167 168! Now we loop over the conduction states and get the matrix elements: 169! 1. Get conduction wave function and put it into box 2, 170! 2. do FFT, get u_{ck}(r) 171! 3. multiply by box1 contents, get F(r) = [u_{vk+q)(r)]^* u_{ck}(r) 172! 4. do FFT again, and extract the resulting matrix elements and put the into pol 173! We conjugate the final result since we really want <ck|e^{-i(q+G).r}|vk+q> 174! but we have calculated <vk+q|e^{i(q+G).r}|ck>. 175 176 do ic_loc = 1, peinf%ncownactual 177 ic_FE = peinf%invindexc(ic_loc) 178 ic = vwfn%nband + ic_FE 179 offset_g = (ic_loc-1)*cwfn%ngc 180 181 if (pol%os_opt_ffts==2) then 182 ! FHJ: optimization level 2 precomputed all the FFTs 183 184 if(peinf%inode.eq.0) call timing%start(timing%fft_put) 185 186!$OMP PARALLEL DO collapse(2) PRIVATE(ia,ib,ict) 187 do ia = 1, Nfft(3) 188 do ib = 1, Nfft(2) 189 do ict = 1, Nfft(1) 190 fftbox2(ict,ib,ia) = vwfn%wfn_fft(ict,ib,ia,iv_loc) * cwfn%wfn_fft(ict,ib,ia,ic_loc) 191 enddo 192 enddo 193 enddo 194!$OMP END PARALLEL DO 195 196 if(peinf%inode.eq.0) call timing%stop(timing%fft_put) 197 198 else 199 if (pol%os_opt_ffts==1) then 200 201 if(peinf%inode.eq.0) call timing%start(timing%fft_put) 202 203 ! FHJ: Optimization level 1 precalculated at least these cond. FFTs 204!$OMP PARALLEL DO collapse(2) PRIVATE (ia,ib,ict) 205 do ia = 1, Nfft(3) 206 do ib = 1, Nfft(2) 207 do ict = 1, Nfft(1) 208 fftbox2(ict,ib,ia) = fftbox1(ict,ib,ia) * cwfn%wfn_fft(ict,ib,ia,ic_loc) 209 enddo 210 enddo 211 enddo 212!$OMP END PARALLEL DO 213 214 if(peinf%inode.eq.0) call timing%stop(timing%fft_put) 215 216 else 217 call put_into_fftbox(cwfn%ngc,cwfn%zc(offset_g+1:,jsp),gvec%components,cwfn%isort,fftbox2,Nfft) 218 call do_FFT(fftbox2,Nfft,1) 219 call multiply_fftboxes(fftbox1,fftbox2,Nfft) 220 endif 221 endif 222 223 call do_FFT(fftbox2,Nfft,1) 224 call get_from_fftbox(pol%nmtx,tmparray,gvec%components,pol%isrtx,fftbox2,Nfft,scale) 225 226 keep_transition = .true. 227 228 if(peinf%inode.eq.0) call timing%start(timing%fft_mltply) 229 230 if (keep_transition) then 231!$OMP PARALLEL DO 232 do ia = 1, pol%nmtx 233 pol%gme(ia, ic_loc, iv_loc, ispin, irk, freq_idx) = & 234 pol%gme(ia, ic_loc, iv_loc, ispin, irk, freq_idx) + MYCONJG(tmparray(ia))*kfact 235 enddo 236!$OMP END PARALLEL DO 237 endif 238 239 ! Get energy denominator (static), or transition energy (FF) 240 call get_eden() 241 242 if (kp%nspinor.eq.1 .or. jsp.eq.2) then 243 if (pol%freq_dep .eq. 0) then 244!$OMP PARALLEL DO 245 do ia = 1, pol%nmtx 246 pol%gme(ia, ic_loc, iv_loc, ispin, irk, 1) = & 247 pol%gme(ia, ic_loc, iv_loc, ispin, irk, 1) * & 248 sqrt(-1D0*eden) 249 enddo 250!$OMP END PARALLEL DO 251 endif 252 endif 253 254 if(peinf%inode.eq.0) call timing%stop(timing%fft_mltply) 255 256 enddo 257 258 enddo 259 260 SAFE_DEALLOCATE(tmparray) 261 262! We are done, so deallocate FFT boxes 263 if (pol%os_opt_ffts.ne.2) then 264 SAFE_DEALLOCATE(fftbox1) 265 endif 266 SAFE_DEALLOCATE(fftbox2) 267 268 269! End FFT Case 270!--------------------------- 271 272 if(peinf%inode.eq.0) call timing%stop(timing%mtxel_fft) 273 274 POP_SUB(mtxel) 275 276 return 277 278contains 279 280 !> FHJ: Get energy denominator (static), or transition energy (FF) 281 subroutine get_eden() 282 type(cvpair_info) :: lin_edenTemp 283 real(DP) :: eval, econd, occ_v, occ_c, occ_diff 284 real(DP) :: vk(2), vkq(2) 285 286 PUSH_SUB(mtxel.get_eden) 287 288 if(peinf%inode.eq.0) call timing%start(timing%mtxel_denom) 289 290 ! FHJ: See convention for conduction/valence band indices above. 291 eval = vwfn%ev(iv,ispin) 292 econd = cwfn%ec(ic, ispin) 293 eden = 0d0 294 295 ! guess occupations based on efermi; eventually this should be replaced by use of kp%occ 296 if(eval*ryd > pol%efermi + TOL_Degeneracy) then 297 occ_v = 0d0 298 else if (eval*ryd < pol%efermi - TOL_Degeneracy) then 299 occ_v = 1d0 300 else 301 occ_v = 0.5 ! within TOL_Degeneracy of the Fermi level, use FD(E_F) = 1/2 302 endif 303 if(econd*ryd > pol%efermi + TOL_Degeneracy) then 304 occ_c = 0d0 305 else if (econd*ryd < pol%efermi - TOL_Degeneracy) then 306 occ_c = 1d0 307 else 308 occ_c = 0.5 ! within TOL_Degeneracy of the Fermi level, use FD(E_F) = 1/2 309 endif 310 occ_diff = occ_v - occ_c 311 312 ! FHJ: Note that eden means different things depending on pol%freq_dep 313 ! static: eden := 1/(transition energy). 314 ! FF: eden := (transition energy). (I know...) 315 ! In the static case, we lump sqrt(eden) into the matrix elements gme. 316 ! In the FF case, we have to put it by hand for each frequency we evaluate chi. 317 if (pol%freq_dep==0) then 318 319 if(eval - econd < TOL_Degeneracy .and. occ_diff > TOL_Zero) then 320 ! avoid dividing by zero or making eden > 0 321 eden = occ_diff / (eval - econd) 322 else 323 eden = 0d0 ! in this case, occ_diff = 0 too 324 endif 325 326 327 elseif (pol%freq_dep==2 .or. pol%freq_dep==3) then 328 329 ! FHJ: In chi_summation, we explicitly neglect transitions if eden<TOL_ZERO. 330 ! That`s the way we keep track of forbidden transitions. 331 if(eval - econd < TOL_Degeneracy .and. occ_diff > TOL_Zero) then 332 eden = (eval - econd) / occ_diff 333 else 334 eden = 0.0d0 335 endif 336 337 338 pol%edenDyn(iv_loc, ic_loc, ispin, irk, freq_idx) = eden 339 340 endif !pol%freq_dep 341 342 if(peinf%inode.eq.0) call timing%stop(timing%mtxel_denom) 343 344 POP_SUB(mtxel.get_eden) 345 346 end subroutine get_eden 347 348end subroutine mtxel 349 350end module mtxel_m 351