1!===========================================================================
2!
3! Routines()
4!
5! (1) mtxel()   Originally By (?)               Last Modified 5/6/2012 (FHJ)
6!
7!     Compute matrix elements (gme) for valence state iv with all
8!     conduction bands and for all G-vectors.
9!
10!                <c,k,ispin|exp{-i(q+G).r}|v,k+q,ispin> = [M_{vc}(k,q,G)]^*
11!
12!     On exit,
13!       pol%eden(band,spin) = 1/(e_val-e_cond) = energy denominators
14!       pol%gme(band,g-vector,spin) = plane wave matrix elements
15!       pol%isrtx   orders the |G(i)|^2   i=1,pol%nmtx
16!       vwfn%isort  orders |qk+g|^2    (in vwfn type)
17!
18!       energies are apparently assumed in Rydbergs.
19!
20!===========================================================================
21
22#include "f_defs.h"
23
24module mtxel_m
25
26  use global_m
27  use fftw_m
28  use misc_m
29  use lin_denominator_m
30  use timing_m, only: timing => epsilon_timing
31
32  implicit none
33
34  private
35
36  public :: mtxel_init_FFT_cond, mtxel_free_FFT_cond, mtxel
37
38contains
39
40!> Precalculates the FFTs for all conduction bands
41subroutine mtxel_init_FFT_cond(gvec,pol,cwfn,kp)
42  type (gspace), intent(in) :: gvec
43  type (polarizability), intent(inout) :: pol
44  type (conduction_wfns), intent(inout) :: cwfn
45  type (kpoints), intent(inout) :: kp
46
47  real(DP) :: scale
48  integer, dimension(3) :: Nfft
49  integer :: j, offset_g, jsp, ict
50
51  PUSH_SUB(mtxel_init_FFT_cond)
52
53  if (kp%nspinor>1) then
54      call die('Epsilon on Steroids only supports one spin at the moment.')
55  endif
56
57  if( peinf%inode == 0 ) call timing%start(timing%opt_fft)
58  if( peinf%inode == 0 ) call timing%start(timing%opt_fft_fft)
59  call setup_FFT_sizes(pol%FFTgrid,Nfft,scale)
60  SAFE_ALLOCATE(cwfn%wfn_fft, (Nfft(1),Nfft(2),Nfft(3),peinf%ncownactual))
61
62  jsp = 1
63! JRD XXX We should we be doing a many_fft call here or what is the point of allocating a bigger array
64  do j=1,peinf%ncownactual
65    offset_g = (j-1)*cwfn%ngc
66
67    call put_into_fftbox(cwfn%ngc,cwfn%zc(offset_g+1:,jsp),gvec%components,cwfn%isort,cwfn%wfn_fft(:,:,:,j),Nfft)
68
69    call do_FFT(cwfn%wfn_fft(:,:,:,j),Nfft,1)
70  enddo
71
72  if ( peinf%inode == 0 ) call timing%stop(timing%opt_fft)
73  if ( peinf%inode == 0 ) call timing%stop(timing%opt_fft_fft)
74
75  POP_SUB(mtxel_init_FFT_cond)
76  return
77
78end subroutine mtxel_init_FFT_cond
79
80!> Frees ffts_cond buffer
81subroutine mtxel_free_FFT_cond(cwfn)
82  type (conduction_wfns), intent(inout) :: cwfn
83
84  PUSH_SUB(mtxel_free_FFT_cond)
85
86  SAFE_DEALLOCATE_P(cwfn%wfn_fft)
87
88  POP_SUB(mtxel_free_FFT_cond)
89  return
90
91end subroutine mtxel_free_FFT_cond
92
93subroutine mtxel(iv,gvec,vwfn,cwfn,pol,ispin,irk,kp,kpq,rank_mtxel,kfact)
94  integer, intent(in) :: iv
95  type (gspace), intent(in) :: gvec
96  type (valence_wfns), intent(in) :: vwfn
97  type (conduction_wfns), intent(in) :: cwfn
98  type (polarizability), intent(inout) :: pol
99  type (kpoints), intent(inout) :: kp,kpq
100  integer, intent(in) :: ispin,irk
101  integer, intent(in) :: rank_mtxel
102  real(DP), intent(in) :: kfact
103
104  integer :: ic_loc, ic, ic_FE, offset_g, jsp, ig, iv_loc
105  integer :: freq_idx
106  integer :: ia, ib, ict
107
108  integer, dimension(3) :: Nfft
109  real(DP) :: scale, eden
110  complex(DPC), dimension(:,:,:), allocatable :: fftbox1,fftbox2
111  SCALAR, dimension(:), allocatable :: tmparray
112  logical :: keep_transition
113
114  PUSH_SUB(mtxel)
115
116  if(peinf%inode.eq.0) call timing%start(timing%mtxel_fft)
117
118  ! FHJ: notation:
119  ! iv_loc -> local valence band index
120  ! iv -> global valence band index (iv==1 := lowest-energy band)
121  ! ic_loc -> local conduction band index
122  ! ic -> global conduction band (ic==nv+1 := lowest-energy unocc. band)
123  ! ic_FE -> global conduction band, but starting at FE
124  !          (ic_FE==1 := first cond band). Rarely used.
125
126  iv_loc = peinf%indexv(iv)
127
128  if(pol%nfreq_group .gt. 1 .and. pol%gcomm .eq. 0) then
129    freq_idx = 1
130  else
131    freq_idx = rank_mtxel+1
132  endif
133
134!--------------------------
135! Use FFTs to calculate matrix elements
136
137! Compute size of FFT box we need
138
139  call setup_FFT_sizes(pol%FFTgrid,Nfft,scale)
140! Allocate FFT boxes
141  SAFE_ALLOCATE(fftbox2, (Nfft(1),Nfft(2),Nfft(3)))
142
143! Put the data for valence band iv into FFT box 1 and do the FFT
144
145  if (pol%os_opt_ffts/=2) then
146    SAFE_ALLOCATE(fftbox1, (Nfft(1),Nfft(2),Nfft(3)))
147  endif
148
149! JRD XXX This needs to be threaded
150!$OMP PARALLEL DO collapse(2)
151  do ic_loc = 1, peinf%ncownactual
152    do ig = 1, pol%nmtx
153      pol%gme(ig, ic_loc, iv_loc, ispin, irk, freq_idx) = ZERO
154    enddo
155  enddo
156
157  SAFE_ALLOCATE(tmparray, (pol%nmtx))
158
159  do jsp=ispin,ispin*kp%nspinor
160
161    if (pol%os_opt_ffts/=2) then
162      call put_into_fftbox(vwfn%ngv,vwfn%zv(:,jsp),gvec%components,vwfn%isort,fftbox1,Nfft)
163      call do_FFT(fftbox1,Nfft,1)
164      ! We need the complex conjugate of u_{vk+q)(r) for the cross correlation
165      call conjg_fftbox(fftbox1,Nfft)
166    endif
167
168! Now we loop over the conduction states and get the matrix elements:
169! 1. Get conduction wave function and put it into box 2,
170! 2. do FFT, get u_{ck}(r)
171! 3. multiply by box1 contents, get F(r) = [u_{vk+q)(r)]^* u_{ck}(r)
172! 4. do FFT again, and extract the resulting matrix elements and put the into pol
173! We conjugate the final result since we really want <ck|e^{-i(q+G).r}|vk+q>
174! but we have calculated <vk+q|e^{i(q+G).r}|ck>.
175
176    do ic_loc = 1, peinf%ncownactual
177      ic_FE = peinf%invindexc(ic_loc)
178      ic = vwfn%nband + ic_FE
179      offset_g = (ic_loc-1)*cwfn%ngc
180
181      if (pol%os_opt_ffts==2) then
182        ! FHJ: optimization level 2 precomputed all the FFTs
183
184        if(peinf%inode.eq.0) call timing%start(timing%fft_put)
185
186!$OMP PARALLEL DO collapse(2) PRIVATE(ia,ib,ict)
187        do ia = 1, Nfft(3)
188        do ib = 1, Nfft(2)
189          do ict = 1, Nfft(1)
190            fftbox2(ict,ib,ia) = vwfn%wfn_fft(ict,ib,ia,iv_loc) * cwfn%wfn_fft(ict,ib,ia,ic_loc)
191          enddo
192        enddo
193        enddo
194!$OMP END PARALLEL DO
195
196        if(peinf%inode.eq.0) call timing%stop(timing%fft_put)
197
198     else
199        if (pol%os_opt_ffts==1) then
200
201          if(peinf%inode.eq.0) call timing%start(timing%fft_put)
202
203          ! FHJ: Optimization level 1 precalculated at least these cond. FFTs
204!$OMP PARALLEL DO collapse(2) PRIVATE (ia,ib,ict)
205          do ia = 1, Nfft(3)
206          do ib = 1, Nfft(2)
207            do ict = 1, Nfft(1)
208              fftbox2(ict,ib,ia) = fftbox1(ict,ib,ia) * cwfn%wfn_fft(ict,ib,ia,ic_loc)
209            enddo
210          enddo
211          enddo
212!$OMP END PARALLEL DO
213
214          if(peinf%inode.eq.0) call timing%stop(timing%fft_put)
215
216        else
217          call put_into_fftbox(cwfn%ngc,cwfn%zc(offset_g+1:,jsp),gvec%components,cwfn%isort,fftbox2,Nfft)
218          call do_FFT(fftbox2,Nfft,1)
219          call multiply_fftboxes(fftbox1,fftbox2,Nfft)
220        endif
221      endif
222
223      call do_FFT(fftbox2,Nfft,1)
224      call get_from_fftbox(pol%nmtx,tmparray,gvec%components,pol%isrtx,fftbox2,Nfft,scale)
225
226      keep_transition = .true.
227
228      if(peinf%inode.eq.0) call timing%start(timing%fft_mltply)
229
230      if (keep_transition) then
231!$OMP PARALLEL DO
232        do ia = 1, pol%nmtx
233          pol%gme(ia, ic_loc, iv_loc, ispin, irk, freq_idx) = &
234            pol%gme(ia, ic_loc, iv_loc, ispin, irk, freq_idx) + MYCONJG(tmparray(ia))*kfact
235        enddo
236!$OMP END PARALLEL DO
237      endif
238
239      ! Get energy denominator (static), or transition energy (FF)
240      call get_eden()
241
242      if (kp%nspinor.eq.1 .or. jsp.eq.2) then
243        if (pol%freq_dep .eq. 0) then
244!$OMP PARALLEL DO
245          do ia = 1, pol%nmtx
246            pol%gme(ia, ic_loc, iv_loc, ispin, irk, 1) = &
247              pol%gme(ia, ic_loc, iv_loc, ispin, irk, 1) * &
248              sqrt(-1D0*eden)
249          enddo
250!$OMP END PARALLEL DO
251        endif
252      endif
253
254      if(peinf%inode.eq.0) call timing%stop(timing%fft_mltply)
255
256    enddo
257
258  enddo
259
260  SAFE_DEALLOCATE(tmparray)
261
262! We are done, so deallocate FFT boxes
263  if (pol%os_opt_ffts.ne.2) then
264    SAFE_DEALLOCATE(fftbox1)
265  endif
266  SAFE_DEALLOCATE(fftbox2)
267
268
269! End FFT Case
270!---------------------------
271
272  if(peinf%inode.eq.0) call timing%stop(timing%mtxel_fft)
273
274  POP_SUB(mtxel)
275
276  return
277
278contains
279
280  !> FHJ: Get energy denominator (static), or transition energy (FF)
281  subroutine get_eden()
282    type(cvpair_info) :: lin_edenTemp
283    real(DP) :: eval, econd, occ_v, occ_c, occ_diff
284    real(DP) :: vk(2), vkq(2)
285
286    PUSH_SUB(mtxel.get_eden)
287
288    if(peinf%inode.eq.0) call timing%start(timing%mtxel_denom)
289
290    ! FHJ: See convention for conduction/valence band indices above.
291    eval = vwfn%ev(iv,ispin)
292    econd = cwfn%ec(ic, ispin)
293    eden = 0d0
294
295    ! guess occupations based on efermi; eventually this should be replaced by use of kp%occ
296    if(eval*ryd > pol%efermi + TOL_Degeneracy) then
297      occ_v = 0d0
298    else if (eval*ryd < pol%efermi - TOL_Degeneracy) then
299      occ_v = 1d0
300    else
301      occ_v = 0.5  ! within TOL_Degeneracy of the Fermi level, use FD(E_F) = 1/2
302    endif
303    if(econd*ryd > pol%efermi + TOL_Degeneracy) then
304      occ_c = 0d0
305    else if (econd*ryd < pol%efermi - TOL_Degeneracy) then
306      occ_c = 1d0
307    else
308      occ_c = 0.5  ! within TOL_Degeneracy of the Fermi level, use FD(E_F) = 1/2
309    endif
310    occ_diff = occ_v - occ_c
311
312    ! FHJ: Note that eden means different things depending on pol%freq_dep
313    ! static: eden := 1/(transition energy).
314    ! FF: eden := (transition energy). (I know...)
315    ! In the static case, we lump sqrt(eden) into the matrix elements gme.
316    ! In the FF case, we have to put it by hand for each frequency we evaluate chi.
317    if (pol%freq_dep==0) then
318
319      if(eval - econd < TOL_Degeneracy .and. occ_diff > TOL_Zero) then
320        ! avoid dividing by zero or making eden > 0
321        eden = occ_diff / (eval - econd)
322      else
323        eden = 0d0 ! in this case, occ_diff = 0 too
324      endif
325
326
327    elseif (pol%freq_dep==2 .or. pol%freq_dep==3) then
328
329      ! FHJ: In chi_summation, we explicitly neglect transitions if eden<TOL_ZERO.
330      ! That`s the way we keep track of forbidden transitions.
331      if(eval - econd < TOL_Degeneracy .and. occ_diff > TOL_Zero) then
332        eden = (eval - econd) / occ_diff
333      else
334        eden = 0.0d0
335      endif
336
337
338      pol%edenDyn(iv_loc, ic_loc, ispin, irk, freq_idx) = eden
339
340    endif !pol%freq_dep
341
342    if(peinf%inode.eq.0) call timing%stop(timing%mtxel_denom)
343
344    POP_SUB(mtxel.get_eden)
345
346  end subroutine get_eden
347
348end subroutine mtxel
349
350end module mtxel_m
351