1!===============================================================================
2!
3! Routines:
4!
5! (1) diag main         Originally by MLT       Last Edited: 5/12/2008 (JRD)
6!
7!     For more details see the README_absorption file.
8!
9!     Calculates the real and imaginary parts of the macroscopic dielectric
10!     function starting from interaction matrix elements calculated by
11!     the Kernel code. It uses interpolation in the matrix elements and
12!     direct diagonalization of the Bethe-Salpeter equation.
13!     Spin-polarized case implemented.
14!
15!     For more details, see:
16!     Rohlfing & Louie, PRB 62:(8), 4927 (2000)
17!     G. Strinati, Rivista del Nuovo Cimento 11:(12), 1 (1988)
18!
19!     Please report bugs to: jdeslip@civet.berkeley.edu
20!
21!================================================================================
22
23#include "f_defs.h"
24
25subroutine diag(eqp,xct,flag,neig,nmax)
26
27  use global_m
28  use absp_io_m
29  use fullbz_m
30  use genwf_m
31  use intkernel_m
32  use intwfn_m
33  use misc_m
34  use vmtxel_m
35  use diagonalize_m
36  use evecs_m
37  use input_fi_m
38  use input_q_m
39  use absp_lanczos_m
40  use timing_m, only: timing => bse_timing
41  implicit none
42
43  type (eqpinfo), intent(inout) :: eqp
44  type (xctinfo), intent(inout) :: xct
45  type (flags), intent(inout) :: flag
46  integer, intent(inout) :: neig
47  integer, intent(in) :: nmax
48
49  type (crystal) :: crys
50  type (symmetry) :: syms
51  type (gspace) :: gvec
52  type (grid) :: kg_fi, kgq_fi,kg_co,kgq_co
53  type (kpoints) :: kp_fi, kpq_fi
54  type (wavefunction) :: wfnc_fi
55  type (wavefunction) :: wfnvq_fi
56  type (work_genwf) :: work, workq
57  type (int_wavefunction) :: intwfnc
58  type (int_wavefunction) :: intwfnv
59  type (evecs_t) :: evecs
60  type (vmtxel_t) :: dip
61
62  character :: tmpstr*128,filename*20
63  integer :: ii,ipol,jj,ncount,nmat,ncvs_fi,pblock
64  integer :: ikb, icb, ivb
65  integer :: ik,ikq,ikt,iblock,ikcvs,ikcvsd,ic,iv,is
66  integer :: iunit_c,iunit_v
67  real(DP) :: vol,omega_plasma,en_diff_max
68  real(DP) :: tsec(2),tmin(2),tmax(2)
69
70  character*16, allocatable :: routnam(:)
71  integer, allocatable :: routsrt(:)
72  integer, allocatable :: fi2co_wfn(:,:),indexq_fi(:)
73  real(DP), allocatable :: evals(:), kco(:,:), cs(:,:), cs0(:), rdum(:,:)
74
75  SCALAR, allocatable :: dcc(:,:,:,:,:),dvv(:,:,:,:,:),s0(:), &
76                         hqpcc(:,:,:), hqpvv(:,:,:), rdum2(:,:)
77  SCALAR, allocatable :: dipoles_l(:,:), dipoles_r(:,:), cs_full(:,:)
78  !> (kcvs, k`c`v`s`), "A" block of BSE Hamiltonian
79  SCALAR, allocatable :: hbse_a(:,:)
80  !> (kcvs, k`c`v`s`), "B" block of BSE Hamiltonian, only if tda=.false.
81  SCALAR, allocatable :: hbse_b(:,:)
82  real(DP), allocatable :: intp_coefs(:,:)
83  character(len=2) :: suffix(3) = (/'b1', 'b2', 'b3'/)
84
85  ! DYQ: Variables used in finite Q
86  integer, allocatable :: kg0_temp(:,:)
87  integer :: umk
88  real(DP) :: kq(3),qq(3)
89  real :: delta
90
91  !DYQ: Variables used for clustered subsampling
92  integer ::ik_sub,nsub_files,nk_sub
93  type(grid) :: kg_sub_co
94  SCALAR, allocatable :: dvv_sub(:,:,:,:,:,:),dcc_sub(:,:,:,:,:,:)
95  integer,allocatable :: closepts_sub(:,:)
96
97  PUSH_SUB(diag)
98
99! JRD: A Check for Finite Q
100
101!      if (peinf%inode .eq. 0) then
102!        write(6,*) 'nkpt_co = ', xct%nkpt_co
103!      endif
104
105  if(flag%vm.eq.2) then
106    if (peinf%inode.eq.0) then
107      write(0,*) 'WARNING: read_eps2_moments not supported in this diagonalization code. Ignoring keyword.'
108    endif
109    flag%vm=0
110  endif
111
112!--------------------------
113! If eigenvalues.dat is available, read them and go straight to
114! calculation of the absorption spectrum
115
116
117  if (flag%spec.eq.1) then
118    if (peinf%inode .eq. 0) then
119
120      omega_plasma = 0.d0
121
122      write(6,*) 'Create absorption_noeh.dat from eigenvalues_noeh.dat'
123      do ipol=1,xct%npol
124        call read_eigenvalues_noeh(xct,neig,vol,eqp,s0,ipol)
125        call absp0(eqp,xct,s0,vol,omega_plasma,flag,ipol)
126        SAFE_DEALLOCATE_P(eqp%evqp)
127        SAFE_DEALLOCATE_P(eqp%ecqp)
128        SAFE_DEALLOCATE_P(eqp%evlda)
129        SAFE_DEALLOCATE_P(eqp%eclda)
130        SAFE_DEALLOCATE(s0)
131      enddo
132
133      if (xct%iabsorp0 .eq. 0) then
134        write(6,*) 'Create absorption_eh.dat from eigenvalues.dat'
135        do ipol=1,xct%npol
136          call read_eigenvalues(xct,neig,vol,evals,cs0,ipol)
137          call absp(xct,neig,cs0,evals,vol,omega_plasma,flag,ipol)
138          SAFE_DEALLOCATE(evals)
139          SAFE_DEALLOCATE(cs0)
140        enddo
141      endif
142    endif
143
144    call diag_end()
145    POP_SUB(diag)
146    return
147  endif
148
149!--------------------------
150! Read wavefunctions on the fine grid
151
152  call logit('Calling input')
153  call timing%start(timing%input)
154  call input_fi(crys,gvec,kg_fi,kp_fi,syms,eqp,xct,flag, &
155    omega_plasma,.true.,intwfnc)
156
157! If there is no specified number of eigenvalues, calculate
158! all eigenvalues
159
160  nmat = xct%nkpt_fi*xct%ncb_fi*xct%nvb_fi*xct%nspin
161  if (xct%algo==BSE_ALGO_DIAG) then
162    if (xct%tda) then
163      if (neig==0) neig = nmat
164    else
165      if (peinf%inode==0.and.neig/=0) then
166        write(0,'(/,a,/)') 'WARNING: BSE calculations ignore the `number_eigenvalues` flag.'
167      endif
168#ifdef USESCALAPACK
169      ! FHJ: Meiyue`s solver is structure preserving, so we only compute the positive evecs
170      neig = nmat
171#else
172      ! FHJ: generic solver
173      neig = 2*nmat
174#endif
175    endif
176    if ((neig<=0).or.(neig>2*nmat).or.(neig>nmat.and.xct%tda)&
177#ifdef USESCALAPACK
178      .or.(neig>nmat)&
179#endif
180    ) then
181      write(tmpstr,'(a,2i6)') 'Incomprehensible request of eigenvalues : ', neig, nmat
182      call die(tmpstr, only_root_writes = .true.)
183    endif
184  else
185    neig = nmat
186  endif
187
188  vol = xct%nktotal*crys%celvol
189  if (peinf%inode.eq.0) then
190    write(6,'(/1x,a)') 'More job parameters:'
191    write(6,'(1x,a,es9.3e2)') '- Crystal volume (bohr): ', vol
192    write(6,'(1x,a,f0.3)') '- Broadening (eV): ', xct%eta
193    write(6,'(1x,a,i0)') '- Number of valence bands: ', xct%nvb_fi
194    write(6,'(1x,a,i0)') '- Number of cond. bands: ', xct%ncb_fi
195    write(6,'(1x,a,i0)') '- Number of spins: ', xct%nspin
196    write(6,'(1x,a,i0)') '- Number of eigenvalues to be computed: ', neig
197    write(6,'()')
198  endif
199  call timing%stop(timing%input)
200
201  SAFE_ALLOCATE(indexq_fi, (xct%nkpt_fi))
202  SAFE_ALLOCATE(xct%indexq_fi, (xct%nkpt_fi))
203  if (flag%vm.ne.1.or. .not. flag%read_dtmat) then
204    call timing%start(timing%input_q)
205    call logit('Calling input_q')
206    call input_q(kp_fi,crys,gvec,kg_fi,kgq_fi,kpq_fi,syms,xct,indexq_fi,eqp,flag,intwfnv)
207    call timing%stop(timing%input_q)
208  endif
209
210! JRD: Don`t do this next section if only want absorption_noeh.dat
211
212  if (xct%iabsorp0 .eq. 0) then
213
214!------------------------------
215! Calculate the transformation matrices from coarse grid wavefunctions
216! FHJ: These are the final transformation coefs that will be used to interpolate
217! the kernel. However, we might use an unrestricted version of dvv/dcc to
218! interpolate eqp if xct%unrestricted_transf==.true..
219    SAFE_ALLOCATE(dvv, (xct%nvb_fi,xct%n1b_co,xct%nspin,xct%nkpt_fi,xct%npts_intp_kernel))
220    SAFE_ALLOCATE(dcc, (xct%ncb_fi,xct%n2b_co,xct%nspin,xct%nkpt_fi,xct%npts_intp_kernel))
221    SAFE_ALLOCATE(kco, (3,xct%nkpt_co))
222    SAFE_ALLOCATE(fi2co_wfn, (xct%npts_intp_kernel,xct%nkpt_fi))
223    SAFE_ALLOCATE(intp_coefs, (xct%npts_intp_kernel, xct%nkpt_fi))
224
225    call logit('Calling intwfn')
226    call timing%start(timing%intwfn)
227    call intwfn(kp_fi,crys,syms,xct,flag,gvec,kg_fi,kgq_fi,kg_co,kgq_co,dcc,dvv,&
228      kco,fi2co_wfn,indexq_fi,eqp,intwfnv,intwfnc,intp_coefs)
229    call timing%stop(timing%intwfn)
230
231    if (xct%subsample_line) then
232      call logit('Calling intwfn_sub')
233      call open_file(unit=77, file='subsample.inp',form='formatted',status='old')
234      read(77,*) nk_sub
235      read(77,*) nsub_files
236      if (nsub_files.ne.xct%nkpt_co) call die("Number of subsampled points must be the same as the number of coarse points",&
237        only_root_writes=.true.)
238      kg_sub_co%nr = nsub_files
239      SAFE_ALLOCATE(kg_sub_co%r,(3,xct%nkpt_co))
240      do ik_sub=1,xct%nkpt_co
241        read(77,*) (kg_sub_co%r(ii,ik_sub),ii=1,3)
242      enddo
243      call close_file(77)
244      SAFE_ALLOCATE(dcc_sub,(xct%ncb_fi, xct%n2b_co, xct%nspin, xct%nkpt_fi,xct%idimensions+1, nk_sub))
245      SAFE_ALLOCATE(dvv_sub,(xct%nvb_fi, xct%n1b_co, xct%nspin, xct%nkpt_fi,xct%idimensions+1, nk_sub))
246      SAFE_ALLOCATE(closepts_sub,(4,xct%nkpt_fi))
247      call intwfn_sub(kp_fi,crys,syms,xct,flag,gvec,kg_fi,kgq_fi, &
248        dcc_sub,dvv_sub,kg_sub_co,kg_co,indexq_fi,nk_sub,intwfnv,intwfnc,closepts_sub)
249    endif
250
251  endif
252
253  SAFE_DEALLOCATE_P(xct%ifmax)
254  if ((flag%vm.ne.1.or. .not. flag%read_dtmat) .and..not. xct%no_mtxel) then
255    ! otherwise, we did not call input_q to allocate it
256    SAFE_DEALLOCATE_P(xct%ifmaxq)
257  endif
258
259!------------ Calculate the velocity (or momentum) matrix elements -------------
260
261! Each PE calculates a small number of them. At the end, share data
262!
263! If flag%vm.eq.1, skip this part and just read the matrix elements
264! from "vmtxel".
265!
266! peinf%block_sz = size of a distributed column in hbse_a
267
268
269  call logit('Calculating v/p matrix elememts')
270  ncvs_fi = xct%ncb_fi*xct%nvb_fi*xct%nspin
271  if (xct%ipar .eq. 1) then
272    peinf%block_sz = ncvs_fi
273  else if (xct%ipar .eq. 2) then
274    peinf%block_sz = xct%nvb_fi*xct%nspin
275  else
276    peinf%block_sz = xct%nspin
277  endif
278  nmat = xct%nkpt_fi*ncvs_fi
279
280  ! Initialize dipole operator and allocate dipole matrix elements
281  call dip%init_from_xctinfo(xct, opr=flag%opr)
282  call dip%alloc()
283
284  ! Copy list of k-points
285  do ik=1, dip%nk
286    dip%kpt(:,ik) = kg_fi%f(:,ik)
287  end do
288
289  ! FIXME: It was decided to hold on the use of hdf5 for vmtxel
290  !        until the next release, so as not to break backward compatibility
291  !        Since using hdf5 for vmtxel is highly desirable, let us not
292  !        forget to reactivate it as soon as possible.
293  dip%use_hdf5 = .false.
294
295  call timing%start(timing%vmtxel)
296
297  if (.not.xct%no_mtxel) then
298  if (flag%vm.eq.0) then
299    do ikt=1, peinf%ikt(peinf%inode+1)
300      ik = peinf%ik(peinf%inode+1,ikt)
301      if (xct%qflag .eq. 1) then
302        ikq = indexq_fi(ik)
303      else
304        ! genwf will retrieve the valence bands at k+q+Q
305        ikq = xct%indexq_fi(ik)
306        if (ikq.eq.0 .and. xct%patched_sampling) then
307          if (peinf%inode.eq.0) write(6,*) "Skipping genwf for ik,ikq:",ik,ikq
308          cycle
309        endif
310      endif
311
312      call genwf(crys,gvec,kg_fi,syms,wfnc_fi,ik,ik,xct%nspin,xct%ncb_fi,&
313                 work,intwfnc,xct%iwriteint,is_cond=.true.)
314      call genwf(crys,gvec,kgq_fi,syms,wfnvq_fi,ik,ikq,xct%nspin,xct%nvb_fi,&
315                 workq,intwfnv,xct%iwriteint,is_cond=.false.)
316
317      call dip%compute_ik_vmtxel(ik, wfnc_fi, wfnvq_fi, gvec, xct%qshift, crys, eqp)
318
319      SAFE_DEALLOCATE_P(wfnc_fi%cg)
320      SAFE_DEALLOCATE_P(wfnc_fi%isort)
321      SAFE_DEALLOCATE_P(wfnvq_fi%cg)
322      SAFE_DEALLOCATE_P(wfnvq_fi%isort)
323    enddo
324
325    ! typedefs initializes all of these ikolds to 0
326    if(work%ikold.ne.0) then
327      SAFE_DEALLOCATE_P(work%cg)
328      SAFE_DEALLOCATE_P(work%ph)
329      SAFE_DEALLOCATE_P(work%ind)
330      SAFE_DEALLOCATE_P(work%isort)
331    endif
332    if(workq%ikold.ne.0) then
333      SAFE_DEALLOCATE_P(workq%cg)
334      SAFE_DEALLOCATE_P(workq%ph)
335      SAFE_DEALLOCATE_P(workq%ind)
336      SAFE_DEALLOCATE_P(workq%isort)
337    endif
338
339    ! Share matrix elements
340    call dip%reduce()
341
342    ! Write file
343    call dip%write_vmtxel()
344
345  else
346    ! Read dipole matrix elements from file if they were already computed
347    call dip%read_vmtxel()
348  endif
349
350  if (flag%vm.ne.1.or. .not. flag%read_dtmat) then
351    call dealloc_grid(kgq_fi)
352  endif
353
354  if (flag%vm == 0 .and. .not. flag%read_dtmat) then
355    SAFE_DEALLOCATE_P(intwfnc%cgk)
356    SAFE_DEALLOCATE_P(intwfnv%cgk)
357    SAFE_DEALLOCATE_P(intwfnc%isort)
358    SAFE_DEALLOCATE_P(intwfnv%isort)
359  endif
360
361  else ! xct%no_mtxel
362    xct%npol = 1
363    xct%pol = 1.0
364  endif
365
366
367  call timing%stop(timing%vmtxel)
368
369
370
371! End Calculating Matrix Elements
372!-------------------------------------------------------------------------------
373
374!----------------------------
375! Calculate the non-interacting spectrum. Only one PE works
376
377  call timing%start(timing%absorp0)
378  call logit('Calling absp0')
379  if (peinf%inode.eq.0) then
380    do ipol=1,xct%npol
381      call absp0(eqp,xct,dip%s1(:,ipol),vol,omega_plasma,flag,ipol)
382    enddo
383  endif
384  call timing%stop(timing%absorp0)
385
386  SAFE_DEALLOCATE_P(eqp%eclda)
387  SAFE_DEALLOCATE_P(eqp%evlda)
388  SAFE_DEALLOCATE(indexq_fi)
389
390! JRD If we only want absorp0 - we finish here
391
392  if (xct%iabsorp0 .eq. 1) then
393    call diag_end()
394    POP_SUB(diag)
395    return
396  endif
397
398!------------ Build Hamiltonian Matrix --------------------------------------------
399
400! Build Hamiltonian matrix. Diagonal part comes from the "non-interacting"
401! quasiparticle Hamiltonians.  If the QP Greens function is diagonal,
402! then these are just the quasiparticle energy differences on the
403! diagonal.  The more general case is:
404!
405!       <cvk|H0|c'v'k'> = delta(k,k') *
406!            [ <c|hqp|c'>*delta(v',v) - delta(c,c')*<v'|hqp|v> ]
407!
408! The rest of the Hamiltonian, which is the electron-hole interaction,
409! comes from interpolation further below.
410
411  call logit('Building non-interacting Hamiltonian')
412  SAFE_ALLOCATE(hbse_a, (xct%nkpt_fi*ncvs_fi, peinf%nblocks*peinf%block_sz))
413  hbse_a(:,:) = 0.0d0
414  if (.not.xct%tda) then
415    SAFE_ALLOCATE(hbse_b, (xct%nkpt_fi*ncvs_fi, peinf%nblocks*peinf%block_sz))
416    hbse_b(:,:) = 0.0d0
417  endif
418
419! iblock loop. This loop is over proc owned k if ipar = 1, (k,c)  if
420! ipar = 2 and (k,c,v) if ipar = 3
421
422  en_diff_max = 0d0
423  do iblock=1,peinf%ibt(peinf%inode+1)
424    ik=peinf%ikb(iblock)
425
426    if (ik .eq. 0) then
427      write(0,*) "Illegal value for ik",peinf%inode, iblock, ik
428      call die("internal error in diag, ik = 0")
429    endif
430
431! Build <c|hqp|c'> and <v|hqp|v'> for this kpoint
432
433    SAFE_ALLOCATE(hqpcc, (xct%ncb_fi,xct%ncb_fi,xct%nspin))
434    SAFE_ALLOCATE(hqpvv, (xct%nvb_fi,xct%nvb_fi,xct%nspin))
435    hqpcc = 0.0d0
436    hqpvv = 0.0d0
437
438! Put QP energies on diagonals of hqpcc and hqpvv to start
439    do is=1,xct%nspin
440      do ic=1,xct%ncb_fi
441        ! Skip if k+Q falls outside the patch
442        if (xct%indexq_fi(ik).eq.0 .and. xct%patched_sampling) cycle
443        hqpcc(ic,ic,is) = eqp%ecqp(ic,ik,is)
444      enddo
445      do iv=1,xct%nvb_fi
446        if (xct%qflag.ne.2) then
447          !DYQ: for qflag.eq.0, eqp%evqp(iv,ik,is) corresponds to the k-point kgq_fi%f(xct%indexq_fi(ik))
448          hqpvv(iv,iv,is) = eqp%evqp(iv,ik,is)
449        else
450          ! Skip if k+Q falls outside the patch
451          if (xct%indexq_fi(ik).eq.0 .and. xct%patched_sampling) cycle
452          hqpvv(iv,iv,is) = eqp%evqp(iv,xct%indexq_fi(ik),is)
453        endif
454      enddo
455    enddo
456
457! Read possible offdiagonal QP elements from "hqp.<ik>" file
458! if it exists.  JRD: This is broken for now.  Someone should fix
459! it in the future if they want to use it
460
461    !if (ik.lt.10) then
462    !  write(tmpstr,'(a,i1)') 'hqp.',ik
463    !else if (ik.lt.100) then
464    !  write(tmpstr,'(a,i2)') 'hqp.',ik
465    !else if (ik.lt.1000) then
466    !  write(tmpstr,'(a,i3)') 'hqp.',ik
467    !else if (ik.lt.100000) then
468    !  write(tmpstr,'(a,i5)') 'hqp.',ik
469        !else
470    !  write(0,*) 'too many kpoints for reading hqp'
471    !endif
472    !call open_file(9,file=tmpstr,form='formatted',status='old')
473    !if (is.eq.0) then
474    !  if (peinf%inode.eq.0) then
475    !    write(6,*) 'Reading offdiagonal hqp from file ',tmpstr
476    !    write(6,*) 'All values in eV'
477    !  endif
478    !  do
479    !    read(9,*,end=999) nocc,ii,jj,x,y
480
481    ! if ii and jj both refer to valence, states, put
482    ! matrix element into hqpvv
483
484    !    if ((ii<=nocc).and.(ii>nocc-xct%nvb_fi).and. &
485    !    (jj<=nocc).and.(jj>nocc-xct%nvb_fi)) then
486    !      if (peinf%inode.eq.0) write(6,'(a,2i5,2f20.10)') ' hqp(v,vp) = ',ii,jj,x,y
487    !      ii=nocc-ii+1
488    !      jj=nocc-jj+1
489    !      is = 1
490#ifdef CPLX
491    !      hqpvv(ii,jj,is) = CMPLX(x,y)/ryd
492#else
493    !      hqpvv(ii,jj,is) = x/ryd
494#endif
495    !    else if ((ii>nocc).and.(ii<=nocc+xct%ncb_fi).and. &
496    !    (jj>nocc).and.(jj<=nocc+xct%ncb_fi)) then
497    !      if (peinf%inode.eq.0) write(6,'(a,2i5,2f20.10)') ' hqp(c,cp) = ',ii,jj,x,y
498    !      ii=ii-nocc
499    !      jj=jj-nocc
500    !      is = 1
501#ifdef CPLX
502    !      hqpcc(ii,jj,is) = CMPLX(x,y)/ryd
503#else
504    !      hqpcc(ii,jj,is) = x/ryd
505#endif
506    !    endif
507    !  enddo
508    !999      call close_file(9)
509    !  write(6,*)
510    !endif ! if hqp.<ik> was found
511
512    ! Now build hamiltonian from hqcc and hqvv
513    ! Consider only diagonal elements in k,v,c
514
515    ! FHJ: Note: here, iv and ic are dummy indices, and the actual band indices
516    ! are ivb/icb. When should we use the dummy or real band indices?
517    ! - Use the dummy indices iv/ic to index a column of hbse_a (which is distributed)
518    ! - Use the real indices ivb/icb to index a row of hbse_a (which is not distributed)
519    ikb = ik
520    do is=1,xct%nspin
521      do iv=1,peinf%nv_block !1 for ipar==2
522        do ic=1,peinf%nc_block !1 for ipar==2 or ipar==3
523          if (xct%ipar==1) then
524            ivb=iv
525            icb=ic
526          else if (xct%ipar==2) then
527            icb=peinf%icb(iblock)
528            ivb=iv
529          else if (xct%ipar==3) then
530            ivb=peinf%ivb(iblock)
531            icb=peinf%icb(iblock)
532          endif
533          ikcvs = bse_index(ikb, icb, ivb, is, xct)
534          ikcvsd = bse_index(iblock, ic, iv, is, xct, ncband=peinf%nc_block, nvband=peinf%nv_block)
535          en_diff_max = max(en_diff_max, dble(hqpcc(icb,icb,is) - hqpvv(ivb,ivb,is)))
536          hbse_a(ikcvs,ikcvsd) = hqpcc(icb,icb,is) - hqpvv(ivb,ivb,is)
537        enddo
538      enddo
539    enddo
540    SAFE_DEALLOCATE(hqpcc)
541    SAFE_DEALLOCATE(hqpvv)
542  enddo ! loop on k-points on this processor
543#ifdef MPI
544  call MPI_Allreduce(MPI_IN_PLACE, en_diff_max, 1, MPI_DOUBLE_PRECISION, &
545    MPI_MAX, MPI_COMM_WORLD, mpierr)
546#endif
547
548!----------------------------
549! Define the mapping of eigenvectors: the ii-th column of the matrix
550! evecs%u_r(:,:) stored in PE #ipe will contain the eigenvector of order
551! peinf%peig(ipe,ii). The total number of eigenvectors stored in
552! each processor is given by peinf%neig(1:peinf%npes).
553! pblock >= maxval(peinf%neig(1:peinf%npes))
554
555  ! FHJ: Note: pblock gets ~doubled automatically in full BSE calculations.
556  ! In BLACS terms, the following lines would set up a 1d block-column
557  ! distribution for the eigenvectors with:
558  ! M=(2*)nmat, N=neig, MB=M, NB=peinf%block_sz, LLD=M
559  ! We should really get rid of this manual distribution and use BLACS.
560  pblock = neig/(peinf%npes*peinf%block_sz)
561  if (pblock*peinf%npes*peinf%block_sz.lt.neig) pblock = pblock + 1
562  pblock = pblock*peinf%block_sz
563  SAFE_ALLOCATE(peinf%neig, (peinf%npes))
564  SAFE_ALLOCATE(peinf%peig, (peinf%npes,pblock))
565  peinf%neig=0
566  peinf%peig=0
567  ii=1
568  do jj=1,neig
569    if (ii.eq.peinf%npes+1) ii=1
570    peinf%neig(ii)=peinf%neig(ii)+1
571    peinf%peig(ii,peinf%neig(ii))=jj
572    if (mod(jj,peinf%block_sz).eq.0) ii=ii+1
573  enddo
574
575!-----------------------------
576! Interpolation scheme in the Kernel
577
578  call logit('Calling intkernel')
579  call timing%start(timing%intkernel)
580  if (xct%tda) then
581    if (xct%subsample_line) then
582      call intkernel(crys,kg_fi,kp_fi,syms,xct,hbse_a,dcc,dvv,kco,fi2co_wfn,flag,gvec,intp_coefs,&
583        dcc_sub=dcc_sub,dvv_sub=dvv_sub,closepts_sub=closepts_sub)
584    else
585      call intkernel(crys,kg_fi,kp_fi,syms,xct,hbse_a,dcc,dvv,kco,fi2co_wfn,flag,gvec,intp_coefs)
586    endif
587  else
588    if (xct%subsample_line) then
589      call intkernel(crys,kg_fi,kp_fi,syms,xct,hbse_a,dcc,dvv,kco,fi2co_wfn,flag,gvec,intp_coefs,hbse_b=hbse_b,&
590        dcc_sub=dcc_sub,dvv_sub=dvv_sub,closepts_sub=closepts_sub)
591    else
592      call intkernel(crys,kg_fi,kp_fi,syms,xct,hbse_a,dcc,dvv,kco,fi2co_wfn,flag,gvec,intp_coefs,hbse_b=hbse_b)
593    endif
594  endif
595  call logit('Done intkernel')
596  call timing%stop(timing%intkernel)
597  SAFE_DEALLOCATE(fi2co_wfn)
598  SAFE_DEALLOCATE(dcc)
599  SAFE_DEALLOCATE(dvv)
600  SAFE_DEALLOCATE(kco)
601
602  !FHJ: This is for debugging purposes. Please keep this.
603  if (.not.xct%tda.and.peinf%npes==1) then
604    write(6,'(1x,a)') 'Dumping BSE Hamiltonian'
605    call open_file(unit=666, file='hbse_a.dat', status='replace', form='formatted')
606    call open_file(unit=667, file='hbse_b.dat', status='replace', form='formatted')
607    write(666,'(2(i0,1x))') xct%nkpt_fi*ncvs_fi, peinf%nblocks*peinf%block_sz
608    write(667,'(2(i0,1x))') xct%nkpt_fi*ncvs_fi, peinf%nblocks*peinf%block_sz
609    do jj=1,peinf%nblocks*peinf%block_sz
610#ifdef CPLX
611      write(666,'(2(es16.8,1x))') hbse_a(:, jj)
612      write(667,'(2(es16.8,1x))') hbse_b(:, jj)
613#else
614      write(666,'(es16.8)') hbse_a(:, jj)
615      write(667,'(es16.8)') hbse_b(:, jj)
616#endif
617    enddo
618    call close_file(666)
619    call close_file(667)
620    write(6,'(1x,a)') 'Done dumping Hamiltonian.'
621  endif
622
623!--------------------------------
624! Exact diagonalization
625
626
627  if (xct%algo==BSE_ALGO_DIAG) then
628
629    ! Setup eigenvectors and allocate memory
630    call evecs%init(xct, kg_fi, nmat, neig, pblock)
631    call evecs%alloc()
632
633    call logit('Calling diagonalize')
634    call timing%start(timing%diagonalize)
635
636    if (xct%tda) then
637      call diagonalize(xct, neig, nmat, hbse_a, evecs%evals, evecs%u_r)
638    else
639      call diagonalize(xct, neig, nmat, hbse_a, evecs%evals, evecs%u_r, &
640                       hbse_b=hbse_b, evecs_l=evecs%u_l)
641      if (peinf%inode==0) then
642        write(6,'(/,1x,a,i0)') 'Number of positive eigenvalues: ', count(evecs%evals>0d0)
643      endif
644    endif
645    call timing%stop(timing%diagonalize)
646
647    !--------------------------------
648    ! Calculate transition matrix elements
649    ! oscillator strength = 2 * cs / omega, as defined below
650
651    call timing%start(timing%trans_mtxel)
652    call logit('Computing transition matrix elements')
653    SAFE_ALLOCATE(cs, (neig,xct%npol))
654    cs = 0d0
655    SAFE_ALLOCATE(dipoles_r, (neig,xct%npol))
656    dipoles_r = ZERO
657    if (.not.xct%tda) then
658      SAFE_ALLOCATE(dipoles_l, (neig,xct%npol))
659      dipoles_l = ZERO
660      SAFE_ALLOCATE(cs_full, (neig,xct%npol))
661      cs_full = ZERO
662    endif
663
664    ! The factor of 1/sqrt(dble(xct%nspin)) below is required to obtain the same
665    ! transition matrix elements for the singlet excitons for nspin = 1 and nspin = 2,
666    ! See just after eq. (25) in Rohlfing and Louie, PRB 62, 4927 (2000)
667    do ipol=1,xct%npol
668      if (xct%tda) then
669        do ii=1,peinf%neig(peinf%inode+1)
670          jj = peinf%peig(peinf%inode+1,ii)
671          dipoles_r(jj,ipol) = sum(evecs%u_r(1:nmat,ii)*MYCONJG(dip%s1(1:nmat,ipol))) / sqrt(dble(xct%nspin))
672          cs(jj,ipol) = MYABS2(dipoles_r(jj,ipol))
673        enddo
674      else
675        do ii=1,peinf%neig(peinf%inode+1)
676          jj = peinf%peig(peinf%inode+1,ii)
677          ! FHJ: contributions from positive transitions
678          dipoles_l(jj,ipol) = sum(evecs%u_l(1:nmat,ii)*MYCONJG(dip%s1(1:nmat,ipol))) / sqrt(dble(xct%nspin))
679          dipoles_r(jj,ipol) = sum(evecs%u_r(1:nmat,ii)*MYCONJG(dip%s1(1:nmat,ipol))) / sqrt(dble(xct%nspin))
680          ! FHJ: contributions from negative transitions. Some notes:
681          ! (1) For the velocity matrix elements: s_(c->v) = - s_(v->c)^* . Proof:
682          !     1st-order expand the wfns |ck> -> |ck+q> and |vk> -> |vk+q> and project
683          !     onto <vk| and <ck|. Note that there`s a sign flip in the energy denom.
684          ! (2) There`s a negative sign in dipoles_r for (c->v) transitions, which
685          !     originates from Fermi factors. See eqn 8 in PRL 80, 4510 (1998).
686          dipoles_l(jj,ipol) = dipoles_l(jj,ipol) &
687            - sum(evecs%u_l(nmat+1:2*nmat,ii)*(-dip%s1(1:nmat,ipol))) / sqrt(dble(xct%nspin))
688          dipoles_r(jj,ipol) = dipoles_r(jj,ipol) &
689            + sum(evecs%u_r(nmat+1:2*nmat,ii)*(-dip%s1(1:nmat,ipol))) / sqrt(dble(xct%nspin))
690          cs_full(jj,ipol) = MYCONJG(dipoles_l(jj,ipol)) * dipoles_r(jj,ipol)
691        enddo
692      endif
693    enddo
694
695#ifdef MPI
696    SAFE_ALLOCATE(rdum, (neig,xct%npol))
697    rdum = cs
698    call MPI_REDUCE(rdum(1,1), cs(1,1), size(rdum), MPI_REAL_DP, MPI_SUM, 0, MPI_COMM_WORLD, mpierr)
699    SAFE_DEALLOCATE(rdum)
700    SAFE_ALLOCATE(rdum2, (neig,xct%npol))
701    rdum2 = dipoles_r
702    call MPI_REDUCE(rdum2(1,1), dipoles_r(1,1), size(rdum2), MPI_SCALAR, MPI_SUM, 0, MPI_COMM_WORLD, mpierr)
703    if (.not.xct%tda) then
704      rdum2 = dipoles_l
705      call MPI_REDUCE(rdum2(1,1), dipoles_l(1,1), size(rdum2), MPI_SCALAR, MPI_SUM, 0, MPI_COMM_WORLD, mpierr)
706      rdum2 = cs_full
707      call MPI_REDUCE(rdum2(1,1), cs_full(1,1), size(rdum2), MPI_SCALAR, MPI_SUM, 0, MPI_COMM_WORLD, mpierr)
708     endif
709    SAFE_DEALLOCATE(rdum2)
710#endif
711    if (.not.xct%tda) then
712      if (peinf%inode==0) then
713        cs = dble(cs_full)
714#ifdef CPLX
715        write(6,'(/,1x,a)') 'Imaginary part of the oscillator strength:'
716        write(6,'(1x,a,es9.3e2)') ' - Max. absolute value: ', maxval(abs(AIMAG(cs_full)))
717        write(6,'(1x,a,es9.3e2)') ' - Max. absolute value relative to real part: ', &
718          maxval(abs(AIMAG(cs_full)/dble(cs_full)))
719        write(6,*)
720#endif
721      endif
722    endif
723
724    call timing%stop(timing%trans_mtxel)
725
726    ! Convert eigenvalues to eV and write them out
727    evecs%evals(:) = evecs%evals(:)*ryd
728    if (.not.xct%tda) then
729      call write_eigenvalues(xct,flag,neig,vol,evecs%evals,cs,dipoles_r,dipoles_l=dipoles_l)
730    else
731      call write_eigenvalues(xct,flag,neig,vol,evecs%evals,cs,dipoles_r)
732    endif
733
734    SAFE_DEALLOCATE(dipoles_r)
735    if (.not.xct%tda) then
736      SAFE_DEALLOCATE(dipoles_l)
737    endif
738
739    !------------------------------
740    ! Calculate the absorption and density of excitonic states
741
742    call timing%start(timing%absorp)
743    call logit('Calling absp')
744    if (peinf%inode==0) then
745      do ipol=1,xct%npol
746        call absp(xct, neig, cs(:, ipol), evecs%evals, vol, omega_plasma, flag, ipol)
747      enddo
748    endif
749    call timing%stop(timing%absorp)
750
751  !------------------------------
752  ! Write out eigenvectors to file if needed
753
754    call timing%start(timing%write_eig)
755    if (flag%eig/=0) then
756      call logit('Calling write_eigenvectors_bin')
757      call evecs%write_eigenvectors_bin(flag%eig)
758    endif
759    call timing%stop(timing%write_eig)
760
761    SAFE_DEALLOCATE(cs)
762    if (.not.xct%tda) then
763      SAFE_DEALLOCATE(cs_full)
764    endif
765    call evecs%free()
766
767  elseif (xct%algo==BSE_ALGO_LANCZOS) then
768#ifdef USESCALAPACK
769
770    !------------------------------
771    ! Solve Lanczos algorithm
772
773    call timing%start(timing%absorp)
774    call logit('Calling absp_lanczos')
775    do ipol=1,xct%npol
776      if (xct%tda) then
777        call absp_lanczos(xct, nmat, nmax, dip%s1(:,ipol), vol, omega_plasma, &
778                          flag, ipol, en_diff_max, hbse_a)
779      else
780        call absp_lanczos(xct, nmat, nmax, dip%s1(:,ipol), vol, omega_plasma, &
781                          flag, ipol, en_diff_max, hbse_a, hbse_b)
782      endif
783    enddo
784    call timing%stop(timing%absorp)
785
786#endif
787  endif
788  ! xct%algo==BSE_ALGO_DIAG
789
790!-------------------------------
791! Deallocate stuff
792
793  call dip%free()
794
795  SAFE_DEALLOCATE_P(peinf%neig)
796  SAFE_DEALLOCATE_P(peinf%peig)
797  call dealloc_grid(kg_fi)
798
799  if (eqp%spl_tck%n>0) then
800    SAFE_DEALLOCATE_P(eqp%spl_tck%t)
801    SAFE_DEALLOCATE_P(eqp%spl_tck%c)
802  endif
803
804  SAFE_DEALLOCATE(hbse_a)
805  if (.not.xct%tda) then
806    SAFE_DEALLOCATE(hbse_b)
807  endif
808  SAFE_DEALLOCATE_P(eqp%ecqp)
809  SAFE_DEALLOCATE_P(eqp%evqp)
810  SAFE_DEALLOCATE(intp_coefs)
811
812  if (xct%iwritecoul .eq. 1 .and. peinf%inode .eq. 0) then
813    call close_file(19) ! file vcoul
814  endif
815
816  call diag_end()
817
818  POP_SUB(diag)
819  return
820
821contains
822
823  subroutine diag_end()
824
825    PUSH_SUB(diag.diag_end)
826
827#ifdef MPI
828    call MPI_BARRIER(MPI_COMM_WORLD,mpierr)
829#endif
830
831    POP_SUB(diag.diag_end)
832    return
833  end subroutine diag_end
834
835end subroutine diag
836