1!=============================================================================== 2! 3! Routines: 4! 5! (1) diag main Originally by MLT Last Edited: 5/12/2008 (JRD) 6! 7! For more details see the README_absorption file. 8! 9! Calculates the real and imaginary parts of the macroscopic dielectric 10! function starting from interaction matrix elements calculated by 11! the Kernel code. It uses interpolation in the matrix elements and 12! direct diagonalization of the Bethe-Salpeter equation. 13! Spin-polarized case implemented. 14! 15! For more details, see: 16! Rohlfing & Louie, PRB 62:(8), 4927 (2000) 17! G. Strinati, Rivista del Nuovo Cimento 11:(12), 1 (1988) 18! 19! Please report bugs to: jdeslip@civet.berkeley.edu 20! 21!================================================================================ 22 23#include "f_defs.h" 24 25subroutine diag(eqp,xct,flag,neig,nmax) 26 27 use global_m 28 use absp_io_m 29 use fullbz_m 30 use genwf_m 31 use intkernel_m 32 use intwfn_m 33 use misc_m 34 use vmtxel_m 35 use diagonalize_m 36 use evecs_m 37 use input_fi_m 38 use input_q_m 39 use absp_lanczos_m 40 use timing_m, only: timing => bse_timing 41 implicit none 42 43 type (eqpinfo), intent(inout) :: eqp 44 type (xctinfo), intent(inout) :: xct 45 type (flags), intent(inout) :: flag 46 integer, intent(inout) :: neig 47 integer, intent(in) :: nmax 48 49 type (crystal) :: crys 50 type (symmetry) :: syms 51 type (gspace) :: gvec 52 type (grid) :: kg_fi, kgq_fi,kg_co,kgq_co 53 type (kpoints) :: kp_fi, kpq_fi 54 type (wavefunction) :: wfnc_fi 55 type (wavefunction) :: wfnvq_fi 56 type (work_genwf) :: work, workq 57 type (int_wavefunction) :: intwfnc 58 type (int_wavefunction) :: intwfnv 59 type (evecs_t) :: evecs 60 type (vmtxel_t) :: dip 61 62 character :: tmpstr*128,filename*20 63 integer :: ii,ipol,jj,ncount,nmat,ncvs_fi,pblock 64 integer :: ikb, icb, ivb 65 integer :: ik,ikq,ikt,iblock,ikcvs,ikcvsd,ic,iv,is 66 integer :: iunit_c,iunit_v 67 real(DP) :: vol,omega_plasma,en_diff_max 68 real(DP) :: tsec(2),tmin(2),tmax(2) 69 70 character*16, allocatable :: routnam(:) 71 integer, allocatable :: routsrt(:) 72 integer, allocatable :: fi2co_wfn(:,:),indexq_fi(:) 73 real(DP), allocatable :: evals(:), kco(:,:), cs(:,:), cs0(:), rdum(:,:) 74 75 SCALAR, allocatable :: dcc(:,:,:,:,:),dvv(:,:,:,:,:),s0(:), & 76 hqpcc(:,:,:), hqpvv(:,:,:), rdum2(:,:) 77 SCALAR, allocatable :: dipoles_l(:,:), dipoles_r(:,:), cs_full(:,:) 78 !> (kcvs, k`c`v`s`), "A" block of BSE Hamiltonian 79 SCALAR, allocatable :: hbse_a(:,:) 80 !> (kcvs, k`c`v`s`), "B" block of BSE Hamiltonian, only if tda=.false. 81 SCALAR, allocatable :: hbse_b(:,:) 82 real(DP), allocatable :: intp_coefs(:,:) 83 character(len=2) :: suffix(3) = (/'b1', 'b2', 'b3'/) 84 85 ! DYQ: Variables used in finite Q 86 integer, allocatable :: kg0_temp(:,:) 87 integer :: umk 88 real(DP) :: kq(3),qq(3) 89 real :: delta 90 91 !DYQ: Variables used for clustered subsampling 92 integer ::ik_sub,nsub_files,nk_sub 93 type(grid) :: kg_sub_co 94 SCALAR, allocatable :: dvv_sub(:,:,:,:,:,:),dcc_sub(:,:,:,:,:,:) 95 integer,allocatable :: closepts_sub(:,:) 96 97 PUSH_SUB(diag) 98 99! JRD: A Check for Finite Q 100 101! if (peinf%inode .eq. 0) then 102! write(6,*) 'nkpt_co = ', xct%nkpt_co 103! endif 104 105 if(flag%vm.eq.2) then 106 if (peinf%inode.eq.0) then 107 write(0,*) 'WARNING: read_eps2_moments not supported in this diagonalization code. Ignoring keyword.' 108 endif 109 flag%vm=0 110 endif 111 112!-------------------------- 113! If eigenvalues.dat is available, read them and go straight to 114! calculation of the absorption spectrum 115 116 117 if (flag%spec.eq.1) then 118 if (peinf%inode .eq. 0) then 119 120 omega_plasma = 0.d0 121 122 write(6,*) 'Create absorption_noeh.dat from eigenvalues_noeh.dat' 123 do ipol=1,xct%npol 124 call read_eigenvalues_noeh(xct,neig,vol,eqp,s0,ipol) 125 call absp0(eqp,xct,s0,vol,omega_plasma,flag,ipol) 126 SAFE_DEALLOCATE_P(eqp%evqp) 127 SAFE_DEALLOCATE_P(eqp%ecqp) 128 SAFE_DEALLOCATE_P(eqp%evlda) 129 SAFE_DEALLOCATE_P(eqp%eclda) 130 SAFE_DEALLOCATE(s0) 131 enddo 132 133 if (xct%iabsorp0 .eq. 0) then 134 write(6,*) 'Create absorption_eh.dat from eigenvalues.dat' 135 do ipol=1,xct%npol 136 call read_eigenvalues(xct,neig,vol,evals,cs0,ipol) 137 call absp(xct,neig,cs0,evals,vol,omega_plasma,flag,ipol) 138 SAFE_DEALLOCATE(evals) 139 SAFE_DEALLOCATE(cs0) 140 enddo 141 endif 142 endif 143 144 call diag_end() 145 POP_SUB(diag) 146 return 147 endif 148 149!-------------------------- 150! Read wavefunctions on the fine grid 151 152 call logit('Calling input') 153 call timing%start(timing%input) 154 call input_fi(crys,gvec,kg_fi,kp_fi,syms,eqp,xct,flag, & 155 omega_plasma,.true.,intwfnc) 156 157! If there is no specified number of eigenvalues, calculate 158! all eigenvalues 159 160 nmat = xct%nkpt_fi*xct%ncb_fi*xct%nvb_fi*xct%nspin 161 if (xct%algo==BSE_ALGO_DIAG) then 162 if (xct%tda) then 163 if (neig==0) neig = nmat 164 else 165 if (peinf%inode==0.and.neig/=0) then 166 write(0,'(/,a,/)') 'WARNING: BSE calculations ignore the `number_eigenvalues` flag.' 167 endif 168#ifdef USESCALAPACK 169 ! FHJ: Meiyue`s solver is structure preserving, so we only compute the positive evecs 170 neig = nmat 171#else 172 ! FHJ: generic solver 173 neig = 2*nmat 174#endif 175 endif 176 if ((neig<=0).or.(neig>2*nmat).or.(neig>nmat.and.xct%tda)& 177#ifdef USESCALAPACK 178 .or.(neig>nmat)& 179#endif 180 ) then 181 write(tmpstr,'(a,2i6)') 'Incomprehensible request of eigenvalues : ', neig, nmat 182 call die(tmpstr, only_root_writes = .true.) 183 endif 184 else 185 neig = nmat 186 endif 187 188 vol = xct%nktotal*crys%celvol 189 if (peinf%inode.eq.0) then 190 write(6,'(/1x,a)') 'More job parameters:' 191 write(6,'(1x,a,es9.3e2)') '- Crystal volume (bohr): ', vol 192 write(6,'(1x,a,f0.3)') '- Broadening (eV): ', xct%eta 193 write(6,'(1x,a,i0)') '- Number of valence bands: ', xct%nvb_fi 194 write(6,'(1x,a,i0)') '- Number of cond. bands: ', xct%ncb_fi 195 write(6,'(1x,a,i0)') '- Number of spins: ', xct%nspin 196 write(6,'(1x,a,i0)') '- Number of eigenvalues to be computed: ', neig 197 write(6,'()') 198 endif 199 call timing%stop(timing%input) 200 201 SAFE_ALLOCATE(indexq_fi, (xct%nkpt_fi)) 202 SAFE_ALLOCATE(xct%indexq_fi, (xct%nkpt_fi)) 203 if (flag%vm.ne.1.or. .not. flag%read_dtmat) then 204 call timing%start(timing%input_q) 205 call logit('Calling input_q') 206 call input_q(kp_fi,crys,gvec,kg_fi,kgq_fi,kpq_fi,syms,xct,indexq_fi,eqp,flag,intwfnv) 207 call timing%stop(timing%input_q) 208 endif 209 210! JRD: Don`t do this next section if only want absorption_noeh.dat 211 212 if (xct%iabsorp0 .eq. 0) then 213 214!------------------------------ 215! Calculate the transformation matrices from coarse grid wavefunctions 216! FHJ: These are the final transformation coefs that will be used to interpolate 217! the kernel. However, we might use an unrestricted version of dvv/dcc to 218! interpolate eqp if xct%unrestricted_transf==.true.. 219 SAFE_ALLOCATE(dvv, (xct%nvb_fi,xct%n1b_co,xct%nspin,xct%nkpt_fi,xct%npts_intp_kernel)) 220 SAFE_ALLOCATE(dcc, (xct%ncb_fi,xct%n2b_co,xct%nspin,xct%nkpt_fi,xct%npts_intp_kernel)) 221 SAFE_ALLOCATE(kco, (3,xct%nkpt_co)) 222 SAFE_ALLOCATE(fi2co_wfn, (xct%npts_intp_kernel,xct%nkpt_fi)) 223 SAFE_ALLOCATE(intp_coefs, (xct%npts_intp_kernel, xct%nkpt_fi)) 224 225 call logit('Calling intwfn') 226 call timing%start(timing%intwfn) 227 call intwfn(kp_fi,crys,syms,xct,flag,gvec,kg_fi,kgq_fi,kg_co,kgq_co,dcc,dvv,& 228 kco,fi2co_wfn,indexq_fi,eqp,intwfnv,intwfnc,intp_coefs) 229 call timing%stop(timing%intwfn) 230 231 if (xct%subsample_line) then 232 call logit('Calling intwfn_sub') 233 call open_file(unit=77, file='subsample.inp',form='formatted',status='old') 234 read(77,*) nk_sub 235 read(77,*) nsub_files 236 if (nsub_files.ne.xct%nkpt_co) call die("Number of subsampled points must be the same as the number of coarse points",& 237 only_root_writes=.true.) 238 kg_sub_co%nr = nsub_files 239 SAFE_ALLOCATE(kg_sub_co%r,(3,xct%nkpt_co)) 240 do ik_sub=1,xct%nkpt_co 241 read(77,*) (kg_sub_co%r(ii,ik_sub),ii=1,3) 242 enddo 243 call close_file(77) 244 SAFE_ALLOCATE(dcc_sub,(xct%ncb_fi, xct%n2b_co, xct%nspin, xct%nkpt_fi,xct%idimensions+1, nk_sub)) 245 SAFE_ALLOCATE(dvv_sub,(xct%nvb_fi, xct%n1b_co, xct%nspin, xct%nkpt_fi,xct%idimensions+1, nk_sub)) 246 SAFE_ALLOCATE(closepts_sub,(4,xct%nkpt_fi)) 247 call intwfn_sub(kp_fi,crys,syms,xct,flag,gvec,kg_fi,kgq_fi, & 248 dcc_sub,dvv_sub,kg_sub_co,kg_co,indexq_fi,nk_sub,intwfnv,intwfnc,closepts_sub) 249 endif 250 251 endif 252 253 SAFE_DEALLOCATE_P(xct%ifmax) 254 if ((flag%vm.ne.1.or. .not. flag%read_dtmat) .and..not. xct%no_mtxel) then 255 ! otherwise, we did not call input_q to allocate it 256 SAFE_DEALLOCATE_P(xct%ifmaxq) 257 endif 258 259!------------ Calculate the velocity (or momentum) matrix elements ------------- 260 261! Each PE calculates a small number of them. At the end, share data 262! 263! If flag%vm.eq.1, skip this part and just read the matrix elements 264! from "vmtxel". 265! 266! peinf%block_sz = size of a distributed column in hbse_a 267 268 269 call logit('Calculating v/p matrix elememts') 270 ncvs_fi = xct%ncb_fi*xct%nvb_fi*xct%nspin 271 if (xct%ipar .eq. 1) then 272 peinf%block_sz = ncvs_fi 273 else if (xct%ipar .eq. 2) then 274 peinf%block_sz = xct%nvb_fi*xct%nspin 275 else 276 peinf%block_sz = xct%nspin 277 endif 278 nmat = xct%nkpt_fi*ncvs_fi 279 280 ! Initialize dipole operator and allocate dipole matrix elements 281 call dip%init_from_xctinfo(xct, opr=flag%opr) 282 call dip%alloc() 283 284 ! Copy list of k-points 285 do ik=1, dip%nk 286 dip%kpt(:,ik) = kg_fi%f(:,ik) 287 end do 288 289 ! FIXME: It was decided to hold on the use of hdf5 for vmtxel 290 ! until the next release, so as not to break backward compatibility 291 ! Since using hdf5 for vmtxel is highly desirable, let us not 292 ! forget to reactivate it as soon as possible. 293 dip%use_hdf5 = .false. 294 295 call timing%start(timing%vmtxel) 296 297 if (.not.xct%no_mtxel) then 298 if (flag%vm.eq.0) then 299 do ikt=1, peinf%ikt(peinf%inode+1) 300 ik = peinf%ik(peinf%inode+1,ikt) 301 if (xct%qflag .eq. 1) then 302 ikq = indexq_fi(ik) 303 else 304 ! genwf will retrieve the valence bands at k+q+Q 305 ikq = xct%indexq_fi(ik) 306 if (ikq.eq.0 .and. xct%patched_sampling) then 307 if (peinf%inode.eq.0) write(6,*) "Skipping genwf for ik,ikq:",ik,ikq 308 cycle 309 endif 310 endif 311 312 call genwf(crys,gvec,kg_fi,syms,wfnc_fi,ik,ik,xct%nspin,xct%ncb_fi,& 313 work,intwfnc,xct%iwriteint,is_cond=.true.) 314 call genwf(crys,gvec,kgq_fi,syms,wfnvq_fi,ik,ikq,xct%nspin,xct%nvb_fi,& 315 workq,intwfnv,xct%iwriteint,is_cond=.false.) 316 317 call dip%compute_ik_vmtxel(ik, wfnc_fi, wfnvq_fi, gvec, xct%qshift, crys, eqp) 318 319 SAFE_DEALLOCATE_P(wfnc_fi%cg) 320 SAFE_DEALLOCATE_P(wfnc_fi%isort) 321 SAFE_DEALLOCATE_P(wfnvq_fi%cg) 322 SAFE_DEALLOCATE_P(wfnvq_fi%isort) 323 enddo 324 325 ! typedefs initializes all of these ikolds to 0 326 if(work%ikold.ne.0) then 327 SAFE_DEALLOCATE_P(work%cg) 328 SAFE_DEALLOCATE_P(work%ph) 329 SAFE_DEALLOCATE_P(work%ind) 330 SAFE_DEALLOCATE_P(work%isort) 331 endif 332 if(workq%ikold.ne.0) then 333 SAFE_DEALLOCATE_P(workq%cg) 334 SAFE_DEALLOCATE_P(workq%ph) 335 SAFE_DEALLOCATE_P(workq%ind) 336 SAFE_DEALLOCATE_P(workq%isort) 337 endif 338 339 ! Share matrix elements 340 call dip%reduce() 341 342 ! Write file 343 call dip%write_vmtxel() 344 345 else 346 ! Read dipole matrix elements from file if they were already computed 347 call dip%read_vmtxel() 348 endif 349 350 if (flag%vm.ne.1.or. .not. flag%read_dtmat) then 351 call dealloc_grid(kgq_fi) 352 endif 353 354 if (flag%vm == 0 .and. .not. flag%read_dtmat) then 355 SAFE_DEALLOCATE_P(intwfnc%cgk) 356 SAFE_DEALLOCATE_P(intwfnv%cgk) 357 SAFE_DEALLOCATE_P(intwfnc%isort) 358 SAFE_DEALLOCATE_P(intwfnv%isort) 359 endif 360 361 else ! xct%no_mtxel 362 xct%npol = 1 363 xct%pol = 1.0 364 endif 365 366 367 call timing%stop(timing%vmtxel) 368 369 370 371! End Calculating Matrix Elements 372!------------------------------------------------------------------------------- 373 374!---------------------------- 375! Calculate the non-interacting spectrum. Only one PE works 376 377 call timing%start(timing%absorp0) 378 call logit('Calling absp0') 379 if (peinf%inode.eq.0) then 380 do ipol=1,xct%npol 381 call absp0(eqp,xct,dip%s1(:,ipol),vol,omega_plasma,flag,ipol) 382 enddo 383 endif 384 call timing%stop(timing%absorp0) 385 386 SAFE_DEALLOCATE_P(eqp%eclda) 387 SAFE_DEALLOCATE_P(eqp%evlda) 388 SAFE_DEALLOCATE(indexq_fi) 389 390! JRD If we only want absorp0 - we finish here 391 392 if (xct%iabsorp0 .eq. 1) then 393 call diag_end() 394 POP_SUB(diag) 395 return 396 endif 397 398!------------ Build Hamiltonian Matrix -------------------------------------------- 399 400! Build Hamiltonian matrix. Diagonal part comes from the "non-interacting" 401! quasiparticle Hamiltonians. If the QP Greens function is diagonal, 402! then these are just the quasiparticle energy differences on the 403! diagonal. The more general case is: 404! 405! <cvk|H0|c'v'k'> = delta(k,k') * 406! [ <c|hqp|c'>*delta(v',v) - delta(c,c')*<v'|hqp|v> ] 407! 408! The rest of the Hamiltonian, which is the electron-hole interaction, 409! comes from interpolation further below. 410 411 call logit('Building non-interacting Hamiltonian') 412 SAFE_ALLOCATE(hbse_a, (xct%nkpt_fi*ncvs_fi, peinf%nblocks*peinf%block_sz)) 413 hbse_a(:,:) = 0.0d0 414 if (.not.xct%tda) then 415 SAFE_ALLOCATE(hbse_b, (xct%nkpt_fi*ncvs_fi, peinf%nblocks*peinf%block_sz)) 416 hbse_b(:,:) = 0.0d0 417 endif 418 419! iblock loop. This loop is over proc owned k if ipar = 1, (k,c) if 420! ipar = 2 and (k,c,v) if ipar = 3 421 422 en_diff_max = 0d0 423 do iblock=1,peinf%ibt(peinf%inode+1) 424 ik=peinf%ikb(iblock) 425 426 if (ik .eq. 0) then 427 write(0,*) "Illegal value for ik",peinf%inode, iblock, ik 428 call die("internal error in diag, ik = 0") 429 endif 430 431! Build <c|hqp|c'> and <v|hqp|v'> for this kpoint 432 433 SAFE_ALLOCATE(hqpcc, (xct%ncb_fi,xct%ncb_fi,xct%nspin)) 434 SAFE_ALLOCATE(hqpvv, (xct%nvb_fi,xct%nvb_fi,xct%nspin)) 435 hqpcc = 0.0d0 436 hqpvv = 0.0d0 437 438! Put QP energies on diagonals of hqpcc and hqpvv to start 439 do is=1,xct%nspin 440 do ic=1,xct%ncb_fi 441 ! Skip if k+Q falls outside the patch 442 if (xct%indexq_fi(ik).eq.0 .and. xct%patched_sampling) cycle 443 hqpcc(ic,ic,is) = eqp%ecqp(ic,ik,is) 444 enddo 445 do iv=1,xct%nvb_fi 446 if (xct%qflag.ne.2) then 447 !DYQ: for qflag.eq.0, eqp%evqp(iv,ik,is) corresponds to the k-point kgq_fi%f(xct%indexq_fi(ik)) 448 hqpvv(iv,iv,is) = eqp%evqp(iv,ik,is) 449 else 450 ! Skip if k+Q falls outside the patch 451 if (xct%indexq_fi(ik).eq.0 .and. xct%patched_sampling) cycle 452 hqpvv(iv,iv,is) = eqp%evqp(iv,xct%indexq_fi(ik),is) 453 endif 454 enddo 455 enddo 456 457! Read possible offdiagonal QP elements from "hqp.<ik>" file 458! if it exists. JRD: This is broken for now. Someone should fix 459! it in the future if they want to use it 460 461 !if (ik.lt.10) then 462 ! write(tmpstr,'(a,i1)') 'hqp.',ik 463 !else if (ik.lt.100) then 464 ! write(tmpstr,'(a,i2)') 'hqp.',ik 465 !else if (ik.lt.1000) then 466 ! write(tmpstr,'(a,i3)') 'hqp.',ik 467 !else if (ik.lt.100000) then 468 ! write(tmpstr,'(a,i5)') 'hqp.',ik 469 !else 470 ! write(0,*) 'too many kpoints for reading hqp' 471 !endif 472 !call open_file(9,file=tmpstr,form='formatted',status='old') 473 !if (is.eq.0) then 474 ! if (peinf%inode.eq.0) then 475 ! write(6,*) 'Reading offdiagonal hqp from file ',tmpstr 476 ! write(6,*) 'All values in eV' 477 ! endif 478 ! do 479 ! read(9,*,end=999) nocc,ii,jj,x,y 480 481 ! if ii and jj both refer to valence, states, put 482 ! matrix element into hqpvv 483 484 ! if ((ii<=nocc).and.(ii>nocc-xct%nvb_fi).and. & 485 ! (jj<=nocc).and.(jj>nocc-xct%nvb_fi)) then 486 ! if (peinf%inode.eq.0) write(6,'(a,2i5,2f20.10)') ' hqp(v,vp) = ',ii,jj,x,y 487 ! ii=nocc-ii+1 488 ! jj=nocc-jj+1 489 ! is = 1 490#ifdef CPLX 491 ! hqpvv(ii,jj,is) = CMPLX(x,y)/ryd 492#else 493 ! hqpvv(ii,jj,is) = x/ryd 494#endif 495 ! else if ((ii>nocc).and.(ii<=nocc+xct%ncb_fi).and. & 496 ! (jj>nocc).and.(jj<=nocc+xct%ncb_fi)) then 497 ! if (peinf%inode.eq.0) write(6,'(a,2i5,2f20.10)') ' hqp(c,cp) = ',ii,jj,x,y 498 ! ii=ii-nocc 499 ! jj=jj-nocc 500 ! is = 1 501#ifdef CPLX 502 ! hqpcc(ii,jj,is) = CMPLX(x,y)/ryd 503#else 504 ! hqpcc(ii,jj,is) = x/ryd 505#endif 506 ! endif 507 ! enddo 508 !999 call close_file(9) 509 ! write(6,*) 510 !endif ! if hqp.<ik> was found 511 512 ! Now build hamiltonian from hqcc and hqvv 513 ! Consider only diagonal elements in k,v,c 514 515 ! FHJ: Note: here, iv and ic are dummy indices, and the actual band indices 516 ! are ivb/icb. When should we use the dummy or real band indices? 517 ! - Use the dummy indices iv/ic to index a column of hbse_a (which is distributed) 518 ! - Use the real indices ivb/icb to index a row of hbse_a (which is not distributed) 519 ikb = ik 520 do is=1,xct%nspin 521 do iv=1,peinf%nv_block !1 for ipar==2 522 do ic=1,peinf%nc_block !1 for ipar==2 or ipar==3 523 if (xct%ipar==1) then 524 ivb=iv 525 icb=ic 526 else if (xct%ipar==2) then 527 icb=peinf%icb(iblock) 528 ivb=iv 529 else if (xct%ipar==3) then 530 ivb=peinf%ivb(iblock) 531 icb=peinf%icb(iblock) 532 endif 533 ikcvs = bse_index(ikb, icb, ivb, is, xct) 534 ikcvsd = bse_index(iblock, ic, iv, is, xct, ncband=peinf%nc_block, nvband=peinf%nv_block) 535 en_diff_max = max(en_diff_max, dble(hqpcc(icb,icb,is) - hqpvv(ivb,ivb,is))) 536 hbse_a(ikcvs,ikcvsd) = hqpcc(icb,icb,is) - hqpvv(ivb,ivb,is) 537 enddo 538 enddo 539 enddo 540 SAFE_DEALLOCATE(hqpcc) 541 SAFE_DEALLOCATE(hqpvv) 542 enddo ! loop on k-points on this processor 543#ifdef MPI 544 call MPI_Allreduce(MPI_IN_PLACE, en_diff_max, 1, MPI_DOUBLE_PRECISION, & 545 MPI_MAX, MPI_COMM_WORLD, mpierr) 546#endif 547 548!---------------------------- 549! Define the mapping of eigenvectors: the ii-th column of the matrix 550! evecs%u_r(:,:) stored in PE #ipe will contain the eigenvector of order 551! peinf%peig(ipe,ii). The total number of eigenvectors stored in 552! each processor is given by peinf%neig(1:peinf%npes). 553! pblock >= maxval(peinf%neig(1:peinf%npes)) 554 555 ! FHJ: Note: pblock gets ~doubled automatically in full BSE calculations. 556 ! In BLACS terms, the following lines would set up a 1d block-column 557 ! distribution for the eigenvectors with: 558 ! M=(2*)nmat, N=neig, MB=M, NB=peinf%block_sz, LLD=M 559 ! We should really get rid of this manual distribution and use BLACS. 560 pblock = neig/(peinf%npes*peinf%block_sz) 561 if (pblock*peinf%npes*peinf%block_sz.lt.neig) pblock = pblock + 1 562 pblock = pblock*peinf%block_sz 563 SAFE_ALLOCATE(peinf%neig, (peinf%npes)) 564 SAFE_ALLOCATE(peinf%peig, (peinf%npes,pblock)) 565 peinf%neig=0 566 peinf%peig=0 567 ii=1 568 do jj=1,neig 569 if (ii.eq.peinf%npes+1) ii=1 570 peinf%neig(ii)=peinf%neig(ii)+1 571 peinf%peig(ii,peinf%neig(ii))=jj 572 if (mod(jj,peinf%block_sz).eq.0) ii=ii+1 573 enddo 574 575!----------------------------- 576! Interpolation scheme in the Kernel 577 578 call logit('Calling intkernel') 579 call timing%start(timing%intkernel) 580 if (xct%tda) then 581 if (xct%subsample_line) then 582 call intkernel(crys,kg_fi,kp_fi,syms,xct,hbse_a,dcc,dvv,kco,fi2co_wfn,flag,gvec,intp_coefs,& 583 dcc_sub=dcc_sub,dvv_sub=dvv_sub,closepts_sub=closepts_sub) 584 else 585 call intkernel(crys,kg_fi,kp_fi,syms,xct,hbse_a,dcc,dvv,kco,fi2co_wfn,flag,gvec,intp_coefs) 586 endif 587 else 588 if (xct%subsample_line) then 589 call intkernel(crys,kg_fi,kp_fi,syms,xct,hbse_a,dcc,dvv,kco,fi2co_wfn,flag,gvec,intp_coefs,hbse_b=hbse_b,& 590 dcc_sub=dcc_sub,dvv_sub=dvv_sub,closepts_sub=closepts_sub) 591 else 592 call intkernel(crys,kg_fi,kp_fi,syms,xct,hbse_a,dcc,dvv,kco,fi2co_wfn,flag,gvec,intp_coefs,hbse_b=hbse_b) 593 endif 594 endif 595 call logit('Done intkernel') 596 call timing%stop(timing%intkernel) 597 SAFE_DEALLOCATE(fi2co_wfn) 598 SAFE_DEALLOCATE(dcc) 599 SAFE_DEALLOCATE(dvv) 600 SAFE_DEALLOCATE(kco) 601 602 !FHJ: This is for debugging purposes. Please keep this. 603 if (.not.xct%tda.and.peinf%npes==1) then 604 write(6,'(1x,a)') 'Dumping BSE Hamiltonian' 605 call open_file(unit=666, file='hbse_a.dat', status='replace', form='formatted') 606 call open_file(unit=667, file='hbse_b.dat', status='replace', form='formatted') 607 write(666,'(2(i0,1x))') xct%nkpt_fi*ncvs_fi, peinf%nblocks*peinf%block_sz 608 write(667,'(2(i0,1x))') xct%nkpt_fi*ncvs_fi, peinf%nblocks*peinf%block_sz 609 do jj=1,peinf%nblocks*peinf%block_sz 610#ifdef CPLX 611 write(666,'(2(es16.8,1x))') hbse_a(:, jj) 612 write(667,'(2(es16.8,1x))') hbse_b(:, jj) 613#else 614 write(666,'(es16.8)') hbse_a(:, jj) 615 write(667,'(es16.8)') hbse_b(:, jj) 616#endif 617 enddo 618 call close_file(666) 619 call close_file(667) 620 write(6,'(1x,a)') 'Done dumping Hamiltonian.' 621 endif 622 623!-------------------------------- 624! Exact diagonalization 625 626 627 if (xct%algo==BSE_ALGO_DIAG) then 628 629 ! Setup eigenvectors and allocate memory 630 call evecs%init(xct, kg_fi, nmat, neig, pblock) 631 call evecs%alloc() 632 633 call logit('Calling diagonalize') 634 call timing%start(timing%diagonalize) 635 636 if (xct%tda) then 637 call diagonalize(xct, neig, nmat, hbse_a, evecs%evals, evecs%u_r) 638 else 639 call diagonalize(xct, neig, nmat, hbse_a, evecs%evals, evecs%u_r, & 640 hbse_b=hbse_b, evecs_l=evecs%u_l) 641 if (peinf%inode==0) then 642 write(6,'(/,1x,a,i0)') 'Number of positive eigenvalues: ', count(evecs%evals>0d0) 643 endif 644 endif 645 call timing%stop(timing%diagonalize) 646 647 !-------------------------------- 648 ! Calculate transition matrix elements 649 ! oscillator strength = 2 * cs / omega, as defined below 650 651 call timing%start(timing%trans_mtxel) 652 call logit('Computing transition matrix elements') 653 SAFE_ALLOCATE(cs, (neig,xct%npol)) 654 cs = 0d0 655 SAFE_ALLOCATE(dipoles_r, (neig,xct%npol)) 656 dipoles_r = ZERO 657 if (.not.xct%tda) then 658 SAFE_ALLOCATE(dipoles_l, (neig,xct%npol)) 659 dipoles_l = ZERO 660 SAFE_ALLOCATE(cs_full, (neig,xct%npol)) 661 cs_full = ZERO 662 endif 663 664 ! The factor of 1/sqrt(dble(xct%nspin)) below is required to obtain the same 665 ! transition matrix elements for the singlet excitons for nspin = 1 and nspin = 2, 666 ! See just after eq. (25) in Rohlfing and Louie, PRB 62, 4927 (2000) 667 do ipol=1,xct%npol 668 if (xct%tda) then 669 do ii=1,peinf%neig(peinf%inode+1) 670 jj = peinf%peig(peinf%inode+1,ii) 671 dipoles_r(jj,ipol) = sum(evecs%u_r(1:nmat,ii)*MYCONJG(dip%s1(1:nmat,ipol))) / sqrt(dble(xct%nspin)) 672 cs(jj,ipol) = MYABS2(dipoles_r(jj,ipol)) 673 enddo 674 else 675 do ii=1,peinf%neig(peinf%inode+1) 676 jj = peinf%peig(peinf%inode+1,ii) 677 ! FHJ: contributions from positive transitions 678 dipoles_l(jj,ipol) = sum(evecs%u_l(1:nmat,ii)*MYCONJG(dip%s1(1:nmat,ipol))) / sqrt(dble(xct%nspin)) 679 dipoles_r(jj,ipol) = sum(evecs%u_r(1:nmat,ii)*MYCONJG(dip%s1(1:nmat,ipol))) / sqrt(dble(xct%nspin)) 680 ! FHJ: contributions from negative transitions. Some notes: 681 ! (1) For the velocity matrix elements: s_(c->v) = - s_(v->c)^* . Proof: 682 ! 1st-order expand the wfns |ck> -> |ck+q> and |vk> -> |vk+q> and project 683 ! onto <vk| and <ck|. Note that there`s a sign flip in the energy denom. 684 ! (2) There`s a negative sign in dipoles_r for (c->v) transitions, which 685 ! originates from Fermi factors. See eqn 8 in PRL 80, 4510 (1998). 686 dipoles_l(jj,ipol) = dipoles_l(jj,ipol) & 687 - sum(evecs%u_l(nmat+1:2*nmat,ii)*(-dip%s1(1:nmat,ipol))) / sqrt(dble(xct%nspin)) 688 dipoles_r(jj,ipol) = dipoles_r(jj,ipol) & 689 + sum(evecs%u_r(nmat+1:2*nmat,ii)*(-dip%s1(1:nmat,ipol))) / sqrt(dble(xct%nspin)) 690 cs_full(jj,ipol) = MYCONJG(dipoles_l(jj,ipol)) * dipoles_r(jj,ipol) 691 enddo 692 endif 693 enddo 694 695#ifdef MPI 696 SAFE_ALLOCATE(rdum, (neig,xct%npol)) 697 rdum = cs 698 call MPI_REDUCE(rdum(1,1), cs(1,1), size(rdum), MPI_REAL_DP, MPI_SUM, 0, MPI_COMM_WORLD, mpierr) 699 SAFE_DEALLOCATE(rdum) 700 SAFE_ALLOCATE(rdum2, (neig,xct%npol)) 701 rdum2 = dipoles_r 702 call MPI_REDUCE(rdum2(1,1), dipoles_r(1,1), size(rdum2), MPI_SCALAR, MPI_SUM, 0, MPI_COMM_WORLD, mpierr) 703 if (.not.xct%tda) then 704 rdum2 = dipoles_l 705 call MPI_REDUCE(rdum2(1,1), dipoles_l(1,1), size(rdum2), MPI_SCALAR, MPI_SUM, 0, MPI_COMM_WORLD, mpierr) 706 rdum2 = cs_full 707 call MPI_REDUCE(rdum2(1,1), cs_full(1,1), size(rdum2), MPI_SCALAR, MPI_SUM, 0, MPI_COMM_WORLD, mpierr) 708 endif 709 SAFE_DEALLOCATE(rdum2) 710#endif 711 if (.not.xct%tda) then 712 if (peinf%inode==0) then 713 cs = dble(cs_full) 714#ifdef CPLX 715 write(6,'(/,1x,a)') 'Imaginary part of the oscillator strength:' 716 write(6,'(1x,a,es9.3e2)') ' - Max. absolute value: ', maxval(abs(AIMAG(cs_full))) 717 write(6,'(1x,a,es9.3e2)') ' - Max. absolute value relative to real part: ', & 718 maxval(abs(AIMAG(cs_full)/dble(cs_full))) 719 write(6,*) 720#endif 721 endif 722 endif 723 724 call timing%stop(timing%trans_mtxel) 725 726 ! Convert eigenvalues to eV and write them out 727 evecs%evals(:) = evecs%evals(:)*ryd 728 if (.not.xct%tda) then 729 call write_eigenvalues(xct,flag,neig,vol,evecs%evals,cs,dipoles_r,dipoles_l=dipoles_l) 730 else 731 call write_eigenvalues(xct,flag,neig,vol,evecs%evals,cs,dipoles_r) 732 endif 733 734 SAFE_DEALLOCATE(dipoles_r) 735 if (.not.xct%tda) then 736 SAFE_DEALLOCATE(dipoles_l) 737 endif 738 739 !------------------------------ 740 ! Calculate the absorption and density of excitonic states 741 742 call timing%start(timing%absorp) 743 call logit('Calling absp') 744 if (peinf%inode==0) then 745 do ipol=1,xct%npol 746 call absp(xct, neig, cs(:, ipol), evecs%evals, vol, omega_plasma, flag, ipol) 747 enddo 748 endif 749 call timing%stop(timing%absorp) 750 751 !------------------------------ 752 ! Write out eigenvectors to file if needed 753 754 call timing%start(timing%write_eig) 755 if (flag%eig/=0) then 756 call logit('Calling write_eigenvectors_bin') 757 call evecs%write_eigenvectors_bin(flag%eig) 758 endif 759 call timing%stop(timing%write_eig) 760 761 SAFE_DEALLOCATE(cs) 762 if (.not.xct%tda) then 763 SAFE_DEALLOCATE(cs_full) 764 endif 765 call evecs%free() 766 767 elseif (xct%algo==BSE_ALGO_LANCZOS) then 768#ifdef USESCALAPACK 769 770 !------------------------------ 771 ! Solve Lanczos algorithm 772 773 call timing%start(timing%absorp) 774 call logit('Calling absp_lanczos') 775 do ipol=1,xct%npol 776 if (xct%tda) then 777 call absp_lanczos(xct, nmat, nmax, dip%s1(:,ipol), vol, omega_plasma, & 778 flag, ipol, en_diff_max, hbse_a) 779 else 780 call absp_lanczos(xct, nmat, nmax, dip%s1(:,ipol), vol, omega_plasma, & 781 flag, ipol, en_diff_max, hbse_a, hbse_b) 782 endif 783 enddo 784 call timing%stop(timing%absorp) 785 786#endif 787 endif 788 ! xct%algo==BSE_ALGO_DIAG 789 790!------------------------------- 791! Deallocate stuff 792 793 call dip%free() 794 795 SAFE_DEALLOCATE_P(peinf%neig) 796 SAFE_DEALLOCATE_P(peinf%peig) 797 call dealloc_grid(kg_fi) 798 799 if (eqp%spl_tck%n>0) then 800 SAFE_DEALLOCATE_P(eqp%spl_tck%t) 801 SAFE_DEALLOCATE_P(eqp%spl_tck%c) 802 endif 803 804 SAFE_DEALLOCATE(hbse_a) 805 if (.not.xct%tda) then 806 SAFE_DEALLOCATE(hbse_b) 807 endif 808 SAFE_DEALLOCATE_P(eqp%ecqp) 809 SAFE_DEALLOCATE_P(eqp%evqp) 810 SAFE_DEALLOCATE(intp_coefs) 811 812 if (xct%iwritecoul .eq. 1 .and. peinf%inode .eq. 0) then 813 call close_file(19) ! file vcoul 814 endif 815 816 call diag_end() 817 818 POP_SUB(diag) 819 return 820 821contains 822 823 subroutine diag_end() 824 825 PUSH_SUB(diag.diag_end) 826 827#ifdef MPI 828 call MPI_BARRIER(MPI_COMM_WORLD,mpierr) 829#endif 830 831 POP_SUB(diag.diag_end) 832 return 833 end subroutine diag_end 834 835end subroutine diag 836