1subroutine khamiltonian 2 !------------------------------------------------------------------------------------------ 3 ! 4 ! This subroutine calculates and saves to disk the elements needed for the construction of 5 ! the k-dependent Hamiltonian in the optimal basis |e_i>: 6 ! K1_ij, K0_ij, K1_ij, Vloc_ij, V_nonloc_ij (only the projectors <\beta_l|exp(ik.r)|e_j>) 7 ! It calculates also the nonlocal commutator for the the velocity matrix elements 8 ! 9 USE kinds, ONLY : DP 10 USE input_simple 11 USE scf, ONLY : vrs 12 USE wavefunctions, ONLY : psic 13 USE cell_base, ONLY : tpiba2, tpiba, bg, alat, at 14 USE gvect, ONLY : gg, g 15 USE io_global, ONLY : stdout, ionode, ionode_id 16 USE mp_world, ONLY : world_comm 17 USE mp, ONLY : mp_sum,mp_barrier, mp_bcast 18 USE noncollin_module, ONLY: npol, noncolin 19 USE io_files, ONLY : prefix, tmp_dir 20 USE fft_base, ONLY : dffts 21 USE fft_interfaces, ONLY : fwfft, invfft 22 USE spin_orb, ONLY : domag 23 USE cell_base, ONLY : omega 24 USE ions_base, ONLY: nat, ntyp => nsp, ityp 25 USE uspp_param, ONLY: nh, nhm 26 USE uspp, ONLY: nkb, deeq, indv_ijkb0, deeq_nc 27 USE lsda_mod, ONLY : nspin 28 USE becmod, ONLY : bec_type, calbec, allocate_bec_type, deallocate_bec_type 29 USE wvfct, ONLY : npwx 30 USE klist, ONLY : nelec 31 USE wannier_gw, ONLY : num_nbndv 32 ! 33 IMPLICIT NONE 34 ! 35 COMPLEX(kind=DP), ALLOCATABLE :: g2e(:,:) , g2e_mat(:,:) , rwfc(:,:) , v_rwfc(:,:) 36 COMPLEX(kind=DP), ALLOCATABLE :: deeqc(:,:,:) 37 COMPLEX(kind=DP), ALLOCATABLE :: d_wfc_e(:,:) 38 COMPLEX(kind=DP), ALLOCATABLE :: commut_mat(:,:) , sum_beck_nc(:,:,:,:) , sum_beckc(:,:,:) , sum_commut_mat(:,:,:,:) 39 COMPLEX(kind=DP) :: sup, sdwn 40 INTEGER :: ipol, ig, itot_e, iun, iun1, idir, ii, ir, jj, kk, jtot_e, kindex, sum_kindex 41 INTEGER, EXTERNAL :: find_free_unit 42 INTEGER, ALLOCATABLE :: igkk(:) 43 REAL(kind=DP) :: qk(3) 44 TYPE(bec_type) :: beck, beck2 45 ! 46 call start_clock('khamiltonian') 47 ! 48 write(stdout,*) ' ' 49 write(stdout,*) 'Compute and store (in files prefix.hamiltonian and prefix.hamiltonian_k) the' 50 write(stdout,*) 'matrix elements needed for the k-dependent Hamiltonian in the optimal basis:' 51 ! 52 allocate(g2e(npw_max*npol,ntot_e),g2e_mat(ntot_e,ntot_e),d_wfc_e(npw_max*npol,ntot_e)) 53 ! 54 if (noncolin) then 55 allocate(sum_beck_nc(nkb,npol,ntot_e,nkpoints(3))) 56 else 57 allocate(sum_beckc(nkb,ntot_e,nkpoints(3))) 58 endif 59 ! 60 if (nonlocal_commutator) then 61 allocate(commut_mat(ntot_e,ntot_e), sum_commut_mat(ntot_e,ntot_e,3,nkpoints(3))) 62 endif 63 ! 64 g2e(1:npw_max*npol,1:ntot_e) = wfc_e(1:npw_max*npol,1:ntot_e) 65 ! 66 ! K0_ij (kinetic part) 67 call start_clock('K0') 68 write(stdout,*) 'K0_ij' 69 do itot_e=1,ntot_e 70 do ipol=0,npol-1 71 do ig=1,npw_max 72 g2e(ig + ipol*npw_max,itot_e) = gg(ig)*g2e(ig + ipol*npw_max,itot_e) 73 enddo 74 enddo 75 enddo 76 ! Scalar product: at the end we have \sum_G e_i*(G) G**2 e_j(G) 77 call ZGEMM('C','N',ntot_e,ntot_e,npol*npw_max,(1.d0,0.d0),g2e,npol*npw_max,wfc_e,npol*npw_max,(0.d0,0.d0),g2e_mat,ntot_e) 78 call mp_sum(g2e_mat,world_comm) 79 ! 80 g2e_mat = g2e_mat*tpiba2 81 ! 82 if(ionode) then 83 iun=find_free_unit() 84 open( unit= iun, file=trim(tmp_dir)//trim(prefix)//'.hamiltonian', & 85 &status='unknown',form='unformatted') 86 write(iun) ntot_e 87 endif 88 if (ionode) write(iun) g2e_mat(1:ntot_e,1:ntot_e) 89 call stop_clock('K0') 90 ! End K0_ij 91 ! 92 ! K1_ij (kinetic part) 93 call start_clock('K1') 94 write(stdout,*) 'K1_ij' 95 do idir=1,3 96 g2e(1:npw_max*npol,1:ntot_e) = wfc_e(1:npw_max*npol,1:ntot_e) 97 do itot_e=1,ntot_e 98 do ipol=0,npol-1 99 do ig=1,npw_max 100 g2e(ig + ipol*npw_max,itot_e) = g(idir,ig)*g2e(ig + ipol*npw_max,itot_e) 101 enddo 102 enddo 103 enddo 104 ! Scalar product: at the end we have \sum_G e_i*(G) G e_j(G) 105 call ZGEMM('C','N',ntot_e,ntot_e,npol*npw_max,(1.d0,0.d0),g2e, & 106 npol*npw_max,wfc_e,npol*npw_max,(0.d0,0.d0),g2e_mat,ntot_e) 107 call mp_sum(g2e_mat,world_comm) 108 ! 109 g2e_mat = g2e_mat*tpiba*2.0 ! In the definition of K1_ij there is a factor 2 in front 110 ! 111 if (ionode) write(iun) g2e_mat(1:ntot_e,1:ntot_e) ! write to disk 112 ! 113 enddo 114 call stop_clock('K1') 115 ! End K1_ij 116 ! 117 ! Vloc_ij 118 call start_clock('Vloc') 119 write(stdout,*) 'Vloc_ij' 120 allocate(rwfc(dffts%nnr*npol,ntot_e)) 121 allocate(v_rwfc(dffts%nnr*npol,ntot_e)) 122 ! 123 do ii=1,ntot_e 124 psic(1:dffts%nnr)=0.d0 125 psic(dffts%nl(1:npw_max))=wfc_e(1:npw_max,ii) 126 CALL invfft ('Wave', psic, dffts) ! Inverse fourier transform of |e_i> on the smooth grid 127 rwfc(1:dffts%nnr,ii)=psic(1:dffts%nnr) ! Wavefunction in real space 128 129 if(npol>1) then 130 psic(1:dffts%nnr)=0.d0 131 psic(dffts%nl(1:npw_max))=wfc_e(npw_max+1:npw_max+npw_max,ii) 132 CALL invfft ('Wave', psic, dffts) 133 rwfc(dffts%nnr+1:2*dffts%nnr,ii)=psic(1:dffts%nnr) 134 endif 135 enddo 136 ! 137 v_rwfc(1:dffts%nnr*npol,1:ntot_e) = rwfc(1:dffts%nnr*npol,1:ntot_e) 138 if (noncolin) then 139 do ii=1,ntot_e 140 if (domag) then 141 do ir=1, dffts%nnr 142 sup = v_rwfc(ir,ii) * (vrs(ir,1)+vrs(ir,4)) + & 143 v_rwfc(ir+ dffts%nnr,ii) * (vrs(ir,2)-(0.d0,1.d0)*vrs(ir,3)) 144 sdwn = v_rwfc(ir+ dffts%nnr,ii) * (vrs(ir,1)-vrs(ir,4)) + & 145 v_rwfc(ir,ii) * (vrs(ir,2)+(0.d0,1.d0)*vrs(ir,3)) 146 v_rwfc(ir,ii)=sup 147 v_rwfc(ir+ dffts%nnr,ii)=sdwn 148 enddo 149 else 150 do ir=1, dffts%nnr 151 v_rwfc(ir,ii) = v_rwfc(ir,ii) * vrs(ir,1) 152 v_rwfc(dffts%nnr+ir,ii) = v_rwfc(dffts%nnr+ir,ii) * vrs(ir,1) 153 enddo 154 !v_rwfc(dffts%nnr+1:dffts%nnr*npol,ii) = 0.d0 155 endif 156 enddo 157 else 158 do ii=1,ntot_e 159 do ir=1,dffts%nnr 160 v_rwfc(ir,ii) = v_rwfc(ir,ii)*vrs(ir,1) ! V_loc(r) x e_i(r) 161 enddo 162 enddo 163 end if 164 165 ! Calculate integral of e_j(r)* x V_loc(r) x e_i(r) 166 call ZGEMM('C','N',ntot_e,ntot_e,npol*dffts%nnr,(1.d0,0.d0),rwfc, & 167 npol*dffts%nnr,v_rwfc,npol*dffts%nnr,(0.d0,0.d0),g2e_mat,ntot_e) 168 call mp_sum(g2e_mat,world_comm) 169 ! 170 g2e_mat = g2e_mat / dble(dffts%nr1*dffts%nr2*dffts%nr3) 171 ! 172 if (ionode) write(iun) g2e_mat(1:ntot_e,1:ntot_e) 173 ! 174 deallocate(rwfc,v_rwfc) 175 call stop_clock('Vloc') 176 ! End Vloc_ij 177 178 ! Vnonloc_ij 179 write(stdout,*) 'Vnonloc_ij(k) (projectors)' 180 if (nonlocal_commutator) then 181 write(stdout,*) '! Non local commutator from the velocity operator will also be computed !' 182 else 183 write(stdout,*) 'WARNING: Non local commutator from the velocity operator will NOT be computed' 184 endif 185 ! 186 if(ionode) then 187 write(iun) noncolin 188 write(iun) nat 189 write(iun) ntyp 190 write(iun) nhm 191 write(iun) nspin 192 write(iun) nkb 193 write(iun) npol 194 write(iun) ityp(1:nat) 195 write(iun) nh(1:ntyp) 196 write(iun) indv_ijkb0(1:nat) 197 write(iun) nkpoints 198 endif 199 ! 200 allocate(deeqc(nhm,nhm,nat), vkb_max(npw_max,nkb), igkk(npw_max)) 201 if (noncolin) then 202 if (ionode) write(iun) deeq_nc(1:nhm,1:nhm,1:nat,1:nspin) 203 else 204 deeqc(1:nhm,1:nhm,1:nat) = cmplx(deeq(1:nhm,1:nhm,1:nat,1), 0.d0, KIND=DP) 205 if (ionode) write(iun) deeqc(1:nhm,1:nhm,1:nat) 206 endif 207 ! 208 do ig=1,npw_max 209 igkk(ig) = ig 210 enddo 211 ! 212 call allocate_bec_type(nkb,ntot_e,beck) 213 call allocate_bec_type(nkb,ntot_e,beck2) 214 ! 215 if(ionode) then 216 iun1=find_free_unit() 217 open( unit= iun1, file=trim(tmp_dir)//trim(prefix)//'.hamiltonian_k', & 218 &status='unknown',form='unformatted') 219 endif 220 ! 221 !! Calculation of the non-local part of the pseudopotential on a uniform k-mesh 222 write(stdout,'(a,i7)') ' Total number of k-points:', (nkpoints(1))*(nkpoints(2))*(nkpoints(3)) 223 do ii=0,nkpoints(1)-1 224 do jj=0,nkpoints(2)-1 225 ! 226 do kk=0,nkpoints(3)-1 227 kindex = kk + jj*nkpoints(3) + ii*nkpoints(2)*nkpoints(3) + 1 ! global kindex 228 sum_kindex = kk + 1 229 write(stdout,'(a,i7)') ' k-point:', kindex 230 qk(1:3)=bg(1:3,1)*dble(ii)/dble(nkpoints(1))+bg(1:3,2)*dble(jj)/dble(nkpoints(2))+& 231 & bg(1:3,3)*dble(kk)/dble(nkpoints(3)) 232 ! 233 call start_clock('Vnloc') 234 if (nkb>0) then 235 call init_us_2_max(npw_max,igkk,qk,vkb_max) ! get the projectors \beta_Ilm (k-dependent) 236 endif 237 !vkb_max(npwx+1:npw_max,1:nkb) = 0.d0 ! WARNING: HERE I PUT TO ZERO THE ELEMENTS OF BETA WiTH G > npwx 238 ! 239 call calbec(npw_max,vkb_max,wfc_e,beck,ntot_e) ! scalar product <\beta_Ilm|exp(ik.r)|e_j> 240 ! 241 if (noncolin) then 242 sum_beck_nc(1:nkb,1:npol,1:ntot_e,sum_kindex) = beck%nc(1:nkb,1:npol,1:ntot_e) 243 else 244 sum_beckc(1:nkb,1:ntot_e,sum_kindex) = beck%k(1:nkb,1:ntot_e) 245 endif 246 call stop_clock('Vnloc') 247 ! 248 call start_clock('commutator') 249 if (nonlocal_commutator) then 250 ! Calculation of commutator 251 do ipol=1,3 252 ! 253 call commutator_Hx_psi_simple(qk, npw_max, ntot_e, beck, beck2, ipol, d_wfc_e, wfc_e, vkb_max) 254 ! 255 ! Calculate the matrix <e_i|[V(k),r]|e_j> 256 commut_mat = 0.d0 257 call start_clock('commut_zgemm') 258 call ZGEMM('C','N',ntot_e,ntot_e,npol*npw_max,(1.d0,0.d0),wfc_e,npol*npw_max,d_wfc_e, & 259 & npol*npw_max,(0.d0,0.d0),commut_mat,ntot_e) 260 call stop_clock('commut_zgemm') 261 ! 262 call start_clock('commut_mp_sum') 263 call mp_sum(commut_mat,world_comm) 264 call stop_clock('commut_mp_sum') 265 ! 266 ! save i[V_nl,r] = -i [r,V_nl] 267 sum_commut_mat(1:ntot_e,1:ntot_e,ipol,sum_kindex) = (0.d0,1.d0)*commut_mat(1:ntot_e,1:ntot_e) 268 ! 269 enddo 270 ! End calculation of commutator 271 endif 272 call stop_clock('commutator') 273 ! 274 enddo 275 ! 276 call start_clock('Vnloc_write') 277 if (noncolin) then 278 if (ionode) write(iun1) sum_beck_nc(1:nkb,1:npol,1:ntot_e,1:nkpoints(3)) 279 else 280 if (ionode) write(iun1) sum_beckc(1:nkb,1:ntot_e,1:nkpoints(3)) 281 endif 282 call stop_clock('Vnloc_write') 283 ! 284 if (nonlocal_commutator) then 285 call start_clock('commut_write') 286 if (ionode) write(iun1) sum_commut_mat(1:ntot_e,1:ntot_e,1:3,1:nkpoints(3)) 287 call stop_clock('commut_write') 288 endif 289 ! 290 enddo 291 enddo 292 ! 293 deallocate(deeqc,g2e_mat,d_wfc_e,g2e) 294 if (noncolin) then 295 deallocate(sum_beck_nc) 296 else 297 deallocate(sum_beckc) 298 endif 299 ! 300 if (nonlocal_commutator) then 301 deallocate(commut_mat, sum_commut_mat) 302 endif 303 ! 304 call deallocate_bec_type(beck) 305 call deallocate_bec_type(beck2) 306 deallocate(vkb_max,igkk) 307 !End Vnonloc_ij 308 ! 309 if(ionode) then 310 write(iun) alat 311 write(iun) bg(1:3,1:3) 312 write(iun) at(1:3,1:3) 313 write(iun) nelec 314 write(iun) omega 315 write(iun) num_val 316 write(iun) num_cond 317 write(iun) num_nbndv 318 write(iun) nonlocal_commutator 319 write(iun) s_bands 320 endif 321 ! 322 if(ionode) then 323 close(iun) 324 close(iun1) 325 endif 326 ! 327 write(stdout,*) ' ' 328 write(stdout,*) '**********************************' 329 write(stdout,*) 'Matrix elements computed and saved' 330 write(stdout,*) '**********************************' 331 ! 332 call stop_clock('khamiltonian') 333 ! 334 ! Write in output the input parameters 335 write(stdout,*) ' ' 336 write(stdout,*) 'INPUT PARAMETERS:' 337 write(stdout,'(a,f10.5)') ' s_bands [a.u.] = ', s_bands 338 write(stdout,'(a,I3,I3,I3)') ' k-grid = ', nkpoints(1:3) 339 if (.not. nonlocal_commutator) then 340 write(stdout,*) 'nonlocal_commutator = False' 341 elseif (nonlocal_commutator) then 342 write(stdout,*) 'nonlocal_commutator = True' 343 endif 344 write(stdout,*) ' ' 345 ! 346 call print_clock('K0') 347 call print_clock('K1') 348 call print_clock('Vloc') 349 call print_clock('Vnloc') 350 call print_clock('Vnloc_write') 351 call print_clock('commutator') 352 call print_clock('commut_Hx_psi') 353 !call print_clock('gen_beta1') 354 !call print_clock('gen_beta2') 355 call print_clock('commut_nbnd') 356 call print_clock('commut_zgemm') 357 call print_clock('commut_mp_sum') 358 call print_clock('commut_write') 359 call print_clock('khamiltonian') 360 ! 361end subroutine khamiltonian 362