1!================================================================================ 2! 3! Module: 4! 5! (1) mtxel_cor() Originally By ? Last Modified 8/15/2015 (FHJ) 6! 7! Computes the correlation contribution to the QP self-energy with 8! different approaches (SX+CH, or COR directly), and with different levels 9! of approximations to the dynamical effects of W (static, GPP, real-axis 10! integration and contour deformation). 11! 12! More specifically, this routine computes the contribution from the 13! current processor to the correlation part of the self-energy operator 14! between bands `n` (sig%diag(in)) and `m` (sig%diag(im)) at the frequency 15! of the band `l` (sig%diag(il)). 16! 17! Screened exchange is put into asxt(iw), Coulomb hole into acht(iw), 18! iw goes from 1 to 3 (see below for what it is). 19! 20! On entry, asxt(:) and acht(:) are zeroed. 21! 22! Physically, this routine will compute: 23! 24! < psi_n ( k ) | Sigma_cor ( E ) | psi_m ( k ) > 25! 26! where Sigma_cor(E) = Sigma(E) - Sigma_X(E). We compute the retarded 27! self-energy operator, except for contour-deformation calculations, 28! in which case we compute the time-ordered Sigma. 29! 30! Sigma_cor(E) is computed within different approximations: 31! - static COHSEX approximation, both exact and partial sum CH (gsm) 32! - Generalized Plasmon Pole model (?) 33! - full frequency dependent inverse dielectric matrix (CDS/CHP/gsm) 34! 35! ch2 variables are used to calculate the CH part of the imaginary part 36! of the self energy assuming a zero energy broadening in the energy 37! denominator for the evaluation of the self energy (CHP). 38! 39!================================================================================ 40 41#include "f_defs.h" 42 43module mtxel_cor_m 44 45 use global_m 46 use misc_m 47 use blas_m 48 use timing_m, only: timing => sigma_timing 49 50 implicit none 51 52 private 53 54 public :: mtxel_cor 55 56contains 57 58subroutine mtxel_cor(in,il,ispin,ncouls,neps,gvec,eps,ph,ind, & 59 indinv,isrtrqi,isrtrq,vcoul,crys,sig,wpg,wfnk,wfnkq,ncoulch, & 60 aqsn,aqsm,aqsch,asigt_imag,acht_n1,asxt,acht,achtcor,achtcor_n1,nspin,qk, & 61 coulfact,inv_igp_index,ngpown,epsR,epsA, & 62 achtD_n1,asxtDyn,achtDyn,achtDyn_cor,achtDyn_corb,ach2tDyn,icalc) 63 64 integer, intent(in) :: in,il,ispin,ncouls,neps 65 type (gspace), intent(in) :: gvec 66 !> (neps,ngpown) Uninitialized unless we are running the complex version 67 SCALAR, pointer, intent(in) :: eps(:,:) 68 SCALAR, intent(in) :: ph(:) !< (gvec%ng) 69 integer, intent(in) :: ind(:), indinv(:), isrtrq(:) !< (gvec%ng) 70 !> (gvec%ng) Uninitialized unless freq_dep=0 .or. exact_ch=1 71 integer, pointer, intent(in) :: isrtrqi(:) 72 real(DP), intent(in) :: vcoul(:) !< (ncoul) 73 type (crystal), intent(in) :: crys 74 type (siginfo), intent(in) :: sig 75 type (wpgen), intent(in) :: wpg 76 type (wfnkstates), intent(in) :: wfnk 77 type (wfnkqstates), intent(in) :: wfnkq 78 integer, intent(in) :: ncoulch 79 SCALAR, intent(in) :: aqsn(:,:), aqsm(:,:) !< (peinf%ntband_max,ncoul) 80 !> (ncoulch) Uninitialized unless freq_dep=0 .or. exact_ch=1 81 SCALAR, pointer, intent(in) :: aqsch(:) 82 !> (sig%ntband) Uninitialized in FF calculations 83 SCALAR, pointer, intent(out) :: acht_n1(:) 84 !> (3 or sig%nfreqeval) Uninitialized unless freq_dep=1 85 SCALAR, pointer, intent(out) :: asxt(:), acht(:) 86! FHJ: static remainder, resolved over bands 87 SCALAR, pointer, intent(out) :: achtcor_n1(:) 88 SCALAR, intent(out) :: achtcor 89 SCALAR, intent(out) :: asigt_imag 90 integer, intent(in) :: nspin 91 real(DP), intent(in) :: qk(:) !< (3) 92 real(DP), intent(in) :: coulfact 93 integer, intent(in) :: inv_igp_index(:) !< (neps) 94 integer, intent(in) :: ngpown 95 !> The following pointers are uninitialized unless we are running the complex version 96 complex(DPC), pointer, intent(in) :: epsR(:,:,:),epsA(:,:,:) !< (neps,ngpown,sig%nFreq) 97 complex(DPC), pointer, intent(out) :: achtD_n1(:) !< (sig%ntband) 98 complex(DPC), pointer, intent(out) :: asxtDyn(:), achtDyn(:), achtDyn_cor(:), ach2tDyn(:) !< (sig%nfreqeval) 99 complex(DPC), pointer, intent(out) :: achtDyn_corb(:) !< (sig%nfreqeval) 100 integer, intent(in) :: icalc 101 102 SCALAR, allocatable :: epstemp(:), aqsntemp(:,:),aqsmtemp(:,:) 103 complex(DPC), allocatable :: epsRtemp(:,:),epsAtemp(:,:) 104 SCALAR, allocatable:: acht_n1_loc(:) 105 106 integer :: my_igp, indigp, ipe, j, iout, my_id 107 real(DP) :: diff, diffmin 108 logical :: flag_occ 109 integer :: ig,igp,igpp,igpp2,iw,iwlda,n1,iband,n1true,nstart,nend,gpp(3) 110 real(DP), allocatable :: wx_array(:) 111! chs - partial sum static CH, chx - exact static CH 112 SCALAR :: achstemp, achxtemp, schx, epsggp, I_epsggp, achxtemp_gp, Omega2, wtilde2 113 SCALAR, allocatable :: asxtemp(:),achtemp(:) 114 SCALAR, allocatable :: wpmtx(:),I_eps_array(:,:) 115 real(DP) :: e_lk, e_n1kq, lambda, phi, freq0, wx, occ 116 complex(DPC) :: wtilde, wtilde2_temp, ctemp 117 complex(DPC), allocatable :: wtilde_array(:,:) 118 119! full-frequency 120 integer :: ifreq, nfreq_real 121 real(DP) :: E_max, pref_zb, freqStart, freqEnd 122 real(DP), allocatable :: pref(:), wxi(:) 123 complex(DPC), allocatable :: asxDtemp(:), achDtemp(:), ach2Dtemp(:), achDtemp_cor(:) 124 complex(DPC), allocatable :: achDtemp_corb(:), sch2Di(:), ssxDi(:) 125 complex(DPC), allocatable :: schDi(:), schDi_cor(:), schDi_corb(:) 126 complex(DPC) :: ssxDit,ssxDitt,ssxDittt,schDt,schDtt 127 complex(DPC), allocatable :: epsRggp(:),I_epsRggp(:) 128 complex(DPC), allocatable :: I_epsR_array(:,:,:),I_epsA_array(:,:,:) 129 SCALAR, allocatable :: I_eps_imag(:,:,:) 130 SCALAR :: mcph 131 132! subspace global variables 133 logical :: do_sigma_subspace 134 integer :: actual_nm 135 integer :: ngpown_sub_max, Nbas_own_max 136 integer :: ngpown_sub, Nbas_own, wing_pos 137 integer, pointer :: eps_sub_info(:,:) 138 complex(DPC), pointer :: eigenvec_sub(:,:) 139 complex(DPC), pointer :: eps_sub(:,:,:) 140 complex(DPC), pointer :: eps_wings_correction_rows(:,:) 141 complex(DPC), pointer :: eps_wings_correction_cols(:,:) 142 real(DP), pointer :: vcoul_sub(:) 143! subspace local variables 144 logical :: my_do_subspace 145 logical :: fix_wings, fix_wings_res 146 integer :: my_G_start, my_G_end, my_G 147 integer :: my_Nb_start, my_Nb_end, my_Nb, Nb_tot, my_ib 148 integer :: wing_pos_igp 149 integer, allocatable :: ipe_2_LocSubSize(:,:) 150 complex(DPC), allocatable :: wings_Re(:,:,:) !(:,:,1/2) 1=row, 2=col 151 complex(DPC), allocatable :: Re_eps_sub(:,:,:) 152 complex(DPC) :: wings_Im_elem 153 SCALAR, allocatable :: wings_Im(:,:,:) 154 SCALAR, allocatable :: Im_eps_sub(:,:,:) 155 SCALAR, allocatable :: Msub_m(:,:), Msub_n(:,:) 156 SCALAR, allocatable :: Caux(:,:), Caux_send(:,:), Caux_rec(:,:), Maux(:,:) 157 SCALAR, allocatable :: n_q_order(:,:), m_q_order(:,:) 158 SCALAR, allocatable :: Msub_m_temp(:,:), Msub_n_temp(:,:) 159 ! non-blocking cyclic communication 160 integer :: ipe_inx 161 integer :: isend_static, irec_static 162 integer :: actual_send, actual_rec 163 integer :: req_send_n, tag_send_n, req_rec_n, tag_rec_n 164 integer :: req_send_m, tag_send_m, req_rec_m, tag_rec_m 165 integer :: ib_start, ib_end, ib_size, my_num_band 166#ifdef MPI 167 integer :: stat(MPI_STATUS_SIZE) 168#endif 169 170 complex(DPC), allocatable :: mat_1(:,:), mat_2(:,:) 171 172 PUSH_SUB(mtxel_cor) 173 174 my_do_subspace = .false. 175 if(sig%do_sigma_subspace) then 176 ! check if all pointers are assosiated 177 if(associated(sig%epssub%eps_sub_info) .and. & 178 associated(sig%epssub%eigenvec_sub) .and. & 179 associated(sig%epssub%eps_sub) .and. & 180 associated(sig%epssub%eps_wings_correction_rows) .and. & 181 associated(sig%epssub%eps_wings_correction_cols) .and. & 182 associated(sig%epssub%vcoul_sub)) then 183 my_do_subspace = sig%do_sigma_subspace 184 end if 185 end if 186 187 if(my_do_subspace) then 188 ! 189 actual_nm = sig%epssub%actual_nm 190 nullify(eps_sub_info, eigenvec_sub, eps_sub, & 191 eps_wings_correction_rows, eps_wings_correction_cols, & 192 vcoul_sub) 193 eps_sub_info => sig%epssub%eps_sub_info(:,:,actual_nm) 194 eigenvec_sub => sig%epssub%eigenvec_sub(:,:,actual_nm) 195 eps_sub => sig%epssub%eps_sub(:,:,:,actual_nm) 196 eps_wings_correction_rows => sig%epssub%eps_wings_correction_rows(:,:) 197 eps_wings_correction_cols => sig%epssub%eps_wings_correction_cols(:,:) 198 vcoul_sub => sig%epssub%vcoul_sub(:,actual_nm) 199 ! 200 wing_pos = sig%epssub%wing_pos(actual_nm) 201 ! 202 ngpown_sub_max = sig%epssub%ngpown_sub_max 203 Nbas_own_max = sig%epssub%Nbas_own_max 204 ngpown_sub = sig%epssub%ngpown_sub 205 Nbas_own = sig%epssub%Nbas_own 206 ! 207 my_Nb_start = eps_sub_info(1,1) 208 my_Nb_end = eps_sub_info(2,1) 209 Nb_tot = eps_sub_info(3,1) 210 my_Nb = my_Nb_end - my_Nb_start + 1 211 ! 212 my_G_start = eps_sub_info(1,2) 213 my_G_end = eps_sub_info(2,2) 214 my_G = my_G_end - my_G_start + 1 215 216 SAFE_ALLOCATE(ipe_2_LocSubSize, (3, peinf%npes_pool)) 217 ipe_2_LocSubSize = 0 218 ipe_2_LocSubSize(1,peinf%pool_rank+1) = my_Nb_start 219 ipe_2_LocSubSize(2,peinf%pool_rank+1) = my_Nb_end 220 ipe_2_LocSubSize(3,peinf%pool_rank+1) = my_Nb 221#ifdef MPI 222 call MPI_Allreduce(MPI_IN_PLACE, ipe_2_LocSubSize(:,:), & 223 3 * peinf%npes_pool, MPI_INTEGER, MPI_SUM, & 224 peinf%pool_comm, mpierr) 225#endif 226 227 if( (my_Nb_start .ne. my_G_start) .or. & 228 (my_Nb_end .ne. my_G_end) .or. & 229 (my_Nb .ne. my_G) ) then 230 call die("BUG: Subspace Epsilon and eigenvector distribution don't match!") 231 end if 232 233 234 !XX SAFE_ALLOCATE(Caux, (ncouls,Nb_tot)) 235 !XX !XXXX replicate 236 !XX Caux = ZERO 237 !XX if(my_G_start > 0) then 238 !XX ! Caux(my_G_start:my_G_end,1:Nb_tot) = eigenvec_sub(1:my_G,1:Nb_tot) 239 !XX Caux(1:ncouls,my_G_start:my_G_end) = eigenvec_sub(1:ncouls,1:my_G) 240 !XX end if 241 !XX call MPI_Allreduce(MPI_IN_PLACE, Caux(:,:), & 242 !XX ncouls * Nb_tot, MPI_COMPLEX_DPC, MPI_SUM, & 243 !XX peinf%pool_comm, mpierr) 244 245 ! matrix elements in subspace 246 SAFE_ALLOCATE(Msub_m, (Nb_tot,peinf%ntband_max)) 247 SAFE_ALLOCATE(Msub_n, (Nb_tot,peinf%ntband_max)) 248 end if 249 250 if(sig%freq_dep .eq. -1) then 251 call die("BUG: cannot call mtxel_cor for Hartree-Fock!") 252 endif 253 nfreq_real = sig%nFreq - sig%nfreq_imag 254 255 SAFE_ALLOCATE(acht_n1_loc, (sig%ntband)) 256 if (sig%freq_dep/=0 .and. sig%exact_ch==1) then 257 achtcor_n1 = ZERO 258 endif 259 260! Initialize Output Arrays 261! SIB: Zero contribution to asx(n) and ach(n) for this irq 262 263 if (sig%freq_dep.eq.0.or.sig%freq_dep.eq.1.or.sig%freq_dep.eq.3) then 264 asxt(:) = ZERO 265 acht(:) = ZERO 266 acht_n1(:) = ZERO 267 acht_n1_loc(:) = ZERO 268 elseif (sig%freq_dep.eq.2) then 269 asxtDyn(:) = (0.0d0,0.0d0) 270 achtDyn(:) = (0.0d0,0.0d0) 271 achtDyn_cor(:) = (0.0d0,0.0d0) 272 achtDyn_corb(:) = (0.0d0,0.0d0) 273 ach2tDyn(:) = (0.0d0,0.0d0) 274 achtD_n1(:) = (0.0d0,0.0d0) 275 endif 276 achtcor = ZERO 277 asigt_imag = ZERO 278 279 if (peinf%my_pool .eq. -1) then 280 POP_SUB(mtxel_cor) 281 return 282 endif 283 284 285!----------------------- 286! Initialization for full-frequency CH integral 287 call timing%start(timing%m_cor_init) 288 if (sig%freq_dep.eq.2) then 289 SAFE_ALLOCATE(pref, (sig%nFreq)) 290#ifdef CPLX 291 pref_zb = 0.5D0 / PI_D 292#else 293 pref_zb = 1D0 / PI_D 294#endif 295 do ifreq=1,sig%nFreq 296 if (ifreq .lt. sig%nFreq) then 297 pref(ifreq)=(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq))/PI_D 298 else 299 pref(ifreq)=pref(ifreq-1) 300 endif 301 enddo 302 pref(1)=pref(1)*0.5d0 303 pref(sig%nFreq)=pref(sig%nFreq)*0.5d0 304#ifdef CPLX 305 do ifreq=1,sig%nFreq 306 pref(ifreq)=pref(ifreq)*0.5d0 307 enddo 308#endif 309 E_max=sig%dFreqGrid(sig%nFreq) 310 SAFE_ALLOCATE(asxDtemp, (sig%nfreqeval)) 311 asxDtemp = 0D0 312 SAFE_ALLOCATE(achDtemp, (sig%nfreqeval)) 313 achDtemp = 0D0 314 SAFE_ALLOCATE(achDtemp_cor, (sig%nfreqeval)) 315 achDtemp_cor = 0D0 316 SAFE_ALLOCATE(achDtemp_corb, (sig%nfreqeval)) 317 achDtemp_corb = 0D0 318 SAFE_ALLOCATE(ach2Dtemp, (sig%nfreqeval)) 319 ach2Dtemp = 0D0 320 SAFE_ALLOCATE(schDi, (sig%nfreqeval)) 321 schDi=0D0 322 SAFE_ALLOCATE(schDi_cor, (sig%nfreqeval)) 323 schDi_cor=0D0 324 SAFE_ALLOCATE(schDi_corb, (sig%nfreqeval)) 325 schDi_corb=0D0 326 SAFE_ALLOCATE(sch2Di, (sig%nfreqeval)) 327 sch2Di=0D0 328 SAFE_ALLOCATE(ssxDi, (sig%nfreqeval)) 329 ssxDi=0D0 330 SAFE_ALLOCATE(wxi, (sig%nfreqeval)) 331 wxi=0D0 332 if(.not. (my_do_subspace)) then 333 ! in the subspace case we allocate this later 334 SAFE_ALLOCATE(I_epsR_array, (ncouls,ngpown,nfreq_real)) 335 if (sig%need_advanced) then 336 SAFE_ALLOCATE(I_epsA_array, (ncouls,ngpown,nfreq_real)) 337 endif 338 if (sig%freq_dep_method==2) then 339 SAFE_ALLOCATE(I_eps_imag, (ncouls,ngpown,sig%nfreq_imag)) 340 endif 341 end if ! .not.(my_do_subspace) 342 else if (sig%freq_dep.eq.1) then 343 SAFE_ALLOCATE(I_eps_array, (ncouls,ngpown)) 344 SAFE_ALLOCATE(wtilde_array, (ncouls,ngpown)) 345!$OMP PARALLEL DO private(j) 346 do j = 1, ngpown 347 I_eps_array(:,j)=ZERO 348 wtilde_array(:,j)=(0D0,0D0) 349 enddo 350!$OMP END PARALLEL DO 351 else if (sig%freq_dep.eq.3) then 352 SAFE_ALLOCATE(I_eps_array, (ncouls,ngpown)) 353 SAFE_ALLOCATE(wtilde_array, (ncouls,ngpown)) 354 I_eps_array(:,:)=ZERO 355 wtilde_array(:,:)=(0D0,0D0) 356 else 357 SAFE_ALLOCATE(I_eps_array, (ncouls,ngpown)) 358 endif 359 360 if(my_do_subspace) then 361 !XXX ! a bit stupid, just deallocate and reallocate 362 !XXX SAFE_DEALLOCATE(I_epsR_array) 363 !XXX SAFE_DEALLOCATE(I_eps_imag) 364 SAFE_ALLOCATE(I_epsR_array, (ncouls,ngpown,1)) 365 SAFE_ALLOCATE(I_eps_imag, (1,1,1)) 366 ! 367 SAFE_ALLOCATE(Im_eps_sub, (Nb_tot,my_Nb,sig%nfreq_imag)) 368 SAFE_ALLOCATE(Re_eps_sub, (Nb_tot,my_Nb,nfreq_real)) 369 Im_eps_sub = ZERO 370 Re_eps_sub = (0D0,0D0) 371 SAFE_ALLOCATE(wings_Im, (ncouls, sig%nfreq_imag, 2)) 372 SAFE_ALLOCATE(wings_Re, (ncouls, nfreq_real, 2)) 373 wings_Im = ZERO 374 wings_Re = (0D0,0D0) 375 ! reordered matrix elements 376 SAFE_ALLOCATE(n_q_order, (ncouls,peinf%ntband_max)) 377 SAFE_ALLOCATE(m_q_order, (ncouls,peinf%ntband_max)) 378 n_q_order = ZERO 379 m_q_order = ZERO 380 end if 381 382! SIB: the array wfnk%ek has bands indexed by sig%diag 383! take the right band index 384! DVF : sig%ncore_excl has to be substracted here because wfnk%ek is defined in the 385! read_wavefunction subroutine in input.f90 to be referenced to the case with 386! no core states. 387 388 iband = sig%diag(il) 389 e_lk = wfnk%ek(iband-sig%ncore_excl,ispin) 390 ! FHJ: Figure out starting frequency for freq. grid 391 if (sig%freq_grid_shift<2) then 392 freq0 = sig%freqevalmin 393 else 394 freq0 = e_lk - sig%freqevalstep*(sig%nfreqeval-1)/2 395 if (sig%freq_dep==2) then 396 !FHJ: Avoid hitting a pole 397 freq0 = freq0 + TOL_SMALL 398 endif 399 endif 400 401! JRD: Initial frequencies for plasmon pole case. 402! Note: below assumes that we will use iw=1,2,3 in 403! subroutine shiftenergy depending on value of sig%fdf 404 405 if (sig%freq_dep.eq.1 .or. sig%freq_dep.eq.0.or.sig%freq_dep.eq.3) then 406 if (sig%fdf.eq.-3) then 407 SAFE_ALLOCATE(asxtemp,(sig%nfreqeval)) 408 SAFE_ALLOCATE(achtemp,(sig%nfreqeval)) 409 else 410 SAFE_ALLOCATE(asxtemp,(3)) 411 SAFE_ALLOCATE(achtemp,(3)) 412 endif 413 asxtemp(:) = ZERO 414 achtemp(:) = ZERO 415 endif 416 417 if (sig%freq_dep.eq.1.or.sig%freq_dep.eq.3) then 418 if (sig%fdf.eq.-3) then 419 nstart=1 420 nend=sig%nfreqeval 421 SAFE_ALLOCATE(wxi, (sig%nfreqeval)) 422 SAFE_ALLOCATE(wx_array, (sig%nfreqeval)) 423 do iw=1,sig%nfreqeval 424 wx = freq0 + (iw-1)*sig%freqevalstep 425 wxi(iw) = wx 426 enddo 427 elseif (sig%fdf.eq.-1) then 428 nstart = 1 429 nend = 2 430 SAFE_ALLOCATE(wx_array, (3)) 431 elseif (sig%fdf.eq.0) then 432 nstart = 1 433 nend = 3 434 SAFE_ALLOCATE(wx_array, (3)) 435 elseif (sig%fdf.eq.1.or.(sig%fdf.eq.2.and.icalc.eq.1)) then 436 nstart = 2 437 nend = 3 438 SAFE_ALLOCATE(wx_array, (3)) 439 else 440 nstart = 2 441 nend = 2 442 SAFE_ALLOCATE(wx_array, (3)) 443 endif 444 endif 445 446 call timing%stop(timing%m_cor_init) 447!-------------- Main loop over G` (igp) --------------------------------------- 448 call timing%start(timing%m_cor_epsinit) 449 achxtemp = ZERO 450 451! In below OMP region: 452! I_eps_array, I_epsR_array, I_epsA_array, I_eps_imag, wtilde_array are shared 453 454!$OMP PARALLEL private (ig,epsggp,I_epsggp,gpp,iout,schx,igpp,igpp2,achxtemp_gp,igp,indigp, & 455!$OMP epstemp,epsRtemp,I_epsRggp,epsRggp, & 456#ifdef CPLX 457!$OMP epsAtemp,wtilde2_temp,lambda,phi, & 458#endif 459!$OMP wpmtx,wtilde,wtilde2,Omega2,iw,mcph,ctemp,wings_Im_elem) 460 461#ifdef OMP 462 my_id = omp_get_thread_num() 463#else 464 my_id = 0 465#endif 466 467! Allocate Temporary Arrays 468 469 select case(sig%freq_dep) 470 case(0) 471 SAFE_ALLOCATE(epstemp, (neps)) 472 case(1) 473 SAFE_ALLOCATE(epstemp, (neps)) 474 SAFE_ALLOCATE(wpmtx, (neps)) 475 case(2) 476 SAFE_ALLOCATE(epstemp, (neps)) 477 case(3) 478 SAFE_ALLOCATE(epstemp, (neps)) 479 SAFE_ALLOCATE(epsRtemp, (neps,sig%nFreq)) 480 SAFE_ALLOCATE(epsRggp, (sig%nFreq)) 481 SAFE_ALLOCATE(I_epsRggp, (sig%nFreq)) 482 end select 483 484!$OMP DO reduction(+:achxtemp) 485 do my_igp=1,ngpown 486 487 indigp = inv_igp_index(my_igp) 488 igp = indinv(indigp) 489 490 if (igp .gt. ncouls) write(6,*) "CATASTROPHE", peinf%inode, my_igp, igp 491 if (igp .gt. ncouls .or. igp .le. 0) cycle 492 493!!------------- Initialize eps^-1 for this G` --------------------------------- 494 495 select case(sig%freq_dep) 496 case(0,1) 497 epstemp(:)=eps(:,my_igp) 498 case(2) 499 epstemp(:)=SCALARIFY(epsR(:,my_igp,1)) 500 case(3) 501 epstemp(:)=SCALARIFY(epsR(:,my_igp,1)) 502 epsRtemp(:,:)=epsR(:,my_igp,:) 503 504 end select 505 506!------------------------------------------------------------------------------ 507! (gsm) Below you`ll find the code to compute SX & CH in the static COHSEX 508! approximation (both the exact and partial sum expressions for CH), 509! within the GPP model, and using the full frequency dependent RPA 510! inverse dielectric matrix. The GPP section of the code is well 511! commented (thanks to Sohrab), while the COHSEX and RPA sections 512! are not... But most of the variables are the same anyways, 513! so just look at the comments in the GPP section. 514!------------------------------------------------------------------------------ 515 516! (gsm) <<<<<< static COHSEX approximation - exact static CH >>>>>> 517 518! < n k | \Sigma_{CH} (r, r`; 0) | m k > = 519! \frac{1}{2} \sum_{q G G`} 520! < n k | e^{i (G - G`) \cdot r} | m k > 521! [\eps_{G G`}^{-1} (q; 0) - \delta_{G G`}] v (q + G`) 522 523 if (sig%freq_dep.eq.0.or.sig%exact_ch.eq.1) then 524 525! Only Computed on One Processor Per Pool 526 527 ! JRD: Since, we are distributed over igp now, all procs need to do this 528 529 achxtemp_gp = ZERO 530 531 do ig=1,ncouls 532 533 epsggp = ph(ig)*MYCONJG(ph(igp))*SCALARIFY(epstemp(ind(ig))) 534 535 if (ig.eq.igp) then 536 I_epsggp = epsggp - 1.0d0 537 else 538 I_epsggp = epsggp 539 endif 540 if (abs(I_epsggp).lt.TOL_Small) cycle 541 if (ig.ne.igp) then 542 gpp(:)=gvec%components(:,isrtrq(ig))-gvec%components(:,isrtrq(igp)) 543 call findvector(iout,gpp,gvec) 544 if (iout.eq.0) cycle 545 igpp=isrtrqi(iout) 546 if (igpp.lt.1.or.igpp.gt.ncoulch) cycle 547 gpp(:)=gvec%components(:,isrtrq(igp))-gvec%components(:,isrtrq(ig)) 548 call findvector(iout,gpp,gvec) 549 if (iout.eq.0) cycle 550 igpp2=isrtrqi(iout) 551 if (igpp2.lt.1.or.igpp2.gt.ncoulch) cycle 552 else 553 gpp(:)=0 554 call findvector(iout,gpp,gvec) 555 if (iout.eq.0) cycle 556 igpp=isrtrqi(iout) 557 if (igpp.lt.1.or.igpp.gt.ncoulch) cycle 558 endif 559 schx = aqsch(igpp) * I_epsggp 560 achxtemp_gp = achxtemp_gp + schx 561 enddo ! over G (ig) 562 563 achxtemp = achxtemp + achxtemp_gp * vcoul(igp) * 0.5d0 564 565 endif ! sig%freq_dep.eq.0.or.sig%exact_ch.eq.1 566 567!!----------------------------------------------------------------------------------------- 568! (gsm) <<<<<< static COHSEX approximation - CH as a partial sum over empty bands >>>>>> 569 570! < n k | \Sigma_{SX} (r, r`; 0) | m k > = 571! - \sum_{n_1}^{occ} \sum_{q G G`} 572! < n k | e^{i (q + G) \cdot r} | n_1 k - q > 573! < n_1 k - q | e^{- i (q + G`) \cdot r`} | m k > 574! [\eps_{G G`}^{-1} (q; 0) - \delta_{G G`}] v (q + G`) 575 576! < n k | \Sigma_{CH} (r, r`; 0) | m k > = 577! \frac{1}{2} \sum_{n_1} \sum_{q G G`} 578! < n k | e^{i (q + G) \cdot r} | n_1 k - q > 579! < n_1 k - q | e^{- i (q + G`) \cdot r`} | m k > 580! [\eps_{G G`}^{-1} (q; 0) - \delta_{G G`}] v (q + G`) 581 582 583 if (sig%freq_dep.eq.0) then 584 585 do ig=1,ncouls 586 epsggp = ph(ig)*MYCONJG(ph(igp))*epstemp(ind(ig)) 587 if (ig.eq.igp) then 588 I_epsggp = epsggp - 1.0d0 589 else 590 I_epsggp = epsggp 591 endif 592 593 I_eps_array(ig,my_igp) = I_epsggp 594 595 enddo ! over G (ig) 596 597! (gsm) <<<<<< Generalized Plasmon Pole model >>>>>> 598 599 elseif (sig%freq_dep.eq.1) then 600 601! Zero out temporary accumulation variables 602 603!---------------------- 604! Calculate Plasma Frequencies 605! 606! SIB: Here we get the plasmon-pole effective plasma frequecies 607! Omega(G,G`)^2 (equation (31) of Hybersten & Louie, PRB 34, 1986, p 5396) 608! which come in the vector wp(G) for current G`. 609 610! SIB: We calculate wp(G) for a given G` (trade-off for memory) 611! Note that wp(G) G=1,ncouls requires O(N) operations 612! Even if we redo it for each band n, not so bad 613 614! JRD: I changed this to use qk instead of qkk because we use vcoul at q=0 615! (instead of small q) throughout 616 617 618! given how many times this routine is called, timing it appears to take a non-negligible amount of time 619 620 call wpeff(crys,gvec,wpg,sig,neps,isrtrq,igp,ncouls,wpmtx,nspin,qk,vcoul,coulfact) 621 622!!------ For given G` (igp), loop over all G vectors (ig) in lower triangle --- 623 624 if ( my_id == 0 ) call timing%start(timing%m_cor_pp_prep) 625 626 do ig=1,ncouls 627 628! Put epsilon(G,G`) into epsggp 629 630 epsggp = ph(ig)*MYCONJG(ph(igp))*epstemp(ind(ig)) 631 632! I_epsggp = Kronecker(G,G`) - eps(G,G`) 633 634 if (ig.eq.igp) then 635 I_epsggp = ONE - epsggp 636 else 637 I_epsggp = ZERO - epsggp 638 endif 639 640! If I_epsggp is too small, then we skip this (G,G`) entry 641! This only happens when eps is 1 on diagonal or 0 off diagonal 642! but, this means no screening correct and is already treated properly in bare 643! exchange term 644 645 if (abs(I_epsggp).lt.TOL_Small) cycle 646 647 I_eps_array(ig,my_igp) = I_epsggp 648 649! Omega2 = Omega(G,G`)^2 = effective plasma freq^2 650 651 Omega2 = wpmtx(ig) 652 653! If Omega2 is too small, then we skip this (G,G`) entry 654! JRD: I am not sure why we have to cycle here... :/ Probably not needed 655! FHJ: If Omega2->0, both the SX and the CH terms vanish. But the CH term 656! goes to zero as Omega2/wtilde ~ Omega2/sqrt(Omega2). Skipping this term is 657! probably better than risking a 0/sqrt(0) division. 658 659 660 if (abs(Omega2).lt.TOL_Small) cycle 661 662#ifdef CPLX 663 664! <<<<<<<<<<<< COMPLEX GPP >>>>>>>>>>>> 665 666! (gsm) equations (17), (20), (21) from [PRB 40, 3162 (1989)] 667 668 wtilde2_temp = Omega2 / I_epsggp 669 670 lambda = abs(wtilde2_temp) 671 if (lambda .lt. TOL_Small) cycle 672 phi = atan2(IMAG(wtilde2_temp), dble(wtilde2_temp)) 673 if (abs(cos(phi)) .lt. TOL_Small) cycle 674 wtilde2 = lambda / cos(phi) 675! this is not needed because we recalculate Omega2 below 676! Omega2 = Omega2 * CMPLX(1.0d0, -tan(phi)) 677 678#else 679 680! <<<<<<<<<<<< REAL GPP >>>>>>>>>>>> 681 682! (gsm) equation (30) from [PRB 34, 5390 (1986)] 683 684 wtilde2 = Omega2 / I_epsggp 685 686 if (abs(wtilde2) .lt. TOL_Small) cycle 687 688#endif 689 690 !FHJ: What do we do if we find an invalid mode with wtilde2<0? 691 if (dble(wtilde2)<0) then 692 select case (sig%invalid_gpp_mode) 693 case (0) ! Skip invalid GPP mode and ignore its contribution to the self energy. 694 wtilde = (0d0,0d0) 695 case (1) ! "Find" a purely imaginary mode frequency (GSM & JRD). 696 wtilde = sqrt(COMPLEXIFY(wtilde2)) 697 case (2) ! Set the GPP mode frequency to a fixed value of 2 Ry. 698 wtilde = CMPLX(2d0*ryd, 0d0) 699 case (3) ! Treat that mode within the static COHSEX approximation. 700 wtilde = CMPLX(1d0/TOL_ZERO, 0d0) 701 case default 702 wtilde = CMPLX(1d0/TOL_ZERO, 0d0) 703 endselect 704 else 705 wtilde = CMPLX(sqrt(dble(wtilde2)), 0d0) 706 endif 707 wtilde_array(ig,my_igp) = wtilde 708 709 enddo ! G Loop for GPP Setup 710 711 if (my_id.eq.0) call timing%stop(timing%m_cor_pp_prep) 712 713 elseif (sig%freq_dep.eq.3) then 714 715 if (my_id.eq.0) call timing%start(timing%m_cor_pp_prep) 716 717 do ig=1,ncouls 718 719! Put epsilon(G,G`) into epsRggp 720! JRD XXX This is bad. But it is FJR code, so who cares... 721 epsRggp(:) = ph(ig)*MYCONJG(ph(igp))*epsRtemp(ind(ig),:) 722 723! I_epsRggp = Kronecker(G,G`) - eps(G,G`) 724 if (ig.eq.igp) then 725 I_epsRggp(:) = ONE - epsRggp(:) 726 else 727 I_epsRggp(:) = ZERO - epsRggp(:) 728 endif 729 730! If I_epsggp is too small, then we skip this (G,G`) entry 731! This only happens when eps is 1 on diagonal or 0 off diagonal 732! but, this means no screening correct and is already treated properly in bare 733! exchange term 734 735 if (all(abs(I_epsRggp(:)).lt.TOL_Small)) cycle 736 737 I_eps_array(ig,my_igp) = SCALARIFY(I_epsRggp(1)) 738 wtilde2 = abs(sig%dFreqBrd(2))**2 * I_epsRggp(2)/ ( I_epsRggp(1) - I_epsRggp(2) ) 739 740 !FHJ: What do we do if we find an invalid mode with wtilde2<0? 741 if (dble(wtilde2)<0) then 742 select case (sig%invalid_gpp_mode) 743 case (0) ! Skip invalid mode and ignore its contribution to the self energy. 744 wtilde = (0d0,0d0) 745 case (1) ! "Find" a purely complex mode frequency (GSM & JRD). 746 wtilde = sqrt(COMPLEXIFY(wtilde2)) 747 case (2) ! Set the mode frequency to a fixed value of 2 Ry. 748 wtilde = CMPLX(2d0*ryd, 0d0) 749 case (3) ! Treat that mode within the static COHSEX approximation 750 wtilde = CMPLX(1d0/TOL_ZERO, 0d0) 751 case default 752 wtilde = CMPLX(1d0/TOL_ZERO, 0d0) 753 endselect 754 else 755 wtilde = sqrt(dble(wtilde2)) 756 endif 757 wtilde_array(ig,my_igp) = wtilde 758 759 enddo ! G Loop for GPP Setup 760 761 if (my_id.eq.0) call timing%stop(timing%m_cor_pp_prep) 762 763!!-------------------------------------------------------------------- 764! (gsm) <<<<<< full frequency dependent inverse dielectric matrix >>>>>> 765 766! The code below makes use of the following relations: 767! 768! {eps_{G G`}^r}^{-1}(q, -E) = {eps_{G G`}^a}^{-1}(q, E) 769! for general systems (both real and complex versions of the code) 770! 771! {eps_{G G`}^a}^{-1}(q, E) = {{eps_{G G`}^r}^{-1}}^{*}(q, E) 772! for systems with inversion symmetry (the real version of the code) 773! since plane-wave matrix elements are real 774! 775! {eps_{G G}^a}^{-1}(q, E) = {{eps_{G G}^r}^{-1}}^{*}(q, E) 776! for general systems, the diagonal of the matrix (G` = G) 777! since plane-wave matrix elements are complex conjugates of each other 778! 779! The complex version of the code uses 780! {eps_{G G`}^r}^{-1}(q, E) and {eps_{G G`}^a}^{-1}(q, E) for E >= 0 781! 782! The real version of the code uses 783! {eps_{G G`}^r}^{-1}(q, E) for E >= 0 784 785! CHP: full frequency routine - the case for sig%ggpsum == 1 786! 787! On top of the above relations, we need the following: 788! Assuming that W_{G,G`}={eps_{G,G`}}^{-1} v(q+G`), 789! 790! W_{G`,G}^r(E) = {W_{G,G`}^a}^{*}(E) 791! = {W_{G,G`}^r}^{*}(-E) (this form is used if E<0) 792! for general systems (both real and complex version of the code) 793! 794! W_{G`,G}^a(E) = {W_{G,G`}^r}^{*}(E) 795! = {W_{G,G`}^a}^{*}(-E) (this form is used if E<0) 796! for general systems (both real and complex version of the code) 797! 798! W_{G`,G}^r(E) = W_{G,G`}^r(E) 799! for systems with inversion symmetry 800! 801! W_{G`,G}^a(E) = W_{G,G`}^a(E) 802! for systems with inversion symmetry 803! 804! Note that eps^{-1} does not have these symmetries. 805 806 endif 807 enddo 808!$OMP END DO 809 810 if (sig%freq_dep.eq.2) then 811 if (sig%freq_dep_method==0) then 812 813!$OMP DO 814 do iw = 1, sig%nFreq 815 do my_igp=1,ngpown 816 817 indigp = inv_igp_index(my_igp) 818 igp = indinv(indigp) 819 820 if (igp .gt. ncouls .or. igp .le. 0) cycle 821 822 mcph = MYCONJG(ph(igp)) 823! JRD: XXX ind() may kill us here 824 do ig = 1, ncouls 825 I_epsR_array(ig,my_igp,iw) = - mcph * ph(ig) * epsR(ind(ig),my_igp,iw) 826 enddo 827 I_epsR_array(igp,my_igp,iw) = I_epsR_array(igp,my_igp,iw) + 1.0d0 828 enddo 829 enddo 830!$OMP END DO 831 832 else 833 834! JRD XXX 835! May want to add schedule dynamic here since iw > nfreq_real has more work. Or move OMP in one level 836!$OMP DO 837 do iw = 1, sig%nFreq 838 if (iw .le. nfreq_real) then 839 840 if(my_do_subspace) then 841 ! indigp = inv_igp_index(my_igp) -> local to global index for full Eps mat 842 ! indinv() -> from the global Epsilon index to the k+q indexing 843 ! ind() -> from the global k+q indexing go th the Epsilon indexing 844 ! ph -> phase in the k+q indexing 845 if(iw == 1) then 846 ! copy full epsilon only for the static case 847 I_epsR_array(:,:,1) = (0.0d0, 0.0d0) 848 do my_igp=1,ngpown 849 indigp = inv_igp_index(my_igp) 850 igp = indinv(indigp) 851 852 if (igp .gt. ncouls .or. igp .le. 0) cycle 853 !XX if (indigp .gt. ncouls .or. indigp .le. 0) cycle 854 855 mcph = MYCONJG(ph(igp)) 856 do ig = 1, ncouls 857 I_epsR_array(ig,my_igp,iw) = - mcph * ph(indinv(ig)) * epsR(ig, my_igp, iw) 858 enddo 859 I_epsR_array(indigp,my_igp,iw) = I_epsR_array(indigp,my_igp,iw) + 1.0d0 860 861 end do 862 end if 863 864 ! fix wings, here we go with the ordering driven by the epsilon matrix 865 ! each MPI task own the full wings correction, both row and col 866 wing_pos_igp = indinv(wing_pos) 867 do ig = 1, ncouls 868 ! row 869 ! transform eps -> k+q index 870 igp = indinv(ig) 871 wings_Re(ig,iw,1) = - eps_wings_correction_rows(ig,iw) * MYCONJG(ph(igp)) * ph(wing_pos_igp) 872 ! wings_Re(ig,iw,1) = wings_Re(ig,iw,1) * vcoul_sub(ig) / vcoul_sub(wing_pos) 873 end do 874 do ig = 1, ncouls 875 ! column 876 igp = indinv(ig) 877 wings_Re(ig,iw,2) = - eps_wings_correction_cols(ig,iw) * ph(igp) * MYCONJG(ph(wing_pos_igp)) 878 ! wings_Re(ig,iw,2) = wings_Re(ig,iw,2) * vcoul_sub(wing_pos) / vcoul_sub(ig) 879 end do 880 ! and here the famous factor 1.0 881 wings_Re(wing_pos,iw,1) = wings_Re(wing_pos,iw,1) + 1.0d0 882 ! copy subspace epsilon 883 Re_eps_sub(1:Nb_tot,1:my_Nb,iw) = - eps_sub(1:Nb_tot,1:my_Nb,iw) 884 885 else ! my_do_subspace 886 887 do my_igp=1,ngpown 888 889 indigp = inv_igp_index(my_igp) 890 igp = indinv(indigp) 891 892 if (igp .gt. ncouls .or. igp .le. 0) cycle 893 894 mcph = MYCONJG(ph(igp)) 895! JRD: XXX ind() may kill us here 896 do ig = 1, ncouls 897 I_epsR_array(ig,my_igp,iw) = - mcph * ph(ig) * epsR(ind(ig),my_igp,iw) 898 enddo 899 I_epsR_array(igp,my_igp,iw) = I_epsR_array(igp,my_igp,iw) + 1.0d0 900 enddo 901 902 end if ! my_do_subspace 903 904 endif 905 if (iw .gt. nfreq_real) then 906 907 if(my_do_subspace) then 908 ! fix wings 909 wing_pos_igp = indinv(wing_pos) 910 do ig = 1, ncouls 911 ! row 912 ! transform eps -> k+q index 913 igp = indinv(ig) 914 wings_Im_elem = - eps_wings_correction_rows(ig,iw) * MYCONJG(ph(igp)) * ph(wing_pos_igp) 915 wings_Im(ig,iw-nfreq_real,1) = SCALARIFY(wings_Im_elem) 916 !XXX wings_Im(ig,iw-nfreq_real,1) = wings_Im(ig,iw-nfreq_real,1) * vcoul_sub(ig) / vcoul_sub(wing_pos) 917 end do 918 do ig = 1, ncouls 919 ! column 920 igp = indinv(ig) 921 wings_Im_elem = - eps_wings_correction_cols(ig,iw) * ph(igp) * MYCONJG(ph(wing_pos_igp)) 922 wings_Im(ig,iw-nfreq_real,2) = SCALARIFY(wings_Im_elem) 923 !XXX wings_Im(ig,iw-nfreq_real,2) = wings_Im(ig,iw-nfreq_real,2) * vcoul_sub(wing_pos) / vcoul_sub(ig) 924 end do 925 ! and here the famous factor 1.0 926 wings_Im(wing_pos,iw-nfreq_real,1) = wings_Im(wing_pos,iw-nfreq_real,1) + 1.0d0 927 ! copy subspace epsilon 928 Im_eps_sub(1:Nb_tot,1:my_Nb,iw-nfreq_real) = - SCALARIFY(eps_sub(1:Nb_tot,1:my_Nb,iw)) 929 else ! my_do_subspace 930 931 do my_igp=1,ngpown 932 933 indigp = inv_igp_index(my_igp) 934 igp = indinv(indigp) 935 936 if (igp .gt. ncouls .or. igp .le. 0) cycle 937 938 mcph = MYCONJG(ph(igp)) 939! JRD: XXX ind() may kill us here 940 do ig = 1, ncouls 941 ctemp = - mcph * ph(ig) * epsR(ind(ig),my_igp,iw) 942 I_eps_imag(ig,my_igp,iw-nfreq_real) = SCALARIFY(ctemp) 943 enddo 944 I_eps_imag(igp,my_igp,iw-nfreq_real) = I_eps_imag(igp,my_igp,iw-nfreq_real) + 1.0d0 945 enddo 946 947 end if ! my_do_subspace 948 949 endif 950 enddo 951!OMP END DO 952 953 endif 954 955 if (sig%need_advanced) then 956 957!$OMP DO 958 do iw = 1, sig%nFreq 959 do my_igp=1,ngpown 960 961 indigp = inv_igp_index(my_igp) 962 igp = indinv(indigp) 963 964 if (igp .gt. ncouls .or. igp .le. 0) cycle 965 966 mcph = MYCONJG(ph(igp)) 967! JRD: XXX ind() may kill us here 968 do ig = 1, ncouls 969 I_epsA_array(ig,my_igp,iw) = - mcph * ph(ig) * epsA(ind(ig),my_igp,iw) 970 enddo 971 I_epsA_array(igp,my_igp,iw) = I_epsA_array(igp,my_igp,iw) + 1.0d0 972 enddo 973 enddo 974!$OMP END DO 975 976 endif 977 endif 978 979 if (sig%freq_dep.eq.0) then 980 SAFE_DEALLOCATE(epstemp) 981 endif 982 if (sig%freq_dep.eq.1) then 983 SAFE_DEALLOCATE(wpmtx) 984 SAFE_DEALLOCATE(epstemp) 985 endif 986 if (sig%freq_dep.eq.2) then 987 SAFE_DEALLOCATE(epstemp) 988 endif 989 if (sig%freq_dep.eq.3) then 990 SAFE_DEALLOCATE(epstemp) 991 SAFE_DEALLOCATE(epsRtemp) 992 SAFE_DEALLOCATE(epsRggp) 993 SAFE_DEALLOCATE(I_epsRggp) 994 endif 995 996!$OMP END PARALLEL 997 call timing%stop(timing%m_cor_epsinit) 998 999 ! transform basis, moved outside OMP parallel region 1000 call timing%start(timing%sub_transf_tot) 1001 if(my_do_subspace) then 1002 ! transform basis function 1003 ! reorder wfn matrix elements 1004 n_q_order = ZERO 1005 m_q_order = ZERO 1006 do ig = 1, ncouls 1007 igp = indinv(ig) 1008 if (igp .gt. ncouls .or. igp .le. 0) cycle 1009 n_q_order(ig,:) = aqsn(igp,:) 1010 m_q_order(ig,:) = aqsm(igp,:) 1011 end do 1012 1013 ! copy matrix elements and scale them with the coulomb operator 1014 SAFE_ALLOCATE(aqsntemp,(ncouls,peinf%ntband_max)) 1015 SAFE_ALLOCATE(aqsmtemp,(ncouls,peinf%ntband_max)) 1016 aqsntemp = n_q_order 1017 aqsmtemp = m_q_order 1018 do ig = 1, ncouls 1019 igp = indinv(ig) 1020 if (igp .gt. ncouls .or. igp .le. 0) cycle 1021 aqsntemp(ig,:) = aqsntemp(ig,:) * ph(igp) * SQRT(vcoul_sub(ig)) 1022 aqsmtemp(ig,:) = aqsmtemp(ig,:) * ph(igp) * vcoul(igp) / SQRT(vcoul_sub(ig)) 1023 end do 1024 1025 SAFE_ALLOCATE(Caux, (ncouls, Nbas_own_max)) 1026 SAFE_ALLOCATE(Caux_send, (ncouls, Nbas_own_max)) 1027 SAFE_ALLOCATE(Caux_rec, (ncouls, Nbas_own_max)) 1028 SAFE_ALLOCATE(Maux, (Nbas_own_max, peinf%ntband_max)) 1029 1030 Caux = ZERO ! (0D0,0D0) 1031 Caux_send = ZERO ! (0D0,0D0) 1032 Caux_rec = ZERO ! (0D0,0D0) 1033 Maux = ZERO ! (0D0,0D0) 1034 1035 ! get ready for the first cycle 1036 isend_static = MOD(peinf%pool_rank + 1 + peinf%npes_pool, peinf%npes_pool) 1037 irec_static = MOD(peinf%pool_rank - 1 + peinf%npes_pool, peinf%npes_pool) 1038 if(my_Nb_start > 0) then 1039 Caux(1:ncouls, 1:Nbas_own) = SCALARIFY(eigenvec_sub(1:ncouls, 1:Nbas_own)) 1040 end if 1041 Msub_n = ZERO ! (0D0,0D0) 1042 Msub_m = ZERO ! (0D0,0D0) 1043 1044 my_num_band = peinf%ntband_dist(peinf%pool_rank+1) 1045 1046 ipe = peinf%pool_rank + 1 1047 do ipe_inx = 1, peinf%npes_pool 1048 actual_send = MOD(peinf%pool_rank + ipe_inx + peinf%npes_pool, peinf%npes_pool) 1049 actual_rec = MOD(peinf%pool_rank - ipe_inx + peinf%npes_pool, peinf%npes_pool) 1050#ifdef MPI 1051 call timing%start(timing%sub_transf_com) 1052 ! post receiving mex 1053 tag_rec_n = 1 1054 req_rec_n = MPI_REQUEST_NULL 1055 CALL MPI_Irecv(Caux_rec, ncouls*Nbas_own_max, MPI_SCALAR, irec_static,& 1056 tag_rec_n, peinf%pool_comm, req_rec_n, mpierr) 1057 ! post send mex 1058 tag_send_n = 1 1059 req_send_n = MPI_REQUEST_NULL 1060 Caux_send = Caux 1061 CALL MPI_Isend(Caux_send, ncouls*Nbas_own_max, MPI_SCALAR, isend_static,& 1062 tag_send_n, peinf%pool_comm, req_send_n, mpierr) 1063 call timing%stop(timing%sub_transf_com) 1064#endif 1065 ! go with zgemm 1066 ib_start = ipe_2_LocSubSize(1,ipe) 1067 ib_end = ipe_2_LocSubSize(2,ipe) 1068 ib_size = ipe_2_LocSubSize(3,ipe) 1069 call timing%start(timing%sub_transf_gemm) 1070 if(ib_start > 0) then 1071#ifdef CPLX 1072 call zgemm('T','N', Nbas_own_max, peinf%ntband_max, ncouls, & 1073 (1D0,0D0), Caux(:,:), ncouls, & 1074 aqsntemp(:,:), ncouls, & 1075 (0D0,0D0), Maux, Nbas_own_max) 1076#else 1077 call dgemm('T','N', Nbas_own_max, peinf%ntband_max, ncouls, & 1078 1.0D0, Caux(:,:), ncouls, & 1079 aqsntemp(:,:), ncouls, & 1080 0.0D0, Maux, Nbas_own_max) 1081#endif 1082 Msub_n(ib_start:ib_end, 1:my_num_band) = Msub_n(ib_start:ib_end, 1:my_num_band) + & 1083 Maux(1:ib_size,1:my_num_band) 1084 1085#ifdef CPLX 1086 call zgemm('T','N', Nbas_own_max, peinf%ntband_max, ncouls, & 1087 (1D0,0D0), Caux(:,:), ncouls, & 1088 aqsmtemp(:,:), ncouls, & 1089 (0D0,0D0), Maux, Nbas_own_max) 1090#else 1091 call dgemm('T','N', Nbas_own_max, peinf%ntband_max, ncouls, & 1092 1.0D0, Caux(:,:), ncouls, & 1093 aqsmtemp(:,:), ncouls, & 1094 0.0D0, Maux, Nbas_own_max) 1095#endif 1096 Msub_m(ib_start:ib_end, 1:my_num_band) = Msub_m(ib_start:ib_end, 1:my_num_band) + & 1097 Maux(1:ib_size,1:my_num_band) 1098 end if 1099 call timing%stop(timing%sub_transf_gemm) 1100#ifdef MPI 1101 call timing%start(timing%sub_transf_com) 1102 CALL MPI_Wait(req_rec_n,stat,mpierr) 1103 CALL MPI_Wait(req_send_n,stat,mpierr) 1104 call timing%stop(timing%sub_transf_com) 1105#endif 1106 ! get ready for next cycle 1107 Caux = Caux_rec 1108 ipe = actual_rec + 1 1109 end do 1110 SAFE_DEALLOCATE(aqsntemp) 1111 SAFE_DEALLOCATE(aqsmtemp) 1112 end if ! my_do_subspace 1113 ! finish with basis transformation 1114 call timing%stop(timing%sub_transf_tot) 1115 1116!----------------------------------------------------------------------------- 1117! End of setup. Begin computation of Sigma. 1118!----------------------------------------------------------------------------- 1119 1120 if (sig%freq_dep/=0 .and. sig%exact_ch==1) then 1121 ! Initialize the static part of the static remainder correction, resolved 1122 ! over bands. We put the static term into band #1, and later on 1123 ! substract the CH part. Note the famous factor of 1/2 here. 1124 achtcor_n1(1) = 0.5d0 * achxtemp 1125 endif 1126 SAFE_ALLOCATE(aqsntemp,(ncouls,peinf%ntband_max)) 1127 SAFE_ALLOCATE(aqsmtemp,(ncouls,peinf%ntband_max)) 1128 1129 if (my_do_subspace) then 1130 ! create buffers for communication 1131 SAFE_ALLOCATE(Msub_n_temp, (Nb_tot,peinf%ntband_max)) 1132 SAFE_ALLOCATE(Msub_m_temp, (Nb_tot,peinf%ntband_max)) 1133 Msub_n_temp = ZERO 1134 Msub_m_temp = ZERO 1135 ! set the flag for fixing the wings 1136 fix_wings = .true. 1137 fix_wings_res = .true. 1138 !XXXX 1139 if(sig%subsample) then 1140 ! in the subsample case no fix is done, set flag to false 1141 fix_wings = .false. 1142 fix_wings_res = .false. 1143 end if 1144 !XXXX 1145 ! dry run just to fix wings, we pass Msub_m_temp and Msub_n_temp with 1146 ! zeros and we pretend to run for our actual proc. 1147 call timing%start(timing%m_cor_sub_wings) 1148 aqsntemp = ZERO 1149 aqsmtemp = ZERO 1150 aqsntemp(1:ncouls,1:my_num_band) = n_q_order(1:ncouls,1:my_num_band) 1151 aqsmtemp(1:ncouls,1:my_num_band) = m_q_order(1:ncouls,1:my_num_band) 1152 ipe = peinf%pool_rank + 1 1153 call sigma_cd_subspace() 1154 call timing%stop(timing%m_cor_sub_wings) 1155 !XXXX 1156 end if 1157 1158 ! FHJ: Loop over each processor/pool, and over each band that ipe owns. 1159 ! Then, calculate contribution to Sigma due to that band, stored in the 1160 ! aqs*temp arrays. 1161 do ipe = 1, peinf%npes_pool 1162 1163 call timing%start(timing%m_cor_comm) 1164 if (peinf%pool_rank .eq. ipe-1) then 1165 if(my_do_subspace) then 1166 !XXX aqsntemp(:,:) = n_q_order(:,:) 1167 !XXX aqsmtemp(:,:) = m_q_order(:,:) 1168 Msub_n_temp(:,:) = Msub_n(:,:) 1169 Msub_m_temp(:,:) = Msub_m(:,:) 1170 else 1171 aqsntemp(:,:) = aqsn(1:ncouls,:) 1172 aqsmtemp(:,:) = aqsm(1:ncouls,:) 1173 end if 1174 endif 1175 1176#ifdef MPI 1177 if (peinf%my_pool/=-1) then 1178 if (my_do_subspace) then 1179 call MPI_Bcast(Msub_n_temp, Nb_tot * peinf%ntband_max, MPI_SCALAR, ipe-1, & 1180 peinf%pool_comm, mpierr) 1181 call MPI_Bcast(Msub_m_temp, Nb_tot * peinf%ntband_max, MPI_SCALAR, ipe-1, & 1182 peinf%pool_comm, mpierr) 1183 else 1184 1185 call MPI_Bcast(aqsntemp, peinf%ntband_max*ncouls, MPI_SCALAR, ipe-1, & 1186 peinf%pool_comm, mpierr) 1187 ! FHJ: Only bother to BCast aqsmtemp if this is an off-diag. calculation 1188 if (icalc==2) then 1189 call MPI_Bcast(aqsmtemp, peinf%ntband_max*ncouls, MPI_SCALAR, ipe-1, & 1190 peinf%pool_comm, mpierr) 1191 else 1192 aqsmtemp = aqsntemp 1193 endif 1194 1195 end if ! my_do_subspace 1196 endif 1197#endif 1198 call timing%stop(timing%m_cor_comm) 1199 1200 ! here we want to move the band index inside the actual routine such 1201 ! that we can calculate sigma by calling ZGEMM 1202 if(my_do_subspace) then 1203 1204 call sigma_cd_subspace() 1205 1206 else ! my_do_subspace 1207 1208 do n1 = 1, peinf%ntband_dist(ipe) 1209 1210 ! n1true = "True" band index of the band n1 w.r.t. all bands 1211 n1true = peinf%indext_dist(n1,ipe) 1212 ! energy of the |n1,k-q> state 1213 e_n1kq = wfnkq%ekq(n1true,ispin) 1214 ! occupation of the |n1,k-q> state 1215 flag_occ = (n1true<=(sig%nvband+sig%ncrit)) & 1216 .and.((sig%ncrit==0).or.(e_n1kq<=(sig%efermi+sig%tol))) 1217 if (abs(e_n1kq-sig%efermi)<sig%tol) then 1218 occ = 0.5d0 ! Fermi-Dirac distribution = 1/2 at Fermi level 1219 else 1220 occ = 1.0d0 1221 endif 1222 1223 ! FHJ: CH term used for static remainder for current band "n1true". 1224 achstemp = ZERO 1225 1226 !FHJ: Call specialized code to calculate contribution to <nk|Sigma^c|mk> 1227 !due to current band n1 1228 if (sig%freq_dep==0) then 1229 call sigma_cohsex() 1230 else if (sig%freq_dep==1 .or. sig%freq_dep==3) then 1231 call sigma_gpp() 1232 else if (sig%freq_dep==2 .and. sig%freq_dep_method==0) then 1233 call sigma_ra() 1234 else if (sig%freq_dep==2 .and. sig%freq_dep_method==2) then 1235 call sigma_cd() 1236 endif 1237 1238 if (sig%freq_dep/=0 .and. sig%exact_ch==1) then 1239 ! Compute the static remainder resolved over bands (just the CH part here). 1240 ! Note the famous factor of 1/2 here. 1241 achtcor_n1(n1true) = achtcor_n1(n1true) - 0.5d0*achstemp 1242 endif 1243 enddo ! over bands (n1) 1244 1245 end if! my_do_subspace 1246 1247 enddo ! over bands (ipe) 1248 1249 ! FHJ: Sum contribution to Sigma due to different bands into a single 1250 ! contribution due to the portion of the dielectric matrix that I own. We`ll 1251 ! reduce these contributions in sigma_main.f90 after we loop over all q points. 1252 1253 if (sig%freq_dep/=0 .and. sig%exact_ch==1) then 1254 ! Compute the static remainder integrated over bands. 1255 ! Note the famous factor of 1/2 here. 1256 achtcor = sum(achtcor_n1) 1257 endif 1258 1259 if (sig%freq_dep==0) then 1260 1261 if (sig%exact_ch==1) then 1262 achtemp(2) = achxtemp 1263 achxtemp = ZERO 1264 endif 1265 asxt(:) = asxt(:) + asxtemp(2) 1266 acht(:) = acht(:) + achtemp(2) 1267 if (sig%exact_ch==0) then 1268 achtcor = achtcor + (achxtemp - achtemp(2)) 1269 endif 1270 1271 elseif (sig%freq_dep==1 .or. sig%freq_dep==3) then 1272 1273 do iw=nstart,nend 1274 asxt(iw) = asxt(iw) + asxtemp(iw) 1275 acht(iw) = acht(iw) + achtemp(iw) 1276 enddo 1277 1278 elseif (sig%freq_dep==2 .and. sig%freq_dep_method==0) then 1279 1280 do iw = 1, sig%nfreqeval 1281 asxtDyn(iw) = asxtDyn(iw) + asxDtemp(iw) 1282 achtDyn(iw) = achtDyn(iw) + achDtemp(iw) 1283 achtDyn_cor(iw) = achtDyn_cor(iw) + achDtemp_cor(iw) 1284 achtDyn_corb(iw) = achtDyn_corb(iw) + achDtemp_corb(iw) 1285 ach2tDyn(iw) = ach2tDyn(iw) + ach2Dtemp(iw) 1286 enddo 1287 1288 elseif (sig%freq_dep==2 .and. sig%freq_dep_method==2) then 1289 1290 do iw = 1, sig%nfreqeval 1291 asxtDyn(iw) = asxtDyn(iw) + asxDtemp(iw) 1292 achtDyn(iw) = achtDyn(iw) + achDtemp(iw) 1293 achtDyn_cor(iw) = achtDyn_cor(iw) + achDtemp(iw) 1294 achtDyn_corb(iw) = achtDyn_corb(iw) + achDtemp(iw) + asxDtemp(iw) 1295 enddo 1296 1297 endif 1298 1299 SAFE_DEALLOCATE(aqsntemp) 1300 SAFE_DEALLOCATE(aqsmtemp) 1301 1302 1303!----------------------------------------------------------------------------- 1304! Done calculating Sigma! Deallocate and finish. 1305!----------------------------------------------------------------------------- 1306 1307 if (sig%freq_dep.eq.1 .or.sig%freq_dep.eq.3) then 1308 SAFE_DEALLOCATE(wx_array) 1309 SAFE_DEALLOCATE(wtilde_array) 1310 if (sig%fdf.eq.-3) then 1311 SAFE_DEALLOCATE(wxi) 1312 endif 1313 endif 1314 1315 if (sig%freq_dep.eq.1 .or. sig%freq_dep.eq.0 .or. sig%freq_dep.eq.3) then 1316 SAFE_DEALLOCATE(asxtemp) 1317 SAFE_DEALLOCATE(achtemp) 1318 SAFE_DEALLOCATE(I_eps_array) 1319 acht_n1(1:sig%ntband) = acht_n1_loc(1:sig%ntband) 1320 endif 1321 1322 if (sig%freq_dep.eq.2) then 1323 SAFE_DEALLOCATE(pref) 1324 SAFE_DEALLOCATE(asxDtemp) 1325 SAFE_DEALLOCATE(achDtemp) 1326 SAFE_DEALLOCATE(achDtemp_cor) 1327 SAFE_DEALLOCATE(achDtemp_corb) 1328 SAFE_DEALLOCATE(ach2Dtemp) 1329 SAFE_DEALLOCATE(schDi) 1330 SAFE_DEALLOCATE(schDi_cor) 1331 SAFE_DEALLOCATE(schDi_corb) 1332 SAFE_DEALLOCATE(sch2Di) 1333 SAFE_DEALLOCATE(ssxDi) 1334 SAFE_DEALLOCATE(wxi) 1335 SAFE_DEALLOCATE(I_epsR_array) 1336 if (sig%need_advanced) then 1337 SAFE_DEALLOCATE(I_epsA_array) 1338 endif 1339 if (sig%freq_dep_method==2) then 1340 SAFE_DEALLOCATE(I_eps_imag) 1341 endif 1342 endif 1343 SAFE_DEALLOCATE(acht_n1_loc) 1344 1345 POP_SUB(mtxel_cor) 1346 return 1347 1348contains 1349 1350!============================================================================== 1351! COHSEX 1352!============================================================================== 1353 !> Calculate Sigma matrix elements, COHSEX formalism 1354 subroutine sigma_cohsex() 1355 SCALAR :: aqsn_Ieps, asxtemp_loc, achtemp_loc 1356 1357 PUSH_SUB(mtxel_cor.sigma_cohsex) 1358 1359 call timing%start(timing%m_cor_sx_ch) 1360!$OMP PARALLEL DO private (my_igp,igp,indigp,ig,aqsn_Ieps,achtemp_loc, & 1361!$OMP asxtemp_loc) reduction(+:achtemp,asxtemp,acht_n1_loc) 1362 do my_igp = 1, ngpown 1363 indigp = inv_igp_index(my_igp) 1364 igp = indinv(indigp) 1365 if (igp>ncouls .or. igp<=0) cycle 1366 achtemp_loc = ZERO 1367 asxtemp_loc = ZERO 1368 1369 do ig = 1, ncouls 1370 aqsn_Ieps = aqsntemp(ig,n1) * I_eps_array(ig,my_igp) 1371 if (sig%exact_ch==0) then 1372 achtemp_loc = achtemp_loc + 0.5d0*aqsn_Ieps 1373 endif 1374 if (flag_occ) asxtemp_loc = asxtemp_loc - aqsn_Ieps 1375 enddo ! ig 1376 1377 asxtemp(2) = asxtemp(2) + asxtemp_loc*occ*vcoul(igp)*MYCONJG(aqsmtemp(igp,n1)) 1378 if (sig%exact_ch==0) then 1379 achtemp(2) = achtemp(2) + achtemp_loc*vcoul(igp)*MYCONJG(aqsmtemp(igp,n1)) 1380 acht_n1_loc(n1true) = acht_n1_loc(n1true) + achtemp_loc*vcoul(igp)*MYCONJG(aqsmtemp(igp,n1)) 1381 endif 1382 enddo ! my_igp 1383!$OMP END PARALLEL DO 1384 call timing%stop(timing%m_cor_sx_ch) 1385 1386 POP_SUB(mtxel_cor.sigma_cohsex) 1387 1388 end subroutine sigma_cohsex 1389 1390 1391!============================================================================== 1392! GPP 1393!============================================================================== 1394 !> Calculate Sigma matrix elements, GPP (HL and GN) formalism 1395 subroutine sigma_gpp() 1396 SCALAR, allocatable :: ssx_array(:), sch_array(:) 1397 real(DP) :: delwr, delw2, wdiffr, rden, wxt, ssxcutoff, limitone, limittwo 1398 complex(DPC) :: halfinvwtilde, delw, wdiff, cden 1399 integer :: igbeg, igend, igblk 1400 SCALAR :: ssx, sch, ssxt, scht, schtt 1401 1402 PUSH_SUB(mtxel_cor.sigma_gpp) 1403 1404 igblk = 512 1405 1406 ! Some constants used in the loop below, computed here to save 1407 ! floating point operations 1408 limitone=1D0/(TOL_Small*4D0) 1409 limittwo=sig%gamma**2 1410 1411 ! GSM: compute the static CH for the static remainder 1412 if (sig%exact_ch.eq.1) then 1413 call acc_static_ch(ngpown, ncouls, inv_igp_index, indinv, vcoul, & 1414 aqsntemp(:,n1), aqsmtemp(:,n1), achstemp, eps_scalar=I_eps_array) 1415 endif 1416 1417!!!!--- Loop over three energy values which we compute Sigma ----------- 1418! 1419! SIB: In terms of formulae, the two terms we will compute are given in 1420! formulae (34a,34b) on p. 5397 of above reference. 1421! 1422! Omega^2(G,G`) 1423! SX(E) = M(n,G)*conj(M(m,G`)) * ------------------------ * Vcoul(G`) 1424! (E-E_n1(k-q))^2-wtilde^2 1425! 1426! Omega^2(G,G`) 1427! CH(E) = M(n,G)*conj(M(m,G`)) * ----------------------------- * Vcoul(G`) 1428! 2*wtilde*[E-E_n1(k-q)-wtilde] 1429! 1430! and we are evaluating both at E = E_l(k) and E = E_l(k) +/- dE. 1431! SX only gets contributions for the band n1 being an occupied state. 1432! 1433! For diagonal elements, we need matrix elements at various 1434! energies (to get quasi-particle energies), i.e. for iw=1,2,3; 1435! but for off-diagonal elements, we only need them at iw=2 1436! (the actual energy) so we won`t even bother calculating 1437! them at other energies 1438 1439 call timing%start(timing%m_cor_sx_ch) 1440 do iw=nstart,nend 1441! wx = E_l(k) - E_n1(k-q) + dE = difference in energies 1442! of the two states appearing as E - E_n1(k-q) above. 1443 if (sig%fdf .eq. -3) then 1444 wx_array(iw) = wxi(iw) - e_n1kq 1445 else 1446 wx_array(iw) = e_lk + sig%dw*(iw-2) - e_n1kq 1447 endif 1448 if (abs(wx_array(iw)) .lt. TOL_Zero) wx_array(iw) = TOL_Zero 1449 enddo 1450 1451! JRD: This Loop is Performance critical. Make Sure you don`t mess it up 1452 1453!$OMP PARALLEL private (my_igp,igp,indigp,ssx_array,sch_array,ig, & 1454!$OMP wtilde,wtilde2,halfinvwtilde,ssxcutoff,sch,ssx, & 1455!$OMP iw,delw,delw2,Omega2,scht,schtt,ssxt,wxt, & 1456!$OMP rden,cden,delwr,wdiffr,wdiff,igbeg,igend) 1457 1458 if (sig%fdf.eq.-3) then 1459 SAFE_ALLOCATE(ssx_array,(sig%nfreqeval)) 1460 SAFE_ALLOCATE(sch_array,(sig%nfreqeval)) 1461 else 1462 SAFE_ALLOCATE(ssx_array,(3)) 1463 SAFE_ALLOCATE(sch_array,(3)) 1464 endif 1465 1466!$OMP DO reduction(+:asxtemp,acht_n1_loc,achtemp) 1467 do my_igp = 1, ngpown 1468 1469 indigp = inv_igp_index(my_igp) 1470 igp = indinv(indigp) 1471 1472 if (igp .gt. ncouls .or. igp .le. 0) cycle 1473 1474 ssx_array = ZERO 1475 sch_array = ZERO 1476 1477! delw measures how close to zero the difference 1478! wx - wtilde = E - E_n1(k-q) - wtilde is relative to wtilde: 1479! delw = (E - E_n1(k-q) - wtilde) / (2 * wtilde) 1480! delw2 is the squared absolute value of delw 1481 1482! If delw is small, both SX and CH blow up, but their sum (for 1483! an occupied state n1) is finite. In such a case, their sum 1484! is Omega^2 / (4 * wtilde2) / (1 + delw). Then the sum is 1485! assigned to ssx and sch is set to zero. 1486 1487! If ssx is too large (which can happen due to the pole at 1488! wx + wtilde = 0 of the SX term), then we should drop this term. 1489! See the discussion at the bottom of p. 5411-5412 of Hybertsen & Louie. 1490 1491! If G.neq.G`, then since we sum over only lower triangle, 1492! we include the contribution we would have had from (G`,G). 1493 1494 if (flag_occ) then 1495 1496 do iw=nstart,nend 1497 1498 scht=0D0 1499 ssxt=0D0 1500 wxt = wx_array(iw) 1501 1502 do ig = 1, ncouls 1503 1504! Here we recompute Omega2 = wtilde2 * I_eps_array. This contains 1505! the factor of (1 - i tan phi) from Eqs. 21 & 22 of arXiv paper. 1506 1507!FIXME: Here we use temporary variables wtilde, wtilde2, Omega2 while 1508! in the following sections we use wtilde_array and I_eps_array directly. 1509! JRD, please write a comment here explaining whether this is critical 1510! for performance or it doesn`t matter. 1511 1512 wtilde = wtilde_array(ig,my_igp) 1513 wtilde2 = wtilde**2 1514 Omega2 = wtilde2 * I_eps_array(ig,my_igp) 1515 1516! Cycle bad for vectorization. Not needed wtilde is zero 1517! if (abs(Omega2) .lt. TOL_Zero) cycle 1518 1519 wdiff = wxt - wtilde 1520 1521 cden = wdiff 1522 rden = cden * CONJG(cden) 1523 rden = 1D0 / rden 1524 delw = wtilde * CONJG(cden) * rden 1525 delwr = delw*CONJG(delw) 1526 wdiffr = wdiff*CONJG(wdiff) 1527 1528! This Practice is bad for vectorization and understanding of the output. 1529! JRD: Complex division is hard to vectorize. So, we help the compiler. 1530 if (wdiffr.gt.limittwo .and. delwr.lt.limitone) then 1531 sch = delw * I_eps_array(ig,my_igp) 1532 cden = wxt**2 - wtilde2 1533 rden = cden*CONJG(cden) 1534 rden = 1D0 / rden 1535 ssx = Omega2 * CONJG(cden) * rden 1536 else if ( delwr .gt. TOL_Zero) then 1537 sch = 0.0d0 1538 cden = (4.0d0 * wtilde2 * (delw + 0.5D0 )) 1539 rden = cden*MYCONJG(cden) 1540 rden = 1D0 / rden 1541 ssx = -Omega2 * MYCONJG(cden) * rden * delw 1542 else 1543 sch = 0.0d0 1544 ssx = 0.0d0 1545 endif 1546 1547! JRD: Breaks vectorization. But, I will have to fix later because 1548! leaving it out breaks GSM example. 1549 ssxcutoff = sig%sexcut*abs(I_eps_array(ig,my_igp)) 1550 if (abs(ssx) .gt. ssxcutoff .and. wxt .lt. 0.0d0) ssx=0.0d0 1551 1552 ssxt = ssxt + aqsntemp(ig,n1)*ssx 1553 scht = scht + aqsntemp(ig,n1)*sch 1554 1555 enddo ! loop over g 1556 ssx_array(iw) = ssx_array(iw) + ssxt*MYCONJG(aqsmtemp(igp,n1)) 1557 sch_array(iw) = sch_array(iw) + 0.5D0*scht*MYCONJG(aqsmtemp(igp,n1)) 1558 enddo 1559 1560 else 1561 1562 do igbeg = 1,ncouls,igblk 1563 igend = min(igbeg+igblk-1,ncouls) 1564 do iw=nstart,nend 1565 1566 scht=0D0 1567 ssxt=0D0 1568 wxt = wx_array(iw) 1569 1570!dir$ no unroll 1571 do ig = igbeg, igend 1572 1573! Here we recompute Omega2 = wtilde2 * I_eps_array. This contains 1574! the factor of (1 - i tan phi) from Eqs. 21 & 22 of arXiv paper. 1575 1576!FIXME: Here we use wtilde_array and I_eps_array directly while in the 1577! previous sections we use temporary variables wtilde, wtilde2, Omega2. 1578! JRD, please write a comment here explaining whether this is critical 1579! for performance or it doesn`t matter. 1580 1581 wdiff = wxt - wtilde_array(ig,my_igp) 1582 1583 cden = wdiff 1584 rden = cden * CONJG(cden) 1585 rden = 1D0 / rden 1586 delw = wtilde_array(ig,my_igp) * CONJG(cden) * rden 1587 delwr = delw*CONJG(delw) 1588 wdiffr = wdiff*CONJG(wdiff) 1589 1590 schtt = aqsntemp(ig,n1) * delw * I_eps_array(ig,my_igp) 1591 1592! JRD: This if is OK for vectorization 1593 if (wdiffr.gt.limittwo .and. delwr.lt.limitone) then 1594 scht = scht + schtt 1595 endif 1596 1597 enddo ! loop over g 1598 1599 sch_array(iw) = sch_array(iw) + 0.5D0*scht*MYCONJG(aqsmtemp(igp,n1)) 1600 1601 enddo 1602 enddo 1603 1604 endif 1605 1606! If a valence band, then accumulate SX contribution. 1607 1608 if (flag_occ) then 1609 do iw=nstart,nend 1610 asxtemp(iw) = asxtemp(iw) - ssx_array(iw) * occ * vcoul(igp) 1611 enddo 1612 endif 1613 1614! Accumulate CH contribution. 1615 1616 do iw=nstart,nend 1617 achtemp(iw) = achtemp(iw) + sch_array(iw) * vcoul(igp) 1618 enddo 1619 1620! Logging CH convergence. 1621 1622 acht_n1_loc(n1true) = acht_n1_loc(n1true) + sch_array(2) * vcoul(igp) 1623 1624 enddo ! igp 1625!$OMP END DO 1626 SAFE_DEALLOCATE(ssx_array) 1627 SAFE_DEALLOCATE(sch_array) 1628!$OMP END PARALLEL 1629 call timing%stop(timing%m_cor_sx_ch) 1630 1631 POP_SUB(mtxel_cor.sigma_gpp) 1632 1633 end subroutine sigma_gpp 1634 1635 1636!============================================================================== 1637! REAL AXIS 1638!============================================================================== 1639 !> Calculate Sigma matrix elements, full-freq / REAL-AXIS formalism 1640 subroutine sigma_ra() 1641 complex(DPC) :: I_epsRggp_int, I_epsAggp_int 1642 real(DP) :: cedifft_zb,intfact,cedifft_zb_left,cedifft_zb_right 1643 complex(DPC) :: cedifft_coh, cedifft_cor 1644 complex(DPC) :: schDt_avg, schDt_right, schDt_left, schDt_lin, schDt_lin2, schDt_lin3 1645 complex(DPC) :: schDttt, schDttt_cor, schD, sch2Dt, sch2Dtt 1646 complex(DPC), allocatable :: schDt_array(:) 1647 real(DP) :: fact1, fact2 1648 integer :: ijk 1649 logical, save :: warned=.false. 1650 1651 PUSH_SUB(mtxel_cor.sigma_ra) 1652 1653 ! JRD: Find iw closest to e_lk 1654 diffmin = INF 1655 do iw=1,sig%nfreqeval 1656 diff = abs(freq0 + (iw-1)*sig%freqevalstep - e_lk) 1657 if (diff .lt. diffmin) then 1658 diffmin=diff 1659 iwlda=iw 1660 endif 1661 enddo 1662 1663 do iw=1,sig%nfreqeval 1664 wx = freq0 - e_n1kq + (iw-1)*sig%freqevalstep 1665 wxi(iw) = wx 1666 enddo 1667 1668 ! GSM: compute the static CH for the static remainder 1669 if (sig%exact_ch.eq.1) then 1670 call acc_static_ch(ngpown, ncouls, inv_igp_index, indinv, vcoul, & 1671 aqsntemp(:,n1), aqsmtemp(:,n1), achstemp, eps_cplx=I_epsR_array(:,:,1)) 1672 endif 1673 1674 ssxDi = (0D0,0D0) 1675 schDi = (0D0,0D0) 1676 schDi_cor = (0D0,0D0) 1677 schDi_corb = (0D0,0D0) 1678 sch2Di = (0D0,0D0) 1679 1680 call timing%start(timing%m_cor_ra_sx) 1681 ! JRD: Don`t want to thread here, nfreqeval could be small 1682 do iw=1,sig%nfreqeval 1683 wx = wxi(iw) 1684 ! SX and CH terms: equation (1.42) of Catalin`s thesis 1685 ! Note the negative sign in I_epsRggp and I_epsAggp 1686 1687 if (flag_occ) then 1688 1689 if(wx.ge.0.0d0) then 1690 ifreq=0 1691 do ijk = 1, sig%nFreq-1 1692 if (wx .ge. sig%dFreqGrid(ijk) .and. wx .lt. sig%dFreqGrid(ijk+1)) then 1693 ifreq=ijk 1694 endif 1695 enddo 1696 if (ifreq .eq. 0) then 1697 ifreq = sig%nFreq+3 ! three is for luck 1698 endif 1699 else 1700 ifreq=0 1701 do ijk = 1, sig%nFreq-1 1702 if (-wx .ge. sig%dFreqGrid(ijk) .and. -wx .lt. sig%dFreqGrid(ijk+1)) then 1703 ifreq=ijk 1704 endif 1705 enddo 1706 if (ifreq .eq. 0) then 1707 ifreq = sig%nFreq+3 ! three is for luck 1708 endif 1709 endif 1710 1711 if(ifreq.ge.sig%nFreq) then 1712 if (.not.warned .and. peinf%pool_rank==0) then 1713 write(0,777) iband, n1true, e_lk, e_n1kq, wx, E_max 1714 endif 1715 warned = .true. 1716 ifreq=sig%nFreq-1 1717 endif 1718 1719#ifdef CPLX 1720 if(wx.ge.0.d0) then 1721 1722 fact1 = (sig%dFreqGrid(ifreq+1)-wx)/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 1723 fact2 = (wx-sig%dFreqGrid(ifreq))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 1724 1725 ssxDittt = 0D0 1726 1727!$OMP PARALLEL do private (my_igp,igp,indigp,ssxDitt,ig, & 1728!$OMP ssxDit) reduction(+:ssxDittt) 1729 do my_igp = 1, ngpown 1730 indigp = inv_igp_index(my_igp) 1731 igp = indinv(indigp) 1732 1733 if (igp .gt. ncouls .or. igp .le. 0) cycle 1734 1735 ssxDitt = (0D0,0D0) 1736 do ig = 1, ncouls 1737 ssxDit=I_epsR_array(ig,my_igp,ifreq)*fact1 + & 1738 I_epsR_array(ig,my_igp,ifreq+1)*fact2 1739 ssxDitt = ssxDitt + aqsntemp(ig,n1)*ssxDit 1740 enddo 1741 ssxDittt = ssxDittt + ssxDitt*vcoul(igp)*MYCONJG(aqsmtemp(igp,n1)) 1742 enddo 1743!$OMP END PARALLEL DO 1744 1745 ssxDi(iw) = ssxDi(iw) + ssxDittt 1746 1747 else 1748 1749 fact1 = (sig%dFreqGrid(ifreq+1)+wx)/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 1750 fact2 = (-sig%dFreqGrid(ifreq)-wx)/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 1751 1752 ssxDittt = 0D0 1753 1754!$OMP PARALLEL do private (my_igp,igp,indigp,ssxDitt,ig, & 1755!$OMP ssxDit) reduction(+:ssxDittt) 1756 do my_igp = 1, ngpown 1757 indigp = inv_igp_index(my_igp) 1758 igp = indinv(indigp) 1759 1760 if (igp .gt. ncouls .or. igp .le. 0) cycle 1761 1762 ssxDitt = (0D0,0D0) 1763 do ig = 1, ncouls 1764 ssxDit=I_epsA_array(ig,my_igp,ifreq)*fact1+ & 1765 I_epsA_array(ig,my_igp,ifreq+1)*fact2 1766 ssxDitt = ssxDitt + aqsntemp(ig,n1)*ssxDit 1767 enddo 1768 ssxDittt = ssxDittt + ssxDitt*vcoul(igp)*MYCONJG(aqsmtemp(igp,n1)) 1769 enddo 1770!$OMP END PARALLEL DO 1771 1772 ssxDi(iw) = ssxDi(iw) + ssxDittt 1773 1774 endif 1775#else 1776 if(wx.ge.0.d0) then 1777 1778 fact1 = (sig%dFreqGrid(ifreq+1)-wx)/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 1779 fact2 = (wx-sig%dFreqGrid(ifreq))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 1780 1781 ssxDittt = 0D0 1782 1783!$OMP PARALLEL do private (my_igp,igp,indigp,ssxDitt,ig, & 1784!$OMP ssxDit) reduction(+:ssxDittt) 1785 do my_igp = 1, ngpown 1786 indigp = inv_igp_index(my_igp) 1787 igp = indinv(indigp) 1788 1789 if (igp .gt. ncouls .or. igp .le. 0) cycle 1790 1791 ssxDitt = (0D0,0D0) 1792 do ig = 1, ncouls 1793 ssxDit=I_epsR_array(ig,my_igp,ifreq)*fact1+ & 1794 I_epsR_array(ig,my_igp,ifreq+1)*fact2 1795 ssxDitt = ssxDitt + aqsntemp(ig,n1)*ssxDit 1796 enddo 1797 ssxDittt = ssxDittt + ssxDitt*vcoul(igp)*MYCONJG(aqsmtemp(igp,n1)) 1798 enddo 1799!$OMP END PARALLEL DO 1800 1801 ssxDi(iw) = ssxDi(iw) + ssxDittt 1802 1803 else 1804 1805 fact1 = (sig%dFreqGrid(ifreq+1)+wx)/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 1806 fact2 = (-sig%dFreqGrid(ifreq)-wx)/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 1807 1808 ssxDittt = 0D0 1809 1810!$OMP PARALLEL do private (my_igp,igp,indigp,ssxDitt,ig, & 1811!$OMP ssxDit) reduction(+:ssxDittt) 1812 do my_igp = 1, ngpown 1813 indigp = inv_igp_index(my_igp) 1814 igp = indinv(indigp) 1815 1816 if (igp .gt. ncouls .or. igp .le. 0) cycle 1817 1818 ssxDitt = (0D0,0D0) 1819 do ig = 1, ncouls 1820 ssxDit=CONJG(I_epsR_array(ig,my_igp,ifreq))*fact1 + & 1821 CONJG(I_epsR_array(ig,my_igp,ifreq+1))*fact2 1822 ssxDitt = ssxDitt + aqsntemp(ig,n1)*ssxDit 1823 enddo 1824 ssxDittt = ssxDittt + ssxDitt*vcoul(igp)*MYCONJG(aqsmtemp(igp,n1)) 1825 enddo 1826!$OMP END PARALLEL DO 1827 1828 ssxDi(iw) = ssxDi(iw) + ssxDittt 1829 1830 endif 1831#endif 1832 endif 1833 enddo 1834 call timing%stop(timing%m_cor_ra_sx) 1835 1836 ! JRD: Now do CH term 1837 call timing%start(timing%m_cor_ra_ch) 1838 SAFE_ALLOCATE(schDt_array,(sig%Nfreq)) 1839 schDt_array(:) = 0D0 1840!$OMP PARALLEL do private (my_igp,igp,indigp,ig,schDtt,I_epsRggp_int, & 1841!$OMP I_epsAggp_int,schD,schDt) 1842 do ifreq=1,sig%Nfreq 1843 schDt = (0D0,0D0) 1844 1845 do my_igp = 1, ngpown 1846 indigp = inv_igp_index(my_igp) 1847 igp = indinv(indigp) 1848 1849 if (igp .gt. ncouls .or. igp .le. 0) cycle 1850 1851! JRD: The below loop is performance critical 1852 1853 schDtt = (0D0,0D0) 1854 do ig = 1, ncouls 1855 1856 I_epsRggp_int = I_epsR_array(ig,my_igp,ifreq) 1857 1858#ifdef CPLX 1859 I_epsAggp_int = I_epsA_array(ig,my_igp,ifreq) 1860 1861 ! for G,G` components 1862 schD=I_epsRggp_int-I_epsAggp_int 1863 1864 ! for G`,G components 1865 schDtt = schDtt + aqsntemp(ig,n1)*schD 1866#else 1867 schD= CMPLX(IMAG(I_epsRggp_int),0.0d0) 1868 schDtt = schDtt + aqsntemp(ig,n1)*schD 1869#endif 1870 enddo 1871 schDt = schDt + schDtt*vcoul(igp)*MYCONJG(aqsmtemp(igp,n1)) 1872 enddo 1873 1874! XXX: Threads could be stomping on each-other`s cache over this... try reduction? 1875 schDt_array(ifreq) = schDt 1876 1877 enddo 1878!$OMP END PARALLEL DO 1879 call timing%stop(timing%m_cor_ra_ch) 1880 1881 call timing%start(timing%m_cor_ra_sum) 1882!$OMP PARALLEL do private (ifreq,schDt,cedifft_zb,cedifft_coh,cedifft_cor, & 1883!$OMP cedifft_zb_right,cedifft_zb_left,schDt_right,schDt_left, & 1884!$OMP schDt_avg,schDt_lin,schDt_lin2,intfact,iw, & 1885!$OMP schDt_lin3) reduction(+:schDi,schDi_corb,schDi_cor,sch2Di) 1886 do ifreq=1,sig%Nfreq 1887 1888 schDt = schDt_array(ifreq) 1889 1890 cedifft_zb = sig%dFreqGrid(ifreq) 1891 cedifft_coh = CMPLX(cedifft_zb,0D0)- sig%dFreqBrd(ifreq) 1892 1893 if( flag_occ) then 1894 cedifft_cor = -1.0d0*CMPLX(cedifft_zb,0D0) - sig%dFreqBrd(ifreq) 1895 else 1896 cedifft_cor = CMPLX(cedifft_zb,0D0) - sig%dFreqBrd(ifreq) 1897 endif 1898 1899 if (ifreq .ne. 1) then 1900 cedifft_zb_right = cedifft_zb 1901 cedifft_zb_left = sig%dFreqGrid(ifreq-1) 1902 schDt_right = schDt 1903 schDt_left = schDt_array(ifreq-1) 1904 schDt_avg = 0.5D0 * ( schDt_right + schDt_left ) 1905 schDt_lin = schDt_right - schDt_left 1906 schDt_lin2 = schDt_lin/(cedifft_zb_right-cedifft_zb_left) 1907 endif 1908 1909#ifdef CPLX 1910! The below two lines are for sigma1 and sigma3 1911 if (ifreq .ne. sig%Nfreq) then 1912 schDi(:) = schDi(:) - CMPLX(0.d0,pref(ifreq)) * schDt / ( wxi(:)-cedifft_coh) 1913 schDi_corb(:) = schDi_corb(:) - CMPLX(0.d0,pref(ifreq)) * schDt / ( wxi(:)-cedifft_cor) 1914 endif 1915 if (ifreq .ne. 1) then 1916 do iw = 1, sig%nfreqeval 1917!These lines are for sigma2 1918 intfact=abs((wxi(iw)-cedifft_zb_right)/(wxi(iw)-cedifft_zb_left)) 1919 if (intfact .lt. 1d-4) intfact = 1d-4 1920 if (intfact .gt. 1d4) intfact = 1d4 1921 intfact = -log(intfact) 1922 sch2Di(iw) = sch2Di(iw) - CMPLX(0.d0,pref_zb) * schDt_avg * intfact 1923!These lines are for sigma4 1924 if (flag_occ) then 1925 intfact=abs((wxi(iw)+cedifft_zb_right)/(wxi(iw)+cedifft_zb_left)) 1926 if (intfact .lt. 1d-4) intfact = 1d-4 1927 if (intfact .gt. 1d4) intfact = 1d4 1928 intfact = log(intfact) 1929 schDt_lin3 = (schDt_left + schDt_lin2*(-wxi(iw)-cedifft_zb_left))*intfact 1930 else 1931 schDt_lin3 = (schDt_left + schDt_lin2*(wxi(iw)-cedifft_zb_left))*intfact 1932 endif 1933 schDt_lin3 = schDt_lin3 + schDt_lin 1934 schDi_cor(iw) = schDi_cor(iw) - CMPLX(0.d0,pref_zb) * schDt_lin3 1935 enddo 1936 endif 1937#else 1938! The below two lines are for sigma1 and sigma3 1939 if (ifreq .ne. sig%Nfreq) then 1940 schDi(:) = schDi(:) + pref(ifreq)*schDt / ( wxi(:)-cedifft_coh) 1941 schDi_corb(:) = schDi_corb(:) + pref(ifreq)*schDt / ( wxi(:)-cedifft_cor) 1942 endif 1943 if (ifreq .ne. 1) then 1944 do iw = 1, sig%nfreqeval 1945!These lines are for sigma2 1946 intfact=abs((wxi(iw)-cedifft_zb_right)/(wxi(iw)-cedifft_zb_left)) 1947 if (intfact .lt. 1d-4) intfact = 1d-4 1948 if (intfact .gt. 1d4) intfact = 1d4 1949 intfact = -log(intfact) 1950 sch2Di(iw) = sch2Di(iw) + pref_zb * schDt_avg * intfact 1951!These lines are for sigma4 1952 if (flag_occ) then 1953 intfact=abs((wxi(iw)+cedifft_zb_right)/(wxi(iw)+cedifft_zb_left)) 1954 if (intfact .lt. 1d-4) intfact = 1d-4 1955 if (intfact .gt. 1d4) intfact = 1d4 1956 intfact = log(intfact) 1957 schDt_lin3 = (schDt_left + schDt_lin2*(-wxi(iw)-cedifft_zb_left))*intfact 1958 else 1959 schDt_lin3 = (schDt_left + schDt_lin2*(wxi(iw)-cedifft_zb_left))*intfact 1960 endif 1961 schDt_lin3 = schDt_lin3 + schDt_lin 1962 schDi_cor(iw) = schDi_cor(iw) + pref_zb * schDt_lin3 1963 enddo 1964 endif 1965#endif 1966 enddo 1967!$OMP END PARALLEL DO 1968 SAFE_DEALLOCATE(schDt_array) 1969 call timing%stop(timing%m_cor_ra_sum) 1970 1971 ! JRD: Compute Sigma2 and Sigma4 delta function contributions 1972 call timing%start(timing%m_cor_ra_ch2) 1973 do iw = 1, sig%nfreqeval 1974 wx = wxi(iw) 1975 if(wx .ge. 0.0d0) then 1976 ifreq=0 1977 do ijk = 1, sig%nFreq-1 1978 if (wx .ge. sig%dFreqGrid(ijk) .and. wx .lt. sig%dFreqGrid(ijk+1)) then 1979 ifreq=ijk 1980 endif 1981 enddo 1982 if (ifreq .eq. 0) then 1983 ifreq=sig%nFreq-1 1984 endif 1985 1986 fact1=(sig%dFreqGrid(ifreq+1)-wx)/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 1987 fact2=(wx-sig%dFreqGrid(ifreq))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 1988 1989 schDttt = 0D0 1990 schDttt_cor = 0D0 1991 1992!$OMP PARALLEL do private (my_igp,igp,indigp,ig, & 1993!$OMP sch2Dt,sch2Dtt) reduction(+:schDttt,schDttt_cor) 1994 do my_igp = 1, ngpown 1995 indigp = inv_igp_index(my_igp) 1996 igp = indinv(indigp) 1997 1998 if (igp .gt. ncouls .or. igp .le. 0) cycle 1999 2000 sch2Dtt = (0D0,0D0) 2001 do ig = 1, ncouls 2002#ifdef CPLX 2003 sch2Dt=-0.5D0*((I_epsR_array(ig,my_igp,ifreq)-I_epsA_array(ig,my_igp,ifreq))*fact1 + & 2004 (I_epsR_array(ig,my_igp,ifreq+1)-I_epsA_array(ig,my_igp,ifreq+1))*fact2) 2005#else 2006 sch2Dt = CMPLX(0D0,-1D0)* & 2007 (IMAG(I_epsR_array(ig,my_igp,ifreq))*fact1 + IMAG(I_epsR_array(ig,my_igp,ifreq+1))*fact2) 2008#endif 2009 sch2Dtt = sch2Dtt + aqsntemp(ig,n1)*sch2Dt 2010 enddo 2011 schDttt = schDttt + sch2Dtt*vcoul(igp)*MYCONJG(aqsmtemp(igp,n1)) 2012 if (.not.flag_occ) then 2013 schDttt_cor = schDttt_cor + sch2Dtt*vcoul(igp)*MYCONJG(aqsmtemp(igp,n1)) 2014 endif 2015 enddo 2016!$OMP END PARALLEL DO 2017 2018 sch2Di(iw) = sch2Di(iw) + schDttt 2019 schDi_cor(iw) = schDi_cor(iw) + schDttt_cor 2020 else if (flag_occ) then 2021 wx=-wx 2022 ifreq=0 2023 do ijk = 1, sig%nFreq-1 2024 if (wx .ge. sig%dFreqGrid(ijk) .and. wx .lt. sig%dFreqGrid(ijk+1)) then 2025 ifreq=ijk 2026 endif 2027 enddo 2028 if (ifreq .eq. 0) then 2029 ifreq=sig%nFreq-1 2030 endif 2031 2032 fact1=(sig%dFreqGrid(ifreq+1)-wx)/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 2033 fact2=(wx-sig%dFreqGrid(ifreq))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 2034 2035 schDttt_cor = 0D0 2036 2037!$OMP PARALLEL do private (my_igp,igp,indigp,ig, & 2038!$OMP sch2Dt,sch2Dtt) reduction(+:schDttt_cor) 2039 do my_igp = 1, ngpown 2040 indigp = inv_igp_index(my_igp) 2041 igp = indinv(indigp) 2042 2043 if (igp .gt. ncouls .or. igp .le. 0) cycle 2044 2045 sch2Dtt = (0D0,0D0) 2046 do ig = 1, ncouls 2047#ifdef CPLX 2048 sch2Dt=-0.5D0*((I_epsR_array(ig,my_igp,ifreq)-I_epsA_array(ig,my_igp,ifreq))*fact1 + & 2049 (I_epsR_array(ig,my_igp,ifreq+1)-I_epsA_array(ig,my_igp,ifreq+1))*fact2) 2050#else 2051 sch2Dt = CMPLX(0D0,-1D0)* & 2052 (IMAG(I_epsR_array(ig,my_igp,ifreq))*fact1 + IMAG(I_epsR_array(ig,my_igp,ifreq+1))*fact2) 2053#endif 2054 sch2Dtt = sch2Dtt + aqsntemp(ig,n1)*sch2Dt 2055 enddo 2056 schDttt_cor = schDttt_cor + sch2Dtt*vcoul(igp)*MYCONJG(aqsmtemp(igp,n1)) 2057 enddo 2058!$OMP END PARALLEL DO 2059 schDi_cor(iw) = schDi_cor(iw) + schDttt_cor 2060 endif 2061 enddo 2062 call timing%stop(timing%m_cor_ra_ch2) 2063 2064 do iw = 1, sig%nfreqeval 2065 if (flag_occ) then 2066 asxDtemp(iw) = asxDtemp(iw) + ssxDi(iw)*occ 2067 endif 2068 achDtemp(iw) = achDtemp(iw) + schDi(iw) 2069 achDtemp_cor(iw) = achDtemp_cor(iw) + schDi_cor(iw) 2070 achDtemp_corb(iw) = achDtemp_corb(iw) + schDi_corb(iw) 2071 ach2Dtemp(iw) = ach2Dtemp(iw) + sch2Di(iw) 2072 ! JRD: This is now close to LDA 2073 if (iw.eq.iwlda) achtD_n1(n1true) = & 2074 achtD_n1(n1true) + schDi(iw) 2075 enddo ! over iw 2076777 format(1x,"WARNING: The real frequency range is too small." & 2077 ,/,3x,"l =",i3,1x,"n1 =",i5,1x,"E_l =",f8.3,1x,"E_n1" & 2078 ,1x,"=",f8.3,1x,"wx =",f8.3,1x,"E_max =",f8.3) 2079 2080 POP_SUB(mtxel_cor.sigma_ra) 2081 2082 end subroutine sigma_ra 2083 2084 2085!============================================================================== 2086! CONTOUR DEFORMATION 2087!============================================================================== 2088 !> Calculate Sigma matrix elements, full-freq / CONTOUR-DEFORMATION formalism 2089 subroutine sigma_cd() 2090 real(DP) :: imag_freqs(sig%nfreq_imag) 2091 SCALAR :: c0, c1, c2, c3, y1, y2, y3, m1, m2, mm(sig%nfreq_imag) 2092 real(DP) :: x1, x2, x3, dx 2093 SCALAR :: sW_imag_freqs(sig%nfreq_imag), sint(sig%nfreqeval) 2094 SCALAR :: sW_iomega, sW_iomega_acc 2095 complex(DPC) :: sres(sig%nfreqeval), sres_omega, sres_omega_acc 2096 integer :: occ_sign, ijk, ifreq_my_igp 2097 real(DP) :: fact1, fact2 2098 logical, save :: warned=.false. 2099 2100 ! FHJ: WARNING: we calc. the TO Sigma here, while RA/FF calculates Ret Sigma. 2101 ! Everything we implement here is Eqn. (14) from PRB 67, 155208 (2003). 2102 ! Note the apparent sign flip; that`s because we have (1-epsinv)*v instead 2103 ! of (epsinv-1)*v = W^c. 2104 ! 2105 ! FHJ: Definitions (following PRB 67, 155208 notation): 2106 ! sW_imag_freqs(i \omega) = - \sum_{G, G`} M^*_{G} W^c_{G G`}(i \omega) M(G`) 2107 ! sint(\omega) = \frac{1}{\pi} \int_0^{\inf} d{\omega`} 2108 ! sW_imag_freqs(i \omega`) (\omega-Elk)/((\omega-Elk)^2 + (\omega`)^2) 2109 ! 2110 ! Note: we actually evaluate sint(\omega) using the quadrature scheme 2111 ! from Fabien`s thesis. 2112 2113 PUSH_SUB(mtxel_cor.sigma_cd) 2114 2115 ! JRD: Find iw closest to e_lk 2116 diffmin = INF 2117 do iw=1,sig%nfreqeval 2118 diff = abs(freq0 + (iw-1)*sig%freqevalstep - e_lk) 2119 if (diff .lt. diffmin) then 2120 diffmin=diff 2121 iwlda=iw 2122 endif 2123 enddo 2124 2125 ! wxi = omega - e_n``k 2126 do iw=1,sig%nfreqeval 2127 wx = freq0 - e_n1kq + (iw-1)*sig%freqevalstep 2128 wxi(iw) = wx 2129 enddo 2130 2131 ! GSM: compute the static CH for the static remainder 2132 if (sig%exact_ch.eq.1) then 2133 call acc_static_ch(ngpown, ncouls, inv_igp_index, indinv, vcoul, & 2134 aqsntemp(:,n1), aqsmtemp(:,n1), achstemp, eps_cplx=I_epsR_array(:,:,1)) 2135 endif 2136 2137 sres(:) = (0D0,0D0) ! Residue contrib. 2138 sint(:) = ZERO ! Integral contrib. 2139 2140 ! FHJ: residue contribution to the correlation self energy 2141 ! JRD: Don`t want to thread here, nfreqeval could be small 2142 call timing%start(timing%m_cor_cd_res) 2143 do iw=1,sig%nfreqeval 2144 2145 wx = wxi(iw) 2146 ! FHJ: need either wx>=0 and empty or wx<0 and occupied 2147 if ( (wx>=0.0d0) .eqv. flag_occ) cycle 2148 occ_sign = idint(sign(1d0, wx)) 2149 ! FHJ: and from now on we can work on | omega - e_n``k | 2150 wx = abs(wx) 2151 2152 ifreq = 0 2153 do ijk = 1, nfreq_real-1 2154 if (wx>=sig%dFreqGrid(ijk) .and. wx<sig%dFreqGrid(ijk+1)) then 2155 ifreq = ijk 2156 endif 2157 enddo 2158 if (ifreq==0) then 2159 ifreq = nfreq_real+3 ! three is for luck 2160 endif 2161 if(ifreq>=nfreq_real) then 2162 if (.not.warned .and. peinf%pool_rank==0) then 2163 write(0,777) iband, n1true, e_lk, e_n1kq, wx, E_max 2164 endif 2165 warned = .true. 2166 ifreq = nfreq_real-1 2167 endif 2168 2169 sres_omega = 0D0 2170 if (nfreq_real>1) then 2171 fact1 = (sig%dFreqGrid(ifreq+1)-wx)/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 2172 fact2 = (wx-sig%dFreqGrid(ifreq))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 2173 endif 2174 2175!$OMP PARALLEL do private (my_igp,igp,indigp,sres_omega_acc,ig) reduction(+:sres_omega) 2176 do my_igp = 1, ngpown 2177 indigp = inv_igp_index(my_igp) 2178 igp = indinv(indigp) 2179 if (igp>ncouls .or. igp<=0) cycle 2180 2181 sres_omega_acc = (0D0,0D0) 2182 ! JRD: The below loop is performance critical 2183 if (nfreq_real>1) then 2184 do ig = 1, ncouls 2185 sres_omega_acc = sres_omega_acc + aqsntemp(ig,n1)*( & 2186 I_epsR_array(ig,my_igp,ifreq)*fact1 + & 2187 I_epsR_array(ig,my_igp,ifreq+1)*fact2) 2188 enddo 2189 else 2190 do ig = 1, ncouls 2191 sres_omega_acc = sres_omega_acc + aqsntemp(ig,n1)*I_epsR_array(ig,my_igp,1) 2192 enddo 2193 endif 2194 sres_omega = sres_omega + MYCONJG(aqsmtemp(igp,n1))*sres_omega_acc*vcoul(igp) 2195 enddo 2196!$OMP END PARALLEL DO 2197 2198 sres(iw) = sres(iw) - occ_sign*sres_omega 2199 2200 enddo ! iw loop 2201 call timing%stop(timing%m_cor_cd_res) 2202 2203 ! JRD: Now do Integral contribution 2204 call timing%start(timing%m_cor_cd_int) 2205 sW_imag_freqs(:) = ZERO 2206!$OMP PARALLEL DO PRIVATE(igp,indigp,ig,sW_iomega_acc,ifreq,my_igp,ifreq_my_igp) & 2207!$OMP REDUCTION(+:sW_imag_freqs) 2208 do ifreq_my_igp = 0, sig%nfreq_imag*ngpown - 1 2209 ifreq = ifreq_my_igp/ngpown + 1 2210 my_igp = mod(ifreq_my_igp, ngpown) + 1 2211 indigp = inv_igp_index(my_igp) 2212 igp = indinv(indigp) 2213 if (igp>ncouls .or. igp<=0) cycle 2214 2215 sW_iomega_acc = ZERO 2216 ! JRD: The below loop is performance critical 2217 do ig = 1, ncouls 2218 sW_iomega_acc = sW_iomega_acc + aqsntemp(ig,n1)*I_eps_imag(ig,my_igp,ifreq) 2219 enddo 2220 sW_imag_freqs(ifreq) = sW_imag_freqs(ifreq) + & 2221 MYCONJG(aqsmtemp(igp,n1))*sW_iomega_acc*vcoul(igp) 2222 enddo 2223!$OMP END PARALLEL DO 2224 call timing%stop(timing%m_cor_cd_int) 2225 2226 call timing%start(timing%m_cor_cd_sum) 2227 imag_freqs(:) = IMAG(sig%dFreqGrid(nfreq_real+1:)+sig%dFreqBrd(nfreq_real+1:)) 2228 select case (sig%cd_int_method) 2229 2230 case (0) 2231 ! FHJ: Integration from Fabien`s thesis; assumes that the matrix elements 2232 ! are piecewise constant functions, and perform the integral over each 2233 ! interval centered are the frequency evaluation point. 2234 2235 ! JRD: XXX Should center this integral.... 2236 ! FHJ: What do you mean? 2237 ifreq = 1 2238 sW_iomega = sW_imag_freqs(ifreq) 2239 freqStart = imag_freqs(ifreq) 2240 freqEnd = (imag_freqs(ifreq) + imag_Freqs(ifreq+1)) * 0.5d0 2241 sint(:) = sint(:) + sW_iomega * ( atan(freqEnd/wxi(:)) - atan(freqStart/wxi(:)) ) 2242 2243 !FHJ: no need to do OMP here, each trip is very short, and OMP can 2244 !hurt vectorization. 2245 do ifreq = 2, sig%nfreq_imag-1 2246 sW_iomega = sW_imag_freqs(ifreq) 2247 freqStart = (imag_freqs(ifreq-1) + imag_Freqs(ifreq)) * 0.5d0 2248 freqEnd = (imag_freqs(ifreq) + imag_Freqs(ifreq+1)) * 0.5d0 2249 sint(:) = sint(:) + sW_iomega * ( atan(freqEnd/wxi(:)) - atan(freqStart/wxi(:)) ) 2250 enddo 2251 2252 sW_iomega = sW_imag_freqs(sig%nfreq_imag) 2253 freqStart = (imag_freqs(sig%nfreq_imag-1) + imag_Freqs(sig%nfreq_imag)) * 0.5d0 2254 freqEnd = (-imag_freqs(sig%nfreq_imag-1) + 3d0*imag_Freqs(sig%nfreq_imag)) * 0.5d0 2255 sint(:) = sint(:) + sW_iomega * ( atan(freqEnd/wxi(:)) - atan(freqStart/wxi(:)) ) 2256 2257 2258 case (2) 2259 ! FHJ: Integration scheme: assume piecewise quadratic matrix elements 2260 ! on each integration segment 2261 2262 ! FHJ: First segment is special. TODO: assert that freqStart==0d0 2263 freqStart = imag_freqs(1) 2264 freqEnd = imag_freqs(2) 2265 c0 = sW_imag_freqs(1) 2266 c1 = 0d0 2267 c2 = (sW_imag_freqs(2) - c0) / (freqEnd-freqStart)**2 2268 sint(:) = sint(:) + & 2269 c2 * wxi(:) * (freqEnd-freqStart) + & 2270 (c0 - c2*wxi(:)**2) * (atan(freqEnd/wxi(:)) - atan(freqStart/wxi(:))) + & 2271 c1*wxi(:)*0.5d0*log((wxi(:)**2 + freqEnd**2)/(wxi(:)**2 + freqStart**2)) 2272 2273 ! FHJ: other segments: find c0, c1 and c2 by using each previous point. 2274 ! This also works for complex y`s. This sum is over the starting points. 2275 do ifreq = 2, sig%nfreq_imag-1 2276 x1 = imag_freqs(ifreq-1) 2277 x2 = imag_freqs(ifreq) 2278 x3 = imag_freqs(ifreq+1) 2279 y1 = sW_imag_freqs(ifreq-1) 2280 y2 = sW_imag_freqs(ifreq) 2281 y3 = sW_imag_freqs(ifreq+1) 2282 c2 = ((y2-y1)*(x1-x3) + (y3-y1)*(x2-x1)) / & 2283 ((x1-x3)*(x2**2-x1**2) + (x2-x1)*(x3**2-x1**2)) 2284 c1 = ((y2-y1) - c2*(x2**2-x1**2)) / (x2-x1) 2285 c0 = y1 - c2*x1**2 - c1*x1 2286 freqEnd = x3 2287 freqStart = x2 2288 sint(:) = sint(:) + & 2289 c2 * wxi(:) * (freqEnd-freqStart) + & 2290 (c0 - c2*wxi(:)**2) * (atan(freqEnd/wxi(:)) - atan(freqStart/wxi(:))) + & 2291 c1*wxi(:)*0.5d0*log((wxi(:)**2 + freqEnd**2)/(wxi(:)**2 + freqStart**2)) 2292 enddo 2293 2294 ! FHJ: add Lorentzian tail? This is only a good idea is the last freq. is really large 2295 !freqStart = x3 2296 !AA = y3*x3**2 2297 !sint(:) = sint(:) + AA/(2d0*wxi(:)**2*freqStart)*( & 2298 ! 2d0*wxi(:) - sign(1d0,wxi(:))*PI_D*freqStart + 2d0*freqStart*atan(freqStart/wxi(:)) ) 2299 2300 2301 case (3) 2302 ! FHJ: Integration scheme: assume piecewise cubic matrix elements 2303 ! on each integration segment. Find cubic Hermite spline representation 2304 ! using a finite difference approximations for the derivatives 2305 2306 ! Estimate derivatives 2307 mm(1) = ZERO 2308 mm(2:sig%nfreq_imag-1) = & 2309 0.5d0*(sW_imag_freqs(3:sig%nfreq_imag) - sW_imag_freqs(2:sig%nfreq_imag-1)) / & 2310 ( imag_freqs(3:sig%nfreq_imag) - imag_freqs(2:sig%nfreq_imag-1) ) + & 2311 0.5d0*(sW_imag_freqs(2:sig%nfreq_imag-1) - sW_imag_freqs(1:sig%nfreq_imag-2)) / & 2312 ( imag_freqs(2:sig%nfreq_imag-1) - imag_freqs(1:sig%nfreq_imag-2) ) 2313 mm(sig%nfreq_imag) = mm(sig%nfreq_imag-1) + & 2314 (imag_freqs(sig%nfreq_imag)-imag_freqs(sig%nfreq_imag-1)) * & 2315 (mm(sig%nfreq_imag-1)-mm(sig%nfreq_imag-2)) / & 2316 (imag_freqs(sig%nfreq_imag-1)-imag_freqs(sig%nfreq_imag-2)) 2317 2318 ! FHJ: other segments: find c0, c1 and c2 by using each previous point. 2319 ! This also works for complex y`s. This sum is over the starting points. 2320 do ifreq = 1, sig%nfreq_imag-1 2321 x1 = imag_freqs(ifreq) 2322 x2 = imag_freqs(ifreq+1) 2323 y1 = sW_imag_freqs(ifreq) 2324 y2 = sW_imag_freqs(ifreq+1) 2325 dx = x2 - x1 2326 2327 c0 = -(m1*x1) - (2*m1*x1**2)/dx - (m2*x1**2)/dx - (m1*x1**3)/dx**2 - & 2328 (m2*x1**3)/dx**2 + y1 - (3*x1**2*y1)/dx**2 - (2*x1**3*y1)/dx**3 + & 2329 (3*x1**2*y2)/dx**2 + (2*x1**3*y2)/dx**3 2330 c1 = m1 + (4*m1*x1)/dx + (2*m2*x1)/dx + (3*m1*x1**2)/dx**2 + & 2331 (3*m2*x1**2)/dx**2 + (6*x1*y1)/dx**2 + (6*x1**2*y1)/dx**3 - & 2332 (6*x1*y2)/dx**2 - (6*x1**2*y2)/dx**3 2333 c2 = (-2*m1)/dx - m2/dx - (3*m1*x1)/dx**2 - (3*m2*x1)/dx**2 - & 2334 (3*y1)/dx**2 - (6*x1*y1)/dx**3 + (3*y2)/dx**2 + (6*x1*y2)/dx**3 2335 c3 = m1/dx**2 + m2/dx**2 + (2*y1)/dx**3 - (2*y2)/dx**3 2336 2337 sint(:) = sint(:) + ((wxi*dx)*(2*c2 + c3*(x1+x2)) - & 2338 2d0*(c0 - wxi**2*c2)*(atan(x1/wxi) - atan(x2/wxi)) + & 2339 wxi*(-c1 + wxi**2*c3)*log((wxi**2 + x1**2)/(wxi**2 + x2**2)))/2d0 2340 enddo 2341 2342 case default 2343 call die('Invalid integration method') 2344 2345 endselect !sig%cd_int_method 2346 call timing%stop(timing%m_cor_cd_sum) 2347 2348 do iw = 1, sig%nfreqeval 2349 !FHJ: the output accumulated arrays "asxtDyn" and "asxtDyn" actually store 2350 !the residue and integral contribution to the GW self energy for CD 2351 !calculations, respectively. 2352 asxDtemp(iw) = asxDtemp(iw) + sres(iw) 2353 achDtemp(iw) = achDtemp(iw) + sint(iw)/PI_D 2354 ! JRD: This is now close to LDA 2355 if (iw==iwlda) achtD_n1(n1true) = achtD_n1(n1true) + sint(iw)/PI_D 2356 enddo ! over iw 2357 2358777 format(1x,"WARNING: The real frequency range is too small." & 2359 ,/,3x,"l =",i3,1x,"n1 =",i5,1x,"E_l =",f8.3,1x,"E_n1" & 2360 ,1x,"=",f8.3,1x,"wx =",f8.3,1x,"E_max =",f8.3) 2361 2362 POP_SUB(mtxel_cor.sigma_cd) 2363 2364 end subroutine sigma_cd 2365 2366!============================================================================== 2367! SUBSPACE CONTOUR DEFORMATION 2368!============================================================================== 2369 !> Calculate Subspace Sigma matrix elements, full-freq / CONTOUR-DEFORMATION formalism 2370 subroutine sigma_cd_subspace() 2371 real(DP) :: imag_freqs(sig%nfreq_imag) 2372 SCALAR :: c0, c1, c2, c3, y1, y2, y3, m1, m2, mm(sig%nfreq_imag) 2373 real(DP) :: x1, x2, x3, dx 2374 SCALAR :: sW_imag_freqs(sig%nfreq_imag, peinf%ntband_max), sint(sig%nfreqeval) 2375 SCALAR :: sW_iomega, sW_iomega_acc 2376 complex(DPC) :: sres(sig%nfreqeval), sres_omega, sres_omega_acc 2377 integer :: occ_sign, ijk, ifreq_my_igp 2378 real(DP) :: fact1, fact2 2379 logical, save :: warned=.false. 2380 2381 integer :: n1 2382 SCALAR, allocatable :: tempz(:,:,:), achstemp_array(:) 2383 ! variables for static reminder 2384 SCALAR, ALLOCATABLE :: schs_array(:,:), eps_scalar(:,:), achs_n1(:) 2385 logical :: use_zgemm 2386 integer :: ipe_block, ipe_real, my_ib 2387 integer :: n1_start, n1_end, n1_size_actual 2388 integer :: n1_ipe 2389 2390 PUSH_SUB(mtxel_cor.sigma_cd_subspace) 2391 2392 ! check if we need to calculate the static reminder 2393 if (sig%exact_ch.eq.1) then 2394 call timing%start(timing%m_cor_remain) 2395 SAFE_ALLOCATE(schs_array, (my_Nb, peinf%ntband_dist(ipe))) 2396 SAFE_ALLOCATE(achs_n1, (peinf%ntband_dist(ipe))) 2397 2398!$OMP PARALLEL do private (n1) 2399 do n1 = 1, peinf%ntband_dist(ipe) 2400 achs_n1(n1) = ZERO 2401 end do 2402!$OMP END PARALLEL DO 2403 2404 if (my_Nb_start > 0) then 2405#ifdef CPLX 2406 call zgemm('t','n', my_Nb, peinf%ntband_dist(ipe), Nb_tot, (-1D0,0D0), & 2407 Re_eps_sub(:,:,1), Nb_tot, & 2408 Msub_n_temp, Nb_tot, (0D0,0D0), & 2409 schs_array, my_Nb) 2410#else 2411 SAFE_ALLOCATE(eps_scalar, (Nb_tot,my_Nb)) 2412 eps_scalar(:,:) = SCALARIFY(Re_eps_sub(:,:,1)) 2413 CALL dgemm('t','n', my_Nb, peinf%ntband_dist(ipe), Nb_tot, -1.0D+00, & 2414 eps_scalar(:,:), Nb_tot, & 2415 Msub_n_temp, Nb_tot, 0.0D+00, & 2416 schs_array, my_Nb) 2417 SAFE_DEALLOCATE(eps_scalar) 2418#endif 2419 2420!$OMP PARALLEL do private (n1,my_ib) reduction(+:achs_n1) 2421 do n1 = 1, peinf%ntband_dist(ipe) 2422 do my_ib = 1, my_Nb 2423 achs_n1(n1) = achs_n1(n1) + MYCONJG(Msub_m_temp(my_ib+my_Nb_start-1,n1)) * & 2424 schs_array(my_ib,n1) * 0.5D0 2425 enddo 2426 end do 2427!$OMP END PARALLEL DO 2428 end if ! my_Nb_start > 0 2429 2430 ! correct wings, same flag as residual part (computed in the same loop later) 2431 ! note the sign is changed and the famous factor 0.5 2432 if(ipe-1 == peinf%pool_rank) then 2433 if(fix_wings_res) then 2434 do n1 = 1, peinf%ntband_dist(ipe) 2435 achs_n1(n1) = achs_n1(n1) - MYCONJG(aqsmtemp(wing_pos,n1)) * & 2436 SUM(wings_Re(1:ncouls,1,2) * aqsntemp(1:ncouls,n1)) * & 2437 vcoul(indinv(wing_pos)) * 0.5d0 2438 do ig = 1, ncouls 2439 achs_n1(n1) = achs_n1(n1) - MYCONJG(aqsmtemp(ig,n1)) * & 2440 wings_Re(ig,1,1) * & 2441 aqsntemp(wing_pos,n1) * & 2442 vcoul(indinv(ig)) * 0.5d0 2443 end do 2444 end do 2445 end if 2446 end if ! wings 2447 2448 do n1 = 1, peinf%ntband_dist(ipe) 2449 n1true = peinf%indext_dist(n1,ipe) 2450 achtcor_n1(n1true) = achtcor_n1(n1true) - 0.5d0 * achs_n1(n1) 2451 end do 2452 2453 SAFE_DEALLOCATE(schs_array) 2454 SAFE_DEALLOCATE(achs_n1) 2455 2456 call timing%stop(timing%m_cor_remain) 2457 end if 2458 2459 ! residual contribution 2460 do n1 = 1, peinf%ntband_dist(ipe) 2461 2462 ! n1true = "True" band index of the band n1 w.r.t. all bands 2463 n1true = peinf%indext_dist(n1,ipe) 2464 ! energy of the |n1,k-q> state 2465 e_n1kq = wfnkq%ekq(n1true,ispin) 2466 ! occupation of the |n1,k-q> state 2467 flag_occ = (n1true<=(sig%nvband+sig%ncrit)) & 2468 .and.((sig%ncrit==0).or.(e_n1kq<=(sig%efermi+sig%tol))) 2469 if (abs(e_n1kq-sig%efermi)<sig%tol) then 2470 occ = 0.5d0 ! Fermi-Dirac distribution = 1/2 at Fermi level 2471 else 2472 occ = 1.0d0 2473 endif 2474 2475 ! JRD: Find iw closest to e_lk 2476 diffmin = INF 2477 do iw=1,sig%nfreqeval 2478 diff = abs(freq0 + (iw-1)*sig%freqevalstep - e_lk) 2479 if (diff .lt. diffmin) then 2480 diffmin=diff 2481 iwlda=iw 2482 endif 2483 enddo 2484 2485 ! wxi = omega - e_n``k 2486 do iw=1,sig%nfreqeval 2487 wx = freq0 - e_n1kq + (iw-1)*sig%freqevalstep 2488 wxi(iw) = wx 2489 enddo 2490 2491 ! FHJ: residue contribution to the correlation self energy 2492 ! JRD: Don`t want to thread here, nfreqeval could be small 2493 call timing%start(timing%m_cor_cd_res) 2494 sres(:) = (0D0,0D0) ! Residue contrib. 2495 do iw=1,sig%nfreqeval 2496 2497 wx = wxi(iw) 2498 ! FHJ: need either wx>=0 and empty or wx<0 and occupied 2499 if ( (wx>=0.0d0) .eqv. flag_occ) cycle 2500 occ_sign = idint(sign(1d0, wx)) 2501 ! FHJ: and from now on we can work on | omega - e_n``k | 2502 wx = abs(wx) 2503 2504 ifreq = 0 2505 do ijk = 1, nfreq_real-1 2506 if (wx>=sig%dFreqGrid(ijk) .and. wx<sig%dFreqGrid(ijk+1)) then 2507 ifreq = ijk 2508 endif 2509 enddo 2510 if (ifreq==0) then 2511 ifreq = nfreq_real+3 ! three is for luck 2512 endif 2513 if(ifreq>=nfreq_real) then 2514 if (.not.warned .and. peinf%pool_rank==0) then 2515 write(0,777) iband, n1true, e_lk, e_n1kq, wx, E_max 2516 endif 2517 warned = .true. 2518 ifreq = nfreq_real-1 2519 endif 2520 2521 sres_omega = 0D0 2522 if (nfreq_real>1) then 2523 fact1 = (sig%dFreqGrid(ifreq+1)-wx)/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 2524 fact2 = (wx-sig%dFreqGrid(ifreq))/(sig%dFreqGrid(ifreq+1)-sig%dFreqGrid(ifreq)) 2525 endif 2526 2527 if (my_Nb_start > 0) then 2528 2529!$OMP PARALLEL do private (my_ib, sres_omega_acc, ig) & 2530!$OMP reduction(+:sres_omega) 2531 do my_ib = 1, my_Nb 2532 sres_omega_acc = (0D0,0D0) 2533 2534 ! JRD: The below loop is performance critical 2535 if (nfreq_real>1) then 2536 do ig = 1, Nb_tot 2537 sres_omega_acc = sres_omega_acc + Msub_n_temp(ig,n1)*(& 2538 Re_eps_sub(ig,my_ib,ifreq)*fact1 + & 2539 Re_eps_sub(ig,my_ib,ifreq+1)*fact2) 2540 end do 2541 else 2542 do ig = 1, Nb_tot 2543 sres_omega_acc = sres_omega_acc + Msub_n_temp(ig,n1)*Re_eps_sub(ig,my_ib,1) 2544 end do 2545 end if 2546 2547 sres_omega = sres_omega + MYCONJG(Msub_m_temp(my_ib+my_Nb_start-1,n1))*sres_omega_acc 2548 end do 2549!$OMP END PARALLEL DO 2550 2551 end if ! my_Nb_start > 0 2552 2553 if(ipe-1 == peinf%pool_rank) then 2554 if(fix_wings_res) then 2555 !XXXX if (nfreq_real>1) then 2556 !XXXX sres_omega = sres_omega + MYCONJG(aqsmtemp(wing_pos,n1)) * & 2557 !XXXX SUM((wings_Re(1:ncouls,ifreq,2)*fact1 + & 2558 !XXXX wings_Re(1:ncouls,ifreq+1,2)*fact2) * aqsntemp(1:ncouls,n1)) * & 2559 !XXXX vcoul(indinv(wing_pos)) 2560 !XXXX do ig = 1, ncouls 2561 !XXXX sres_omega = sres_omega + MYCONJG(aqsmtemp(ig,n1)) * & 2562 !XXXX (wings_Re(ig,ifreq,1)*fact1 + & 2563 !XXXX wings_Re(ig,ifreq+1,1)*fact2) * aqsntemp(wing_pos,n1) * & 2564 !XXXX vcoul(indinv(ig)) 2565 !XXXX end do 2566 !XXXX else 2567 !XXXX sres_omega = sres_omega + MYCONJG(aqsmtemp(wing_pos,n1)) * & 2568 !XXXX SUM(wings_Re(1:ncouls,1,2) * aqsntemp(1:ncouls,n1)) * & 2569 !XXXX vcoul(indinv(wing_pos)) 2570 !XXXX do ig = 1, ncouls 2571 !XXXX sres_omega = sres_omega + MYCONJG(aqsmtemp(ig,n1)) * & 2572 !XXXX wings_Re(ig,1,1) * & 2573 !XXXX aqsntemp(wing_pos,n1) * & 2574 !XXXX vcoul(indinv(ig)) 2575 !XXXX end do 2576 !XXXX end if 2577 sres_omega_acc = (0D0,0D0) 2578 if (nfreq_real>1) then 2579!$OMP PARALLEL do private (ig) & 2580!$OMP reduction(+:sres_omega_acc) 2581 do ig = 1, ncouls 2582 sres_omega_acc = sres_omega_acc + MYCONJG(aqsmtemp(wing_pos,n1)) * & 2583 (wings_Re(ig,ifreq,2)*fact1 + & 2584 wings_Re(ig,ifreq+1,2)*fact2) * aqsntemp(ig,n1) * & 2585 vcoul(indinv(wing_pos)) 2586 sres_omega_acc = sres_omega_acc + MYCONJG(aqsmtemp(ig,n1)) * & 2587 (wings_Re(ig,ifreq,1)*fact1 + & 2588 wings_Re(ig,ifreq+1,1)*fact2) * aqsntemp(wing_pos,n1) * & 2589 vcoul(indinv(ig)) 2590 end do 2591!$END OMP PARALLEL DO 2592 else 2593!$OMP PARALLEL do private (ig) & 2594!$OMP reduction(+:sres_omega_acc) 2595 do ig = 1, ncouls 2596 sres_omega_acc = sres_omega_acc + MYCONJG(aqsmtemp(wing_pos,n1)) * & 2597 wings_Re(ig,1,2) * aqsntemp(ig,n1) * & 2598 vcoul(indinv(wing_pos)) 2599 sres_omega_acc = sres_omega_acc + MYCONJG(aqsmtemp(ig,n1)) * & 2600 wings_Re(ig,1,1) * aqsntemp(wing_pos,n1) * & 2601 vcoul(indinv(ig)) 2602 end do 2603!$END OMP PARALLEL DO 2604 end if 2605 sres_omega = sres_omega + sres_omega_acc 2606 end if 2607 end if ! fix tail-wings 2608 2609 sres(iw) = sres(iw) - occ_sign*sres_omega 2610 2611 enddo ! iw loop 2612 call timing%stop(timing%m_cor_cd_res) 2613 2614 do iw = 1, sig%nfreqeval 2615 !FHJ: the output accumulated arrays "asxtDyn" and "asxtDyn" actually store 2616 !the residue and integral contribution to the GW self energy for CD 2617 !calculations, respectively. 2618 asxDtemp(iw) = asxDtemp(iw) + sres(iw) 2619 enddo ! over iw 2620 2621 end do ! n1 RESIDUAL PART 2622 if(ipe-1 == peinf%pool_rank) fix_wings_res = .false. 2623 2624 ! integral part 2625!$OMP PARALLEL do private (n1,ifreq) 2626 do n1 = 1, peinf%ntband_max 2627 do ifreq = 1, sig%nfreq_imag 2628 sW_imag_freqs(ifreq,n1) = ZERO 2629 end do 2630 end do 2631!$OMP END PARALLEL DO 2632 2633 if(my_Nb_start > 0) then 2634 2635 call timing%start(timing%m_cor_cd_int) 2636 ! go with ZGEMM 2637 SAFE_ALLOCATE(tempz, (my_Nb, sig%nfreq_imag, peinf%ntband_dist(ipe))) 2638 ! 2639 call timing%start(timing%m_cor_cd_gemm) 2640#ifdef CPLX 2641 call zgemm('t','n', my_Nb*sig%nfreq_imag, peinf%ntband_dist(ipe), Nb_tot, & 2642 (1D0,0D0), Im_eps_sub, Nb_tot, & 2643 Msub_n_temp, Nb_tot,& 2644 (0D0,0D0), tempz, my_Nb*sig%nfreq_imag) 2645#else 2646 call dgemm('t','n', my_Nb*sig%nfreq_imag, peinf%ntband_dist(ipe), Nb_tot, & 2647 1.0D0, Im_eps_sub, Nb_tot, & 2648 Msub_n_temp, Nb_tot,& 2649 0.0D0, tempz, my_Nb*sig%nfreq_imag) 2650 2651#endif 2652 call timing%stop(timing%m_cor_cd_gemm) 2653 2654!$OMP PARALLEL do private (n1,my_ib,my_igp,igp,indigp,sW_iomega_acc,ifreq) & 2655!$OMP reduction(+:sW_imag_freqs) 2656 do n1 = 1, peinf%ntband_dist(ipe) 2657 do ifreq = 1, sig%nfreq_imag 2658 sW_iomega_acc = ZERO 2659 do my_ib = 1, my_Nb 2660 sW_iomega_acc = sW_iomega_acc + & 2661 MYCONJG(Msub_m_temp(my_Nb_start+my_ib-1,n1))*tempz(my_ib,ifreq,n1) 2662 end do 2663 sW_imag_freqs(ifreq,n1) = sW_imag_freqs(ifreq,n1) + sW_iomega_acc 2664 end do 2665 end do 2666!$OMP END PARALLEL DO 2667 2668 SAFE_DEALLOCATE(tempz) 2669 end if ! my_Nb_start > 0 2670 2671 ! fix wings 2672 if(ipe-1 == peinf%pool_rank) then 2673 if(fix_wings) then 2674!$OMP PARALLEL do private (n1,ifreq) collapse(2) 2675 do n1 = 1, peinf%ntband_dist(ipe) 2676 do ifreq = 1, sig%nfreq_imag 2677 sW_imag_freqs(ifreq,n1) = sW_imag_freqs(ifreq,n1) + & 2678 MYCONJG(aqsmtemp(wing_pos,n1)) * & 2679 SUM(wings_Im(1:ncouls,ifreq,2) * aqsntemp(1:ncouls,n1)) * & 2680 vcoul(indinv(wing_pos)) 2681 end do 2682 end do 2683!$OMP END PARALLEL DO 2684 2685!$OMP PARALLEL do private (n1,ifreq,ig) collapse(2) 2686 do n1 = 1, peinf%ntband_dist(ipe) 2687 do ifreq = 1, sig%nfreq_imag 2688 do ig = 1, ncouls 2689 sW_imag_freqs(ifreq,n1) = sW_imag_freqs(ifreq,n1) + & 2690 MYCONJG(aqsmtemp(ig,n1)) * & 2691 wings_Im(ig,ifreq,1) * aqsntemp(wing_pos,n1) * & 2692 vcoul(indinv(ig)) 2693 end do 2694 end do 2695 end do 2696!$OMP END PARALLEL DO 2697 2698 fix_wings = .false. 2699 end if 2700 end if 2701 call timing%stop(timing%m_cor_cd_int) 2702 2703 call timing%start(timing%m_cor_cd_sum) 2704 ! calculate integral contribution 2705 do n1 = 1, peinf%ntband_dist(ipe) 2706 2707 ! n1true = "True" band index of the band n1 w.r.t. all bands 2708 n1true = peinf%indext_dist(n1,ipe) 2709 ! energy of the |n1,k-q> state 2710 e_n1kq = wfnkq%ekq(n1true,ispin) 2711 ! occupation of the |n1,k-q> state 2712 flag_occ = (n1true<=(sig%nvband+sig%ncrit)) & 2713 .and.((sig%ncrit==0).or.(e_n1kq<=(sig%efermi+sig%tol))) 2714 if (abs(e_n1kq-sig%efermi)<sig%tol) then 2715 occ = 0.5d0 ! Fermi-Dirac distribution = 1/2 at Fermi level 2716 else 2717 occ = 1.0d0 2718 endif 2719 2720 ! FHJ: CH term used for static remainder for current band "n1true". 2721 achstemp = ZERO 2722 2723 ! JRD: Find iw closest to e_lk 2724 diffmin = INF 2725 do iw=1,sig%nfreqeval 2726 diff = abs(freq0 + (iw-1)*sig%freqevalstep - e_lk) 2727 if (diff .lt. diffmin) then 2728 diffmin=diff 2729 iwlda=iw 2730 endif 2731 enddo 2732 2733 ! wxi = omega - e_n``k 2734 do iw=1,sig%nfreqeval 2735 wx = freq0 - e_n1kq + (iw-1)*sig%freqevalstep 2736 wxi(iw) = wx 2737 enddo 2738 2739 sint(:) = ZERO ! Integral contrib. 2740 imag_freqs(:) = IMAG(sig%dFreqGrid(nfreq_real+1:)+sig%dFreqBrd(nfreq_real+1:)) 2741 select case (sig%cd_int_method) 2742 2743 case (0) 2744 ! FHJ: Integration from Fabien`s thesis; assumes that the matrix elements 2745 ! are piecewise constant functions, and perform the integral over each 2746 ! interval centered are the frequency evaluation point. 2747 2748 ! JRD: XXX Should center this integral.... 2749 ! FHJ: What do you mean? 2750 ! MDB: explicit OMP parallelization over the sigma grid index 2751 2752!$OMP PARALLEL DO private (iw,ifreq,sW_iomega,freqStart,freqEnd) & 2753!$OMP reduction(+:sint) 2754 do iw=1,sig%nfreqeval 2755 ifreq = 1 2756 sW_iomega = sW_imag_freqs(ifreq,n1) 2757 freqStart = imag_freqs(ifreq) 2758 freqEnd = (imag_freqs(ifreq) + imag_Freqs(ifreq+1)) * 0.5d0 2759 2760 !XXX sint(:) = sint(:) + sW_iomega * ( atan(freqEnd/wxi(:)) - atan(freqStart/wxi(:)) ) 2761 sint(iw) = sint(iw) + sW_iomega * ( atan(freqEnd/wxi(iw)) - atan(freqStart/wxi(iw)) ) 2762 2763 !FHJ: no need to do OMP here, each trip is very short, and OMP can 2764 !hurt vectorization. 2765 do ifreq = 2, sig%nfreq_imag-1 2766 sW_iomega = sW_imag_freqs(ifreq,n1) 2767 freqStart = (imag_freqs(ifreq-1) + imag_Freqs(ifreq)) * 0.5d0 2768 freqEnd = (imag_freqs(ifreq) + imag_Freqs(ifreq+1)) * 0.5d0 2769 2770 !XXXX sint(:) = sint(:) + sW_iomega * ( atan(freqEnd/wxi(:)) - atan(freqStart/wxi(:)) ) 2771 sint(iw) = sint(iw) + sW_iomega * ( atan(freqEnd/wxi(iw)) - atan(freqStart/wxi(iw)) ) 2772 2773 enddo 2774 2775 sW_iomega = sW_imag_freqs(sig%nfreq_imag,n1) 2776 freqStart = (imag_freqs(sig%nfreq_imag-1) + imag_Freqs(sig%nfreq_imag)) * 0.5d0 2777 freqEnd = (-imag_freqs(sig%nfreq_imag-1) + 3d0*imag_Freqs(sig%nfreq_imag)) * 0.5d0 2778 2779 !XXXX sint(:) = sint(:) + sW_iomega * ( atan(freqEnd/wxi(:)) - atan(freqStart/wxi(:)) ) 2780 sint(iw) = sint(iw) + sW_iomega * ( atan(freqEnd/wxi(iw)) - atan(freqStart/wxi(iw)) ) 2781 2782 end do ! iw 2783!$OMP END PARALLEL DO 2784 2785 case (2) 2786 ! FHJ: Integration scheme: assume piecewise quadratic matrix elements 2787 ! on each integration segment 2788 2789 ! FHJ: First segment is special. TODO: assert that freqStart==0d0 2790 freqStart = imag_freqs(1) 2791 freqEnd = imag_freqs(2) 2792 c0 = sW_imag_freqs(1,n1) 2793 c1 = 0d0 2794 c2 = (sW_imag_freqs(2,n1) - c0) / (freqEnd-freqStart)**2 2795 sint(:) = sint(:) + & 2796 c2 * wxi(:) * (freqEnd-freqStart) + & 2797 (c0 - c2*wxi(:)**2) * (atan(freqEnd/wxi(:)) - atan(freqStart/wxi(:))) + & 2798 c1*wxi(:)*0.5d0*log((wxi(:)**2 + freqEnd**2)/(wxi(:)**2 + freqStart**2)) 2799 2800 ! FHJ: other segments: find c0, c1 and c2 by using each previous point. 2801 ! This also works for complex y`s. This sum is over the starting points. 2802 do ifreq = 2, sig%nfreq_imag-1 2803 x1 = imag_freqs(ifreq-1) 2804 x2 = imag_freqs(ifreq) 2805 x3 = imag_freqs(ifreq+1) 2806 y1 = sW_imag_freqs(ifreq-1,n1) 2807 y2 = sW_imag_freqs(ifreq,n1) 2808 y3 = sW_imag_freqs(ifreq+1,n1) 2809 c2 = ((y2-y1)*(x1-x3) + (y3-y1)*(x2-x1)) / & 2810 ((x1-x3)*(x2**2-x1**2) + (x2-x1)*(x3**2-x1**2)) 2811 c1 = ((y2-y1) - c2*(x2**2-x1**2)) / (x2-x1) 2812 c0 = y1 - c2*x1**2 - c1*x1 2813 freqEnd = x3 2814 freqStart = x2 2815 sint(:) = sint(:) + & 2816 c2 * wxi(:) * (freqEnd-freqStart) + & 2817 (c0 - c2*wxi(:)**2) * (atan(freqEnd/wxi(:)) - atan(freqStart/wxi(:))) + & 2818 c1*wxi(:)*0.5d0*log((wxi(:)**2 + freqEnd**2)/(wxi(:)**2 + freqStart**2)) 2819 enddo 2820 2821 ! FHJ: add Lorentzian tail? This is only a good idea is the last freq. is really large 2822 !freqStart = x3 2823 !AA = y3*x3**2 2824 !sint(:) = sint(:) + AA/(2d0*wxi(:)**2*freqStart)*( & 2825 ! 2d0*wxi(:) - sign(1d0,wxi(:))*PI_D*freqStart + 2d0*freqStart*atan(freqStart/wxi(:)) ) 2826 2827 2828 case (3) 2829 ! FHJ: Integration scheme: assume piecewise cubic matrix elements 2830 ! on each integration segment. Find cubic Hermite spline representation 2831 ! using a finite difference approximations for the derivatives 2832 2833 ! Estimate derivatives 2834 mm(1) = ZERO 2835 mm(2:sig%nfreq_imag-1) = & 2836 0.5d0*(sW_imag_freqs(3:sig%nfreq_imag,n1) - sW_imag_freqs(2:sig%nfreq_imag-1,n1)) / & 2837 ( imag_freqs(3:sig%nfreq_imag) - imag_freqs(2:sig%nfreq_imag-1) ) + & 2838 0.5d0*(sW_imag_freqs(2:sig%nfreq_imag-1,n1) - sW_imag_freqs(1:sig%nfreq_imag-2,n1)) / & 2839 ( imag_freqs(2:sig%nfreq_imag-1) - imag_freqs(1:sig%nfreq_imag-2) ) 2840 mm(sig%nfreq_imag) = mm(sig%nfreq_imag-1) + & 2841 (imag_freqs(sig%nfreq_imag)-imag_freqs(sig%nfreq_imag-1)) * & 2842 (mm(sig%nfreq_imag-1)-mm(sig%nfreq_imag-2)) / & 2843 (imag_freqs(sig%nfreq_imag-1)-imag_freqs(sig%nfreq_imag-2)) 2844 2845 ! FHJ: other segments: find c0, c1 and c2 by using each previous point. 2846 ! This also works for complex y`s. This sum is over the starting points. 2847 do ifreq = 1, sig%nfreq_imag-1 2848 x1 = imag_freqs(ifreq) 2849 x2 = imag_freqs(ifreq+1) 2850 y1 = sW_imag_freqs(ifreq,n1) 2851 y2 = sW_imag_freqs(ifreq+1,n1) 2852 dx = x2 - x1 2853 2854 c0 = -(m1*x1) - (2*m1*x1**2)/dx - (m2*x1**2)/dx - (m1*x1**3)/dx**2 - & 2855 (m2*x1**3)/dx**2 + y1 - (3*x1**2*y1)/dx**2 - (2*x1**3*y1)/dx**3 + & 2856 (3*x1**2*y2)/dx**2 + (2*x1**3*y2)/dx**3 2857 c1 = m1 + (4*m1*x1)/dx + (2*m2*x1)/dx + (3*m1*x1**2)/dx**2 + & 2858 (3*m2*x1**2)/dx**2 + (6*x1*y1)/dx**2 + (6*x1**2*y1)/dx**3 - & 2859 (6*x1*y2)/dx**2 - (6*x1**2*y2)/dx**3 2860 c2 = (-2*m1)/dx - m2/dx - (3*m1*x1)/dx**2 - (3*m2*x1)/dx**2 - & 2861 (3*y1)/dx**2 - (6*x1*y1)/dx**3 + (3*y2)/dx**2 + (6*x1*y2)/dx**3 2862 c3 = m1/dx**2 + m2/dx**2 + (2*y1)/dx**3 - (2*y2)/dx**3 2863 2864 sint(:) = sint(:) + ((wxi*dx)*(2*c2 + c3*(x1+x2)) - & 2865 2d0*(c0 - wxi**2*c2)*(atan(x1/wxi) - atan(x2/wxi)) + & 2866 wxi*(-c1 + wxi**2*c3)*log((wxi**2 + x1**2)/(wxi**2 + x2**2)))/2d0 2867 enddo 2868 2869 case default 2870 call die('Invalid integration method') 2871 2872 endselect !sig%cd_int_method 2873 call timing%stop(timing%m_cor_cd_sum) 2874 2875 do iw = 1, sig%nfreqeval 2876 !FHJ: the output accumulated arrays "asxtDyn" and "asxtDyn" actually store 2877 !the residue and integral contribution to the GW self energy for CD 2878 !calculations, respectively. 2879 achDtemp(iw) = achDtemp(iw) + sint(iw)/PI_D 2880 ! JRD: This is now close to LDA 2881 if (iw==iwlda) achtD_n1(n1true) = achtD_n1(n1true) + sint(iw)/PI_D 2882 enddo ! over iw 2883 2884 end do ! n1 2885 2886777 format(1x,"WARNING: The real frequency range is too small." & 2887 ,/,3x,"l =",i3,1x,"n1 =",i5,1x,"E_l =",f8.3,1x,"E_n1" & 2888 ,1x,"=",f8.3,1x,"wx =",f8.3,1x,"E_max =",f8.3) 2889 2890 POP_SUB(mtxel_cor.sigma_cd_subspace) 2891 2892 end subroutine sigma_cd_subspace 2893 2894end subroutine mtxel_cor 2895 2896!> Accumulates the partial static CH as a sum over all bands. We use this to 2897!! calculate the static remainder later on as 1/2 of the difference between the 2898!! exact static CH and the partial static CH obtained here. 2899subroutine acc_static_ch(ngpown, ncouls, inv_igp_index, indinv, vcoul, & 2900 aqsn_n1, aqsm_n1, achs, eps_scalar, eps_cplx) 2901 integer, intent(in) :: ngpown, ncouls, inv_igp_index(:), indinv(:) 2902 real(DP), intent(in) :: vcoul(:) 2903 SCALAR, intent(in) :: aqsn_n1(:), aqsm_n1(:) 2904 SCALAR, intent(inout) :: achs 2905 SCALAR, intent(in), optional :: eps_scalar(:,:) 2906 complex(DPC), intent(in), optional :: eps_cplx(:,:) 2907 2908 SCALAR :: schs 2909 integer :: my_igp, igp, indigp, ig 2910 2911 PUSH_SUB(acc_static_ch) 2912 2913 if (present(eps_scalar).eqv.present(eps_cplx)) & 2914 call die('Internal error calling acc_static_ch', only_root_writes=.true.) 2915 2916 call timing%start(timing%m_cor_remain) 2917!$OMP PARALLEL do private (my_igp,indigp,igp,ig,schs) reduction(+:achs) 2918 do my_igp = 1, ngpown 2919 indigp = inv_igp_index(my_igp) 2920 igp = indinv(indigp) 2921 if (igp>ncouls .or. igp<=0) cycle 2922 2923 schs = ZERO 2924 if (present(eps_scalar)) then 2925 do ig = 1, ncouls 2926 schs = schs - aqsn_n1(ig)*eps_scalar(ig,my_igp) 2927 enddo 2928 else 2929 do ig = 1, ncouls 2930 schs = schs - aqsn_n1(ig)*SCALARIFY(eps_cplx(ig,my_igp)) 2931 enddo 2932 endif 2933 achs = achs + MYCONJG(aqsm_n1(igp))*schs*vcoul(igp)*0.5D0 2934 enddo 2935!$OMP END PARALLEL DO 2936 call timing%stop(timing%m_cor_remain) 2937 2938 POP_SUB(acc_static_ch) 2939 2940end subroutine acc_static_ch 2941 2942end module mtxel_cor_m 2943 2944