1! 2! Copyright (C) 2001-2013 Quantum ESPRESSO group 3! This file is distributed under the terms of the 4! GNU General Public License. See the file `License' 5! in the root directory of the present distribution, 6! or http://www.gnu.org/copyleft/gpl.txt . 7! 8! 9 10 11 12subroutine do_reducible_pola(tf ,options) 13!this subroutine calculates and writes on disk the reducible polarizability from the screen interaction 14 15 USE kinds, ONLY : DP 16 USE io_global, ONLY : stdout, ionode, ionode_id 17 USE input_gw, ONLY : input_options 18 USE basic_structures, ONLY : v_pot,wannier_u,free_memory, initialize_memory,lanczos_chain, vt_mat_lanczos,tt_mat_lanczos,& 19 & semicore 20 USE green_function, ONLY : green, read_green, free_memory_green, initialize_green 21 USE polarization, ONLY : polaw, free_memory_polaw, read_polaw, write_polaw,invert_v_pot, initialize_polaw, & 22 & read_polaw_global 23 USE mp, ONLY : mp_sum, mp_bcast 24 USE mp_world, ONLY : nproc,mpime,world_comm 25 USE times_gw, ONLY : times_freqs 26 USE self_energy_storage, ONLY : self_storage,write_self_storage_ondisk,free_memory_self_storage 27 USE lanczos 28 USE constants, ONLY : tpi,pi 29 USE start_end ! debug 30 USE parallel_include 31 32 implicit none 33 34 TYPE(times_freqs), INTENT(in) :: tf!for time frequency grids 35 TYPE(input_options) :: options 36 TYPE(self_storage) :: ss 37 38 TYPE(v_pot) :: vp,vpi 39 TYPE(polaw) :: ww!dressed polarization 40 INTEGER :: l_blk, nbegin,nend, nsize 41 REAL(kind=DP), ALLOCATABLE:: wtemp(:,:) 42 INTEGER :: iw 43 REAL(kind=DP) :: v_head 44 45 nullify(vp%vmat) 46 nullify(vpi%vmat) 47 call initialize_polaw(ww) 48 49 50 51 write(stdout,*) 'Trasform W to Pgreek' 52 FLUSH(stdout) 53 54 if(options%w_divergence == 2) then 55 call read_data_pw_v(vp,options%prefix,options%debug,0,.true.) 56 else 57 call read_data_pw_v(vp,options%prefix,options%debug,0,.false.) 58 endif 59 v_head=vp%vmat(vp%numpw,vp%numpw) 60 61 call invert_v_pot(vp,vpi) 62 call free_memory(vp) 63 64 l_blk= (tf%n+1)/nproc 65 if(l_blk*nproc < (tf%n+1)) l_blk = l_blk+1 66 nbegin=mpime*l_blk 67 nend=nbegin+l_blk-1 68 69 70!loop on imaginary frequency i\omega 71 do iw=nbegin,nbegin+l_blk-1 72 if(iw <= tf%n) then 73 74 call read_polaw(iw,ww,options%debug,options%l_verbose) 75 76 allocate(wtemp(ww%numpw,ww%numpw)) 77 call dgemm('N','N',ww%numpw,ww%numpw,ww%numpw,1.d0,vpi%vmat,ww%numpw,ww%pw,ww%numpw,& 78 &0.d0, wtemp,ww%numpw) 79 call dgemm('N','N',ww%numpw,ww%numpw,ww%numpw,1.d0,wtemp,ww%numpw,vpi%vmat,ww%numpw,& 80 &0.d0,ww%pw,ww%numpw) 81 deallocate(wtemp) 82 83 call write_polaw(ww,options%debug) 84 endif 85 enddo 86 87 88 call free_memory(vpi) 89 call free_memory_polaw(ww) 90 write(stdout,*) 'Done' 91 FLUSH(stdout) 92 93 94 95 return 96end subroutine do_reducible_pola 97 98subroutine do_self_lanczos_time(ss, tf ,options,l_real_axis,energy) 99!this subroutine calculte the self-energy on time using fourier trasfrom using the lanczos scheme 100 101 USE kinds, ONLY : DP 102 USE io_global, ONLY : stdout, ionode, ionode_id 103 USE input_gw, ONLY : input_options 104 USE basic_structures, ONLY : v_pot,wannier_u,free_memory, initialize_memory,lanczos_chain, vt_mat_lanczos,tt_mat_lanczos,& 105 & semicore 106 USE green_function, ONLY : green, read_green, free_memory_green, initialize_green 107 USE polarization, ONLY : polaw, free_memory_polaw, read_polaw, write_polaw,invert_v_pot, initialize_polaw, & 108 & read_polaw_global 109 USE mp, ONLY : mp_sum, mp_bcast 110 USE mp_world, ONLY : nproc,mpime,world_comm 111 USE times_gw, ONLY : times_freqs 112 USE self_energy_storage, ONLY : self_storage,write_self_storage_ondisk,free_memory_self_storage 113 USE lanczos 114 USE constants, ONLY : tpi,pi 115 USE start_end ! debug 116 USE parallel_include 117 USE io_files, ONLY : prefix, tmp_dir 118 119 implicit none 120 121 TYPE(times_freqs), INTENT(in) :: tf!for time frequency grids 122 TYPE(input_options) :: options 123 TYPE(self_storage) :: ss 124 LOGICAL, INTENT(in) :: l_real_axis 125!if true calculates on real frequency axis at given energy 126 REAL(kind=DP), INTENT(in) :: energy!energy on real axis at which calculating the self-energy (or part of it) 127 !only if l_real_axis == true 128 129 130 TYPE(v_pot) :: vp,vpi 131 TYPE(polaw) :: ww!dressed polarization 132 REAL(kind=DP) :: inv_epsi,v_head 133 INTEGER :: l_blk, nbegin,nend, nsize, l_blk_freq, nbegin_freq,nend_freq 134 REAL(kind=DP), ALLOCATABLE:: wtemp(:,:) 135 INTEGER :: iw 136 TYPE(wannier_u) :: uu 137 REAL(kind=DP) :: offset 138 TYPE(lanczos_chain) :: lc 139 REAL(kind=DP), ALLOCATABLE :: re_e_mat(:,:,:),im_e_mat(:,:,:) 140 COMPLEX(kind=DP), ALLOCATABLE :: e_mat_tmp(:,:,:) 141 COMPLEX(kind=DP) :: af(1) 142 REAL(kind=DP), ALLOCATABLE :: pw_mat(:,:,:),pw_mat_t(:,:,:) 143 INTEGER :: numpw,numt,numl 144 INTEGER :: it, ii,jj 145 REAL(kind=DP) :: time,factor 146 COMPLEX(kind=DP) :: cfactor 147 REAL(kind=DP), ALLOCATABLE :: pw_tmp(:,:),pw_dumm(:,:) 148 INTEGER :: iproc_time,ierr 149 150 TYPE(vt_mat_lanczos) :: vtl,vtl_j 151 TYPE(tt_mat_lanczos) :: ttl,ttl_j 152 153 REAL(kind=DP), ALLOCATABLE :: re_g_mat(:,:,:), im_g_mat(:,:,:),re_g_mat_t(:,:,:), im_g_mat_t(:,:,:) 154 REAL(kind=DP), ALLOCATABLE :: tmp_mat(:,:),tmp_mat1(:,:),tmp_mat2(:,:) 155 REAL(kind=DP), ALLOCATABLE :: g_tmp(:,:), g_dumm(:,:), re_h_mat(:,:),im_h_mat(:,:) 156 REAL(kind=DP), EXTERNAL :: DDOT 157 158 LOGICAL :: l_single=.true.!if true e_mat is saved in single precision 159 160 REAL(kind=4), ALLOCATABLE :: re_e_mat_single(:,:,:),im_e_mat_single(:,:,:) 161 REAL(kind=DP), ALLOCATABLE :: e_mat_double(:,:) 162 INTEGER :: l_blk_t, nbegin_t,nend_t, nsize_t,in 163 INTEGER, PARAMETER :: ndivt=1!10 164 INTEGER :: l_blk_g, nbegin_g,nend_g, nsize_g!paremter for optional dedicated frequency grid for G 165 INTEGER :: j_min, j_max, is 166 167 TYPE(semicore) :: sc 168 REAL(kind=DP), ALLOCATABLE :: tmp_vec_sc(:) 169 INTEGER :: iv_sc 170 171 LOGICAL, PARAMETER :: l_distribute_sm=.true.!if true the S matrices are distributed among mpi tasks instead of being read from disk 172 !it requires the parameter l_single==false 173 INTEGER :: l_blk_sm, nbegin_sm,nend_sm, nsize_sm,iproc 174 REAL(kind=DP), ALLOCATABLE :: st_save(:,:,:), sl_save(:,:,:) 175 COMPLEX(kind=DP), ALLOCATABLE :: exp_table(:,:) 176 REAL(kind=DP), ALLOCATABLE :: re_e_mat_t(:,:,:),im_e_mat_t(:,:,:)!in time for fourier trasform 177 REAL(kind=DP), ALLOCATABLE :: re_e_mat_part(:,:,:),im_e_mat_part(:,:,:)!in time for storing partial calculations 178 REAL(kind=DP), ALLOCATABLE :: pw_part_t(:,:,:)!in time for storing partial calculations 179 INTEGER :: n_cycles, i_cycles,i_min_cycles,i_max_cycles 180 INTEGER :: n_list(2),iun,iun2 181 INTEGER, ALLOCATABLE :: i_list(:,:) 182 INTEGER, EXTERNAL :: find_free_unit 183 184 if(options%whole_s) then 185 l_single=.false. 186 endif 187 write(stdout,*) 'Routine do_self_lanczos_time' 188 FLUSH(stdout) 189 190 if(options%l_big_system) then 191 n_cycles=options%i_max-options%i_min+1 192 else 193 n_cycles=1 194 endif 195 196 if(options%l_list) then 197 if(ionode) then 198 iun = find_free_unit() 199 open( unit=iun, file=trim(tmp_dir)//trim(prefix)//'-'//'list_1.dat', status='old') 200 read(iun,*) n_list(1) 201 if(uu%nspin==2) then 202 iun2 = find_free_unit() 203 open( unit=iun2, file=trim(tmp_dir)//trim(prefix)//'-'//'list_2.dat', status='old') 204 read(iun,*) n_list(2) 205 else 206 n_list(2)=0 207 endif 208 endif 209 call mp_bcast(n_list,ionode_id,world_comm) 210 allocate(i_list(max(n_list(1),n_list(2)),2)) 211 i_list=0 212 if(ionode) then 213 do ii=1,n_list(1) 214 read(iun,*) i_list(ii,1) 215 enddo 216 close(iun) 217 if(uu%nspin==2) then 218 do ii=1,n_list(2) 219 read(iun2,*) i_list(ii,2) 220 enddo 221 close(iun2) 222 endif 223 endif 224 call mp_bcast(i_list,ionode_id,world_comm) 225 n_cycles=n_list(1) 226 endif 227 228 229 230!keeps in memory G, P(i\tau) 231 232 233 nullify(vp%vmat) 234 nullify(vpi%vmat) 235 call initialize_polaw(ww) 236 237 238 call initialize_memory(sc) 239 240 241!calculate offset 242!read in DFT energies 243 244 call read_data_pw_u(uu,options%prefix) 245 246 ss%ontime=.true. 247 ss%max_i=options%max_i 248 ss%i_min=options%i_min 249 ss%i_max=options%i_max 250 ss%n=tf%n 251 ss%tau=options%tau 252 ss%whole_s=options%whole_s 253 ss%n_grid_fit=tf%n_grid_fit 254 if(ss%whole_s) then 255 ss%i_min_whole=options%i_min_whole 256 ss%i_max_whole=options%i_max_whole 257 endif 258 259 ss%nspin=uu%nspin 260 if(ss%whole_s) then 261 allocate(ss%whole(ss%i_min_whole:ss%i_max_whole,ss%max_i,2*ss%n+1,ss%nspin)) 262 ss%whole(:,:,:,:)=(0.d0,0.d0) 263 allocate(ss%whole_freq_fit(ss%i_min_whole:ss%i_max_whole,ss%max_i,2*ss%n_grid_fit+1,ss%nspin)) 264 ss%whole_freq_fit(:,:,:,:)=(0.d0,0.d0) 265 allocate(ss%diag(ss%max_i,2*ss%n+1,ss%nspin)) 266 ss%diag(:,:,:)=(0.d0,0.d0) 267 allocate(ss%diag_freq_fit(ss%max_i,2*ss%n_grid_fit+1,ss%nspin)) 268 ss%diag_freq_fit(:,:,:)=(0.d0,0.d0) 269 else 270 allocate(ss%diag(ss%max_i,2*ss%n+1,ss%nspin)) 271 ss%diag(:,:,:)=(0.d0,0.d0) 272 nullify(ss%whole) 273 allocate(ss%diag_freq_fit(ss%max_i,2*ss%n_grid_fit+1,ss%nspin)) 274 ss%diag_freq_fit(:,:,:)=(0.d0,0.d0) 275 nullify(ss%whole_freq_fit) 276 endif 277!for compatibility 278 allocate(ss%ene_remainder(ss%max_i,1)) 279 ss%ene_remainder(:,1)=0.d0 280 281!loop on spin 282 do is=1,ss%nspin 283 284 285!NOT_TO_BE_INCLUDED_START 286 if(options%l_semicore) call read_data_pw_semicore(sc, options%prefix, is) 287!NOT_TO_BE_INCLUDED_END 288 if(.not.l_real_axis) then 289 if(uu%nums > uu%nums_occ(is)) then 290 offset=-(uu%ene(uu%nums_occ(is)+1,is)+uu%ene(uu%nums_occ(is),is))/2.d0! CUSSI XE GIUSTO DEBUG 291 !offset=-(uu%ene(uu%nums_occ(2)+1,2)+uu%ene(uu%nums_occ(1),1))/2.d0 292 else 293 offset=-uu%ene(uu%nums_occ(is),is) 294 endif 295 else 296 offset=-energy 297 endif 298 299 300!!!!!!!! 301 l_blk= (tf%n+1)/nproc 302 if(l_blk*nproc < (tf%n+1)) l_blk = l_blk+1 303 nbegin=mpime*l_blk 304 nend=nbegin+l_blk-1 305 if(nend > tf%n) nend=tf%n 306 nsize=nend-nbegin+1 307 308 309 !read polarizability matrices 310 311 if( options%l_verbose) write(stdout,*) 'Read Pgreek' 312 FLUSH(stdout) 313 314 do iw=nbegin,nend 315 call read_polaw(iw,ww,options%debug,options%l_verbose) 316 if(iw==nbegin) allocate(pw_mat(ww%numpw,ww%numpw,l_blk)) 317 pw_mat(:,:,iw-nbegin+1)=ww%pw(:,:) 318 call free_memory_polaw(ww) 319 enddo 320 numpw=ww%numpw 321 call mp_bcast(numpw, ionode_id,world_comm) 322 if(nbegin > tf%n) allocate(pw_mat(numpw,numpw,l_blk)) 323 324!Fourier trasform reducible polarizability matrices to imaginary time 325 326 327 write(stdout,*) 'Fourier trasform Pgreek' 328 FLUSH(stdout) 329 330 331 allocate(pw_tmp(numpw,numpw)) 332 allocate(pw_dumm(numpw,numpw)) 333 allocate(pw_mat_t(numpw,numpw,l_blk)) 334!loop on time 335 do it=0,tf%n 336!each procs sums up its matrices in frequency with opportune factor 337 time=tf%times(it) 338 pw_tmp(:,:)=0.d0 339 do iw=nbegin,nend 340 factor=2.d0*dble(tf%weights_freq(iw)*exp((0.d0,1.d0)*tf%times(it)*tf%freqs_eff(iw)))/(2.d0*pi) 341 pw_tmp(:,:)=pw_tmp(:,:)+pw_mat(:,:,iw-nbegin+1)*factor 342 enddo 343#if defined(__MPI) 344!the distribution of times on procs is the same of that for frequecies 345 iproc_time=it/l_blk 346!all processors sums to iproc_time 347 if(iproc_time==mpime) then 348 call MPI_REDUCE(pw_tmp,pw_mat_t(1,1,it-nbegin+1),numpw*numpw,MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr) 349 else 350 call MPI_REDUCE(pw_tmp,pw_dumm,numpw*numpw,MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr) 351 endif 352 353#else 354 pw_mat_t(:,:,it+1)=pw_tmp 355 356#endif 357 358 359!mp_sum to processer owing that time 360!this processor put on opportune array 361 enddo 362 deallocate(pw_tmp) 363 deallocate(pw_dumm) 364 deallocate(pw_mat) 365 366 l_blk_g= (tf%n_g+1)/nproc 367 if(l_blk_g*nproc < (tf%n_g+1)) l_blk_g = l_blk_g+1 368 nbegin_g=mpime*l_blk_g 369 nend_g=nbegin_g+l_blk_g-1 370 if(nend_g > tf%n_g) nend_g=tf%n_g 371 nsize_g=nend_g-nbegin_g+1 372!allocate and compute table for Fourier trasform 373 allocate(exp_table(tf%n+1,l_blk_g)) 374 375 do it=0,tf%n 376 do iw=nbegin_g,nend_g 377 exp_table(it+1,iw-nbegin_g+1)=exp((0.d0,1.d0)*tf%times(it)*tf%freqs_g_eff(iw)) 378 enddo 379 enddo 380 381 382 383 384 do i_cycles=1,n_cycles 385!calculates G 386 387 call initialize_memory(lc) 388 389 if(.not.options%l_list) then 390 call read_data_pw_lanczos_chain(lc, i_cycles, options%prefix, .false.,is) 391 else 392 call read_data_pw_lanczos_chain(lc, i_list(i_cycles,is), options%prefix, .false.,is) 393 endif 394 write(stdout,*) 'Lanczos dimensions', lc%numt,lc%num_steps,is 395 FLUSH(stdout) 396 397 398 399 400 401 if(.not.l_single) then 402 allocate(re_e_mat(lc%numt,lc%numt,l_blk_g)) 403 allocate(im_e_mat(lc%numt,lc%numt,l_blk_g)) 404 405 allocate(e_mat_tmp(lc%numt,lc%numt,1)) 406 do iw=nbegin_g,nbegin_g+l_blk_g-1 407 if(iw <= tf%n_g) then 408 af(1)=dcmplx(offset,-tf%freqs_g(iw)) 409 call solve_lanczos_complex(1,af,e_mat_tmp,lc) 410 re_e_mat(:,:,iw-nbegin_g+1)=dble(e_mat_tmp(:,:,1)) 411 im_e_mat(:,:,iw-nbegin_g+1)=dimag(e_mat_tmp(:,:,1)) 412 else 413 call solve_lanczos_fake_complex(lc) 414 endif 415 416 end do 417 deallocate(e_mat_tmp) 418!for entire self-energy store 419!!!!!!!!!!!!!! 420 if( ss%whole_s .and.l_distribute_sm ) then 421 if( options%l_verbose) write(stdout,*) 'before allocate',lc%numt,l_blk 422 FLUSH(stdout) 423 allocate(re_e_mat_t(lc%numt,lc%numt,l_blk), im_e_mat_t(lc%numt,lc%numt,l_blk)) 424 if( options%l_verbose) write(stdout,*) 'after ' 425 allocate(g_tmp(lc%numt,lc%numt)) 426 allocate(g_dumm(lc%numt,lc%numt)) 427 if( options%l_verbose) write(stdout,*) 'ATT1' 428 FLUSH(stdout) 429 430 numt=lc%numt 431!loop on time 432 do it=0,tf%n 433!each procs sums up its matrices in frequency with opportune factor 434 time=tf%times(it) 435 g_tmp(:,:)=0.d0 436 do iw=nbegin_g,nend_g 437 factor=2.d0*dble(tf%weights_freq_g(iw)*exp_table(it+1,iw-nbegin_g+1))/(2.d0*pi) 438 g_tmp(1:numt,1:numt)=g_tmp(1:numt,1:numt)+re_e_mat(1:numt,1:numt,iw-nbegin_g+1)*factor 439 enddo 440#if defined(__MPI) 441!the distribution of times on procs is the same of that for frequecies 442 iproc_time=it/l_blk 443!all processors sums to iproc_time 444 if(iproc_time==mpime) then 445 call MPI_REDUCE(g_tmp,re_e_mat_t(1,1,it-nbegin+1),numt*numt,& 446 &MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr) 447 else 448 call MPI_REDUCE(g_tmp,g_dumm,numt*numt,MPI_DOUBLE_PRECISION,& 449 &MPI_SUM,iproc_time,world_comm,ierr) 450 endif 451 452#else 453 re_e_mat_t(1:numt,1:numt,it+1)=g_tmp(1:numt,1:numt) 454 455#endif 456 457 enddo 458!loop on time 459 do it=0,tf%n 460!each procs sums up its matrices in frequency with opportune factor 461 time=tf%times(it) 462 g_tmp(:,:)=0.d0 463 do iw=nbegin_g,nend_g 464 factor=-2.d0*dimag(tf%weights_freq_g(iw)*exp_table(it+1,iw-nbegin_g+1))/(2.d0*pi) 465 g_tmp(1:numt,1:numt)=g_tmp(1:numt,1:numt)+im_e_mat(1:numt,1:numt,iw-nbegin_g+1)*factor 466 enddo 467#if defined(__MPI) 468!the distribution of times on procs is the same of that for frequecies 469 iproc_time=it/l_blk 470!all processors sums to iproc_time 471 if(iproc_time==mpime) then 472 call MPI_REDUCE(g_tmp,im_e_mat_t(1,1,it-nbegin+1),numt*numt,& 473 &MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr) 474 else 475 call MPI_REDUCE(g_tmp,g_dumm,numt*numt,MPI_DOUBLE_PRECISION,& 476 &MPI_SUM,iproc_time,world_comm,ierr) 477 endif 478 479#else 480 im_e_mat_t(1:numt,1:numt,it+1)=g_tmp(1:numt,1:numt) 481 482#endif 483 484 enddo 485 486 deallocate(g_tmp,g_dumm) 487 endif 488 if( options%l_verbose) write(stdout,*) 'ATT2' !DEBUG 489 FLUSH(stdout) 490 491 else 492 if(i_cycles==1) then 493 allocate(re_e_mat_single(lc%numt,lc%numt,l_blk_g)) 494 allocate(im_e_mat_single(lc%numt,lc%numt,l_blk_g)) 495 endif 496 do iw=nbegin_g,nbegin_g+l_blk_g-1 497 if(iw <= tf%n_g) then 498 af(1)=dcmplx(offset,-tf%freqs_g(iw)) 499 call solve_lanczos_single(af(1),re_e_mat_single(1,1,iw-nbegin_g+1),im_e_mat_single(1,1,iw-nbegin_g+1),lc) 500 else 501 call solve_lanczos_fake_single(lc) 502 endif 503 504 end do 505 l_blk_t= (lc%numt)/ndivt 506 if(l_blk_t*ndivt < (lc%numt)) l_blk_t = l_blk_t+1 507 508 509 if(i_cycles==1) allocate(e_mat_double(lc%numt,l_blk_t)) 510 511 endif 512 if( options%l_verbose) write(stdout,*) 'Done' 513 FLUSH(stdout) 514 515 call initialize_memory(vtl) 516 call initialize_memory(ttl) 517 call initialize_memory(vtl_j) 518 call initialize_memory(ttl_j) 519 520!if required read all S matrices and distribute among mpi tasks 521 522 if( ss%whole_s .and.l_distribute_sm ) then 523 l_blk_sm= (ss%max_i)/nproc 524 if(l_blk_sm*nproc < (ss%max_i)) l_blk_sm = l_blk_sm+1 525 nbegin_sm=mpime*l_blk_sm+1 526 nend_sm=nbegin_sm+l_blk_sm-1 527 if(nend_sm > ss%max_i) nend_sm=ss%max_i 528 nsize_sm=nend_sm-nbegin_sm+1 529 do jj=1,ss%max_i 530 call read_data_pw_vt_mat_lanczos(vtl_j, jj, options%prefix, .false.,is) 531 call read_data_pw_tt_mat_lanczos(ttl_j, jj, options%prefix, .false.,is) 532 if(jj==1) then 533 numt=ttl_j%numt 534 numl=ttl_j%numl 535 allocate(sl_save(numpw,numl,l_blk_sm)) 536 allocate(st_save(numt,numl,l_blk_sm)) 537 endif 538 if(jj>=nbegin_sm .and. jj<=nend_sm) then 539 st_save(1:numt,1:numl,jj-nbegin_sm+1)=ttl_j%tt_mat(1:numt,1:numl) 540 sl_save(1:numpw,1:numl,jj-nbegin_sm+1)=vtl_j%vt_mat(1:numpw,1:numl) 541 endif 542 call free_memory(vtl_j) 543 call free_memory(ttl_j) 544 enddo 545 allocate(re_e_mat_part(numt,numl,l_blk),im_e_mat_part(numt,numl,l_blk)) 546 allocate(pw_part_t(numpw,numl,l_blk)) 547 endif 548 549 550 551 552 553 !loop on KS states 554 if(options%l_big_system) then 555 if(.not.options%l_list) then 556 i_min_cycles=ss%i_min+i_cycles-1 557 i_max_cycles=i_min_cycles 558 else 559 i_min_cycles=i_list(i_cycles,is) 560 i_max_cycles=i_min_cycles 561 endif 562 else 563 i_min_cycles=ss%i_min 564 i_max_cycles=ss%i_max 565 endif 566 567 do ii=i_min_cycles,i_max_cycles 568 write(stdout,*) 'Loop on KS:',ii, is 569 FLUSH(stdout) 570 571 572 573!calculates G on partial basis 574!read vtl ttl 575 call read_data_pw_vt_mat_lanczos(vtl, ii, options%prefix, .false.,is) 576 call read_data_pw_tt_mat_lanczos(ttl, ii, options%prefix, .false.,is) 577 if(ii==i_min_cycles.and.i_cycles==1) then 578 numt=ttl%numt 579 numl=ttl%numl 580 allocate(re_g_mat(numl,numl,l_blk_g),im_g_mat(numl,numl,l_blk_g)) 581 allocate(re_g_mat_t(numl,numl,l_blk),im_g_mat_t(numl,numl,l_blk)) 582 endif 583!multiply and put on array 584!loop on frequency 585 if(.not.ss%whole_s) then 586 j_min=ii 587 j_max=ii 588 else 589 j_min=ss%i_min_whole 590 j_max=ss%i_max_whole 591 endif 592 if( options%l_verbose) write(stdout,*) 'Doing dgemms',numl,numt,numl,numpw 593 594 if(ss%whole_s .and. l_distribute_sm) then!if required calculates partial products 595 do it=nbegin,nend 596 call dgemm('N','N',numt,numl,numt,1.d0,re_e_mat_t(1,1,it-nbegin+1),numt,& 597 &ttl%tt_mat,numt,0.d0,re_e_mat_part(1,1,it-nbegin+1),numt) 598 call dgemm('N','N',numt,numl,numt,1.d0,im_e_mat_t(1,1,it-nbegin+1),numt,& 599 &ttl%tt_mat,numt,0.d0,im_e_mat_part(1,1,it-nbegin+1),numt) 600 call dgemm('N','N',numpw,numl,numpw,1.d0,pw_mat_t(1,1,it-nbegin+1) ,numpw, & 601 & vtl%vt_mat,numpw,0.d0,pw_part_t(1,1,it-nbegin+1),numpw) 602 enddo 603 604 605 endif 606 FLUSH(stdout) 607 do jj=j_min,j_max 608 if(ss%whole_s .and. l_distribute_sm) then 609 allocate(ttl_j%tt_mat(numt,numl)) 610 allocate(vtl_j%vt_mat(numpw,numl)) 611 if(jj>=nbegin_sm .and. jj<=nend_sm) then 612 vtl_j%vt_mat(1:numpw,1:numl)=sl_save(1:numpw,1:numl,jj-nbegin_sm+1) 613 ttl_j%tt_mat(1:numt,1:numl)=st_save(1:numt,1:numl,jj-nbegin_sm+1) 614 615 endif 616 iproc=(jj-1)/l_blk_sm 617 call mp_bcast(vtl_j%vt_mat, iproc,world_comm) 618 call mp_bcast(ttl_j%tt_mat, iproc,world_comm) 619 else 620 call read_data_pw_vt_mat_lanczos(vtl_j, jj, options%prefix, .false.,is) 621 call read_data_pw_tt_mat_lanczos(ttl_j, jj, options%prefix, .false.,is) 622 endif 623 if(.not.(ss%whole_s .and. l_distribute_sm)) then 624 allocate(tmp_mat(numl,numt)) 625 do iw=nbegin_g,nend_g 626 if( options%l_verbose) write(stdout,*) 'Doing dgemms',numl,numt,numpw,l_blk,iw 627 FLUSH(stdout) 628 629 if(.not.l_single) then 630 call dgemm('T','N',numl,numt,numt,1.d0,ttl_j%tt_mat,numt,re_e_mat(1,1,iw-nbegin_g+1),numt,0.d0,tmp_mat,numl) 631 call dgemm('N','N',numl,numl,numt,1.d0,tmp_mat,numl,ttl%tt_mat,numt,0.d0,re_g_mat(1,1,iw-nbegin_g+1),numl) 632 call dgemm('T','N',numl,numt,numt,1.d0,ttl_j%tt_mat,numt,im_e_mat(1,1,iw-nbegin_g+1),numt,0.d0,tmp_mat,numl) 633 call dgemm('N','N',numl,numl,numt,1.d0,tmp_mat,numl,ttl%tt_mat,numt,0.d0,im_g_mat(1,1,iw-nbegin_g+1),numl) 634 else 635 do in=0,ndivt-1 636 nbegin_t=in*l_blk_t+1 637 nend_t=min(nbegin_t+l_blk_t-1,lc%numt) 638 nsize_t=nend_t-nbegin_t+1 639 if(nsize_t >= 1) then 640 e_mat_double(1:lc%numt, 1:nsize_t)=dble(re_e_mat_single(1:lc%numt, nbegin_t:nend_t, iw-nbegin_g+1)) 641 642 call dgemm('T','N',numl,nsize_t,numt,1.d0,ttl_j%tt_mat,numt,e_mat_double,numt,0.d0,& 643 &tmp_mat(1,nbegin_t),numl) 644 endif 645 enddo 646 647 if( options%l_verbose) write(stdout,*) 'ATT1'!DEBUG 648 FLUSH(stdout) 649 call dgemm('N','N',numl,numl,numt,1.d0,tmp_mat,numl,ttl%tt_mat,numt,0.d0,& 650 &re_g_mat(1,1,iw-nbegin_g+1),numl) 651 if( options%l_verbose) write(stdout,*) 'ATT2'!DEBUG 652 FLUSH(stdout) 653 654 do in=0,ndivt-1 655 nbegin_t=in*l_blk_t+1 656 nend_t=min(nbegin_t+l_blk_t-1,lc%numt) 657 nsize_t=nend_t-nbegin_t+1 658 e_mat_double(1:lc%numt, 1:nsize_t)=dble(im_e_mat_single(1:lc%numt, nbegin_t:nend_t, iw-nbegin_g+1)) 659 call dgemm('T','N',numl,nsize_t,numt,1.d0,ttl_j%tt_mat,numt,e_mat_double,numt,0.d0,tmp_mat(1,nbegin_t),numl) 660 enddo 661 if( options%l_verbose) write(stdout,*) 'ATT3'!DEBUG 662 FLUSH(stdout) 663 664 call dgemm('N','N',numl,numl,numt,1.d0,tmp_mat,numl,ttl%tt_mat,numt,0.d0,im_g_mat(1,1,iw-nbegin_g+1),numl) 665 endif 666 667 668 669 enddo 670 671 deallocate(tmp_mat) 672 673 write(stdout,*) 'Fourier trasform:' 674 FLUSH(stdout) 675 676 677 678!Fourier trasform 679 allocate(g_tmp(numl,numl)) 680 allocate(g_dumm(numl,numl)) 681 if( options%l_verbose) write(stdout,*) 'ATT1' 682 FLUSH(stdout) 683 684!loop on time 685 do it=0,tf%n 686!each procs sums up its matrices in frequency with opportune factor 687 time=tf%times(it) 688 g_tmp(:,:)=0.d0 689 do iw=nbegin_g,nend_g 690 !factor=2.d0*dble(tf%weights_freq_g(iw)*exp((0.d0,1.d0)*tf%times(it)*tf%freqs_g_eff(iw)))/(2.d0*pi) 691 factor=2.d0*dble(tf%weights_freq_g(iw)*exp_table(it+1,iw-nbegin_g+1))/(2.d0*pi) 692 g_tmp(:,:)=g_tmp(:,:)+re_g_mat(:,:,iw-nbegin_g+1)*factor 693 enddo 694#if defined(__MPI) 695!the distribution of times on procs is the same of that for frequecies 696 iproc_time=it/l_blk 697!all processors sums to iproc_time 698 if(iproc_time==mpime) then 699 call MPI_REDUCE(g_tmp,re_g_mat_t(1,1,it-nbegin+1),numl*numl,& 700 &MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr) 701 else 702 call MPI_REDUCE(g_tmp,g_dumm,numl*numl,MPI_DOUBLE_PRECISION,& 703 &MPI_SUM,iproc_time,world_comm,ierr) 704 endif 705 706#else 707 re_g_mat_t(:,:,it+1)=g_tmp(:,:) 708 709#endif 710 g_tmp(:,:)=0.d0 711 do iw=nbegin_g,nend_g 712 !factor=-2.d0*dimag(tf%weights_freq_g(iw)*exp((0.d0,1.d0)*tf%times(it)*tf%freqs_g_eff(iw)))/(2.d0*pi) 713 factor=-2.d0*dimag(tf%weights_freq_g(iw)*exp_table(it+1,iw-nbegin_g+1))/(2.d0*pi) 714 g_tmp(:,:)=g_tmp(:,:)+im_g_mat(:,:,iw-nbegin_g+1)*factor 715 enddo 716#if defined(__MPI) 717!the distribution of times on procs is the same of that for frequecies 718 iproc_time=it/l_blk 719!all processors sums to iproc_time 720 if(iproc_time==mpime) then 721 call MPI_REDUCE(g_tmp,im_g_mat_t(1,1,it-nbegin+1),numl*numl,& 722 &MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr) 723 else 724 call MPI_REDUCE(g_tmp,g_dumm,numl*numl,MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr) 725 endif 726 727#else 728 im_g_mat_t(:,:,it+1)=g_tmp(:,:) 729 730#endif 731 732 733 enddo 734 deallocate(g_tmp,g_dumm) 735 else!for whole self-energy calculates directly in time domain 736 do it=nbegin,nend 737 call dgemm('T','N',numl,numl,numt,1.d0,ttl_j%tt_mat,numt,re_e_mat_part(1,1,it-nbegin+1),& 738 &numt,0.d0,re_g_mat_t(1,1,it-nbegin+1),numl) 739 call dgemm('T','N',numl,numl,numt,1.d0,ttl_j%tt_mat,numt,im_e_mat_part(1,1,it-nbegin+1),& 740 &numt,0.d0,im_g_mat_t(1,1,it-nbegin+1),numl) 741 enddo 742 endif 743 if( options%l_verbose) write(stdout,*) 'done' 744 FLUSH(stdout) 745 746 747 748 749!loop on frequency 750 751 752 write(stdout,*) 'Products in imaginary time:' 753 FLUSH(stdout) 754 755 756 if( .not.(ss%whole_s .and.l_distribute_sm )) then 757 allocate(tmp_mat(numpw,numl),re_h_mat(numpw,numpw),im_h_mat(numpw,numpw)) 758 do it=nbegin,nend 759 760 761!matrix multiply 762 call dgemm('N','N',numpw,numl,numl,1.d0,vtl_j%vt_mat,numpw,re_g_mat_t(1,1,it-nbegin+1),numl,0.d0,tmp_mat,numpw) 763 call dgemm('N','T',numpw,numpw,numl,1.d0,tmp_mat,numpw, vtl%vt_mat,numpw,0.d0,re_h_mat,numpw) 764 765 call dgemm('N','N',numpw,numl,numl,1.d0,vtl_j%vt_mat,numpw,im_g_mat_t(1,1,it-nbegin+1),numl,0.d0,tmp_mat,numpw) 766 call dgemm('N','T',numpw,numpw,numl,1.d0,tmp_mat,numpw, vtl%vt_mat,numpw,0.d0,im_h_mat,numpw) 767 768!product 769 770 if(ii==jj) then 771 ss%diag(ii,it+ss%n+1,is)=DDOT(numpw*numpw,re_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)+& 772 &DDOT(numpw*numpw,im_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1) 773 ss%diag(ii,ss%n+1-it,is)=DDOT(numpw*numpw,re_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)-& 774 &DDOT(numpw*numpw,im_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1) 775 776!if required add semicore terms 777 if(options%l_semicore) then 778!NOT_TO_BE_INCLUDED_START 779 allocate(tmp_vec_sc(numpw)) 780 do iv_sc=1,sc%n_semicore 781 if(it==0) write(stdout,*) 'SEMICORE PRIMA',iv_sc,ii, ss%diag(ii,it+ss%n+1,is) 782 call dgemv('N',numpw,numpw,1.d0,pw_mat_t(1,1,it-nbegin+1),numpw,& 783 &sc%ppw_mat(1,iv_sc,ii),1,0.d0,tmp_vec_sc,1) 784 ss%diag(ii,it+ss%n+1,is)= ss%diag(ii,it+ss%n+1,is)- &!ATTENZIONE ERA + 785 &DDOT(numpw,tmp_vec_sc,1,sc%ppw_mat(1,iv_sc,ii),1)*exp((offset+sc%en_sc(iv_sc))*tf%times(it)) 786 if(it==0) write(stdout,*) 'SEMICORE',iv_sc,ii, ss%diag(ii,it+ss%n+1,is) 787 if((offset+sc%en_sc(iv_sc))*tf%times(it)>0) write(stdout,*) 'OCIO!!!'!DEBUG ATTENZIONE 788 enddo 789 deallocate(tmp_vec_sc) 790!NOT_TO_BE_INCLUDED_END 791 endif 792 endif 793 if(ss%whole_s) then 794 ss%whole(jj,ii,it+ss%n+1,is)=DDOT(numpw*numpw,re_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)+& 795 &DDOT(numpw*numpw,im_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1) 796 ss%whole(jj,ii,ss%n+1-it,is)=DDOT(numpw*numpw,re_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)-& 797 &DDOT(numpw*numpw,im_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1) 798 799 endif 800 enddo 801 deallocate(re_h_mat,im_h_mat) 802 deallocate(tmp_mat) 803 else 804 allocate(tmp_mat(numl,numl)) 805 do it=nbegin,nend 806 call dgemm('T','N',numl,numl,numpw,1.d0,vtl_j%vt_mat,numpw,pw_part_t(1,1,it-nbegin+1),numpw,0.d0,tmp_mat,numl) 807 if(ii==jj) then 808 ss%diag(ii,it+ss%n+1,is)=DDOT(numl*numl,re_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)+& 809 &DDOT(numl*numl,im_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1) 810 ss%diag(ii,ss%n+1-it,is)=DDOT(numl*numl,re_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)-& 811 &DDOT(numl*numl,im_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1) 812 endif 813 814 ss%whole(jj,ii,it+ss%n+1,is)=DDOT(numl*numl,re_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)+& 815 &DDOT(numl*numl,im_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1) 816 ss%whole(jj,ii,ss%n+1-it,is)=DDOT(numl*numl,re_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)-& 817 &DDOT(numl*numl,im_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1) 818 819 820 enddo 821 deallocate(tmp_mat) 822 endif 823 if( options%l_verbose) write(stdout,*) 'done' 824 FLUSH(stdout) 825 826 827 828!mp_sum for distributing on all processors 829 if(ii==jj) then 830 call mp_sum(ss%diag(ii,1:2*ss%n+1,is),world_comm) 831 endif 832 if(ss%whole_s) then 833 call mp_sum(ss%whole(jj,ii,1:2*ss%n+1,is),world_comm) 834 endif 835 836 837!FT of diagonal part of self-nergy 838 839 840 841 !deallocate(re_g_mat_t, im_g_mat_t) 842 843 call free_memory(vtl_j) 844 call free_memory(ttl_j) 845 846 847 enddo !jj 848 849 call free_memory(vtl) 850 call free_memory(ttl) 851 852 enddo!on i_min, i_max 853 854 enddo!on i_cycles 855 856 857!put factor due to FFT on imaginary axis 858 ss%diag(:,:,is)=ss%diag(:,:,is)*(0.d0,1.d0) 859 860 if(ss%whole_s) then 861 ss%whole(:,:,:,is)=ss%whole(:,:,:,is)*(0.d0,1.d0) 862 endif 863 deallocate(re_g_mat,im_g_mat) 864 deallocate(re_g_mat_t,im_g_mat_t) 865 866 deallocate(pw_mat_t) 867 if(.not.l_single) then 868 deallocate(re_e_mat,im_e_mat) 869 else 870 deallocate(re_e_mat_single, im_e_mat_single, e_mat_double) 871 endif 872 if( ss%whole_s .and.l_distribute_sm ) then 873 deallocate(st_save,sl_save) 874 if(.not.l_single) deallocate(re_e_mat_t,im_e_mat_t) 875 if(.not.l_single) deallocate(re_e_mat_part,im_e_mat_part,pw_part_t) 876 endif 877 deallocate(exp_table) 878 call free_memory(lc) 879 enddo!on spin 880 call free_memory(uu) 881 call free_memory(sc) 882 return 883end subroutine do_self_lanczos_time 884 885 886 887subroutine solve_lanczos_single(alpha,re_e_mat,im_e_mat,lc) 888!this subroutine sums to the matrix E_{no}=<t_n|(H-alpha)^-1|t_o> 889 890 USE kinds, ONLY : DP 891 USE basic_structures, ONLY : lanczos_chain, initialize_memory,free_memory 892 USE io_global, ONLY : stdout 893 USE mp, ONLY : mp_sum,mp_bcast 894 USE mp_world, ONLY : nproc, mpime, world_comm 895 896 implicit none 897 898 899 COMPLEX(kind=DP) :: alpha!constant for Ev+iw 900 TYPE(lanczos_chain) :: lc!lanczos chain descriptor 901 REAL(kind=4) :: re_e_mat(lc%numt,lc%numt) !real part of matrix to be calculated 902 REAL(kind=4) :: im_e_mat(lc%numt,lc%numt) !imaginary part of matrix to be calculated 903 904 905 906 INTEGER :: io,info,ii,jj 907 COMPLEX(kind=DP), ALLOCATABLE :: dl(:),du(:),d(:),t(:) 908 COMPLEX(kind=DP), ALLOCATABLE :: omat(:,:) 909 REAL(kind=DP), ALLOCATABLE :: tmp_mat(:,:) 910 INTEGER :: l_blk,nbegin,nend, iproc 911 COMPLEX(kind=DP), ALLOCATABLE :: e_mat(:) 912 913 914 915 l_blk= (lc%numt)/nproc 916 if(l_blk*nproc < (lc%numt)) l_blk = l_blk+1 917 nbegin=mpime*l_blk+1 918 nend=nbegin+l_blk-1 919 920 allocate(dl(lc%num_steps-1),du(lc%num_steps-1),d(lc%num_steps),t(lc%num_steps)) 921 re_e_mat(:,:)=0.0 922 im_e_mat(:,:)=0.0 923 allocate(omat(lc%numt,lc%num_steps)) 924 allocate(tmp_mat(lc%numt,lc%num_steps)) 925 allocate(e_mat(lc%numt)) 926!loop on o 927 do io=1,lc%numt 928!!set up vectors for lapack routine 929 !recover matrix from processor 930 tmp_mat(:,:)=0.d0 931 if(io >= nbegin .and. io <= nend) then 932 tmp_mat(1:lc%numt,1:lc%num_steps)=lc%o_mat(1:lc%numt,1:lc%num_steps,io-nbegin+1) 933 endif 934 iproc=(io-1)/l_blk 935 call mp_bcast(tmp_mat(:,:), iproc, world_comm) 936 omat(:,:)=dcmplx(tmp_mat(:,:),0.d0) 937 938 939 940 dl(1:lc%num_steps-1)=cmplx(lc%f(1:lc%num_steps-1,io),0.d0) 941 du(1:lc%num_steps-1)=cmplx(lc%f(1:lc%num_steps-1,io),0.d0) 942 d(1:lc%num_steps)=cmplx(lc%d(1:lc%num_steps,io),0.d0)+alpha 943 t(:)=(0.d0,0.d0) 944 t(1)=(1.d0,0.d0) 945 946!!call lapack 947 call zgtsv(lc%num_steps,1,dl,d,du,t,lc%num_steps,info) 948 if(info /= 0) then 949 write(stdout,*) 'ZGTSV info:', info 950 FLUSH(stdout) 951 stop 952 endif 953 954 955 956!!calculate term 957 958 call zgemm('N','N',lc%numt,1,lc%num_steps,(1.d0,0.d0),omat,lc%numt,t,lc%num_steps,(0.d0,0.d0),e_mat,lc%numt) 959 re_e_mat(1:lc%numt,io)=re_e_mat(1:lc%numt,io)+real(e_mat(1:lc%numt)) 960 im_e_mat(1:lc%numt,io)=im_e_mat(1:lc%numt,io)+imag(e_mat(1:lc%numt)) 961 enddo 962 963 964 do ii=1,lc%numt 965 do jj=ii+1,lc%numt 966 re_e_mat(ii,jj)=0.5*(re_e_mat(ii,jj)+re_e_mat(jj,ii)) 967 re_e_mat(jj,ii)=re_e_mat(ii,jj) 968 im_e_mat(ii,jj)=0.5*(im_e_mat(ii,jj)+im_e_mat(jj,ii)) 969 im_e_mat(jj,ii)=im_e_mat(ii,jj) 970 971 enddo 972 enddo 973 974 deallocate(dl,du,d,t) 975 deallocate(omat,tmp_mat) 976 deallocate(e_mat) 977 return 978end subroutine solve_lanczos_single 979 980 981subroutine solve_lanczos_fake_single(lc) 982 983!this subroutine is a parallel fake routine for the solve lanczos routine 984 985 USE kinds, ONLY : DP 986 USE basic_structures, ONLY : lanczos_chain, initialize_memory,free_memory 987 USE io_global, ONLY : stdout 988 USE mp, ONLY : mp_sum,mp_bcast 989 USE mp_world, ONLY : nproc,mpime,world_comm 990 991 implicit none 992 993 TYPE(lanczos_chain) :: lc!lanczos chain descriptor 994 995 INTEGER :: l_blk,nbegin,nend, iproc 996 REAL(kind=DP), ALLOCATABLE :: o_mat(:,:) 997 INTEGER :: io 998 999 1000 allocate(o_mat(lc%numt,lc%num_steps)) 1001 l_blk= (lc%numt)/nproc 1002 if(l_blk*nproc < (lc%numt)) l_blk = l_blk+1 1003 nbegin=mpime*l_blk+1 1004 nend=nbegin+l_blk-1 1005 1006 1007!loop on io 1008 do io=1,lc%numt 1009!recover matrix from processor 1010 o_mat(:,:)=0.d0 1011 if(io >= nbegin .and. io <= nend) then 1012 o_mat(:,:)=lc%o_mat(:,:,io-nbegin+1) 1013 endif 1014 iproc=(io-1)/l_blk 1015 call mp_bcast(o_mat(:,:), iproc, world_comm) 1016 1017 1018 enddo 1019 1020 deallocate(o_mat) 1021end subroutine solve_lanczos_fake_single 1022 1023 1024