1!
2! Copyright (C) 2001-2013 Quantum ESPRESSO group
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8!
9
10
11
12subroutine do_reducible_pola(tf ,options)
13!this subroutine calculates and writes on disk the reducible polarizability from the screen interaction
14
15  USE kinds,             ONLY : DP
16  USE io_global,         ONLY : stdout, ionode, ionode_id
17  USE input_gw,          ONLY : input_options
18  USE basic_structures,  ONLY : v_pot,wannier_u,free_memory, initialize_memory,lanczos_chain, vt_mat_lanczos,tt_mat_lanczos,&
19                                     & semicore
20  USE green_function,    ONLY : green, read_green, free_memory_green, initialize_green
21  USE polarization,      ONLY : polaw, free_memory_polaw, read_polaw, write_polaw,invert_v_pot, initialize_polaw, &
22                                  & read_polaw_global
23  USE mp,                ONLY : mp_sum, mp_bcast
24  USE mp_world,          ONLY : nproc,mpime,world_comm
25  USE times_gw,          ONLY : times_freqs
26  USE self_energy_storage, ONLY : self_storage,write_self_storage_ondisk,free_memory_self_storage
27  USE lanczos
28  USE constants,          ONLY : tpi,pi
29  USE start_end ! debug
30  USE parallel_include
31
32  implicit none
33
34  TYPE(times_freqs), INTENT(in) :: tf!for time frequency grids
35  TYPE(input_options) :: options
36  TYPE(self_storage)  :: ss
37
38  TYPE(v_pot) :: vp,vpi
39  TYPE(polaw) :: ww!dressed polarization
40  INTEGER :: l_blk, nbegin,nend, nsize
41  REAL(kind=DP), ALLOCATABLE:: wtemp(:,:)
42  INTEGER :: iw
43  REAL(kind=DP) :: v_head
44
45  nullify(vp%vmat)
46  nullify(vpi%vmat)
47  call initialize_polaw(ww)
48
49
50
51  write(stdout,*) 'Trasform W to Pgreek'
52  FLUSH(stdout)
53
54  if(options%w_divergence == 2) then
55     call read_data_pw_v(vp,options%prefix,options%debug,0,.true.)
56  else
57     call read_data_pw_v(vp,options%prefix,options%debug,0,.false.)
58  endif
59  v_head=vp%vmat(vp%numpw,vp%numpw)
60
61  call invert_v_pot(vp,vpi)
62  call free_memory(vp)
63
64  l_blk= (tf%n+1)/nproc
65  if(l_blk*nproc < (tf%n+1)) l_blk = l_blk+1
66  nbegin=mpime*l_blk
67  nend=nbegin+l_blk-1
68
69
70!loop on imaginary frequency i\omega
71  do iw=nbegin,nbegin+l_blk-1
72     if(iw <= tf%n) then
73
74        call read_polaw(iw,ww,options%debug,options%l_verbose)
75
76        allocate(wtemp(ww%numpw,ww%numpw))
77        call dgemm('N','N',ww%numpw,ww%numpw,ww%numpw,1.d0,vpi%vmat,ww%numpw,ww%pw,ww%numpw,&
78             &0.d0, wtemp,ww%numpw)
79        call dgemm('N','N',ww%numpw,ww%numpw,ww%numpw,1.d0,wtemp,ww%numpw,vpi%vmat,ww%numpw,&
80             &0.d0,ww%pw,ww%numpw)
81        deallocate(wtemp)
82
83        call write_polaw(ww,options%debug)
84     endif
85  enddo
86
87
88  call free_memory(vpi)
89  call free_memory_polaw(ww)
90  write(stdout,*) 'Done'
91  FLUSH(stdout)
92
93
94
95  return
96end subroutine do_reducible_pola
97
98subroutine do_self_lanczos_time(ss, tf ,options,l_real_axis,energy)
99!this subroutine calculte the self-energy on time using fourier trasfrom using the lanczos scheme
100
101  USE kinds,             ONLY : DP
102  USE io_global,         ONLY : stdout, ionode, ionode_id
103  USE input_gw,          ONLY : input_options
104  USE basic_structures,  ONLY : v_pot,wannier_u,free_memory, initialize_memory,lanczos_chain, vt_mat_lanczos,tt_mat_lanczos,&
105                                     & semicore
106  USE green_function,    ONLY : green, read_green, free_memory_green, initialize_green
107  USE polarization,      ONLY : polaw, free_memory_polaw, read_polaw, write_polaw,invert_v_pot, initialize_polaw, &
108                                  & read_polaw_global
109  USE mp,                ONLY : mp_sum, mp_bcast
110  USE mp_world,          ONLY : nproc,mpime,world_comm
111  USE times_gw,          ONLY : times_freqs
112  USE self_energy_storage, ONLY : self_storage,write_self_storage_ondisk,free_memory_self_storage
113  USE lanczos
114  USE constants,          ONLY : tpi,pi
115  USE start_end ! debug
116  USE parallel_include
117  USE io_files,  ONLY : prefix, tmp_dir
118
119  implicit none
120
121  TYPE(times_freqs), INTENT(in) :: tf!for time frequency grids
122  TYPE(input_options) :: options
123  TYPE(self_storage)  :: ss
124  LOGICAL, INTENT(in) :: l_real_axis
125!if true calculates on real frequency axis at given energy
126  REAL(kind=DP), INTENT(in) :: energy!energy on real axis at which calculating the self-energy (or part of it)
127                                     !only if l_real_axis == true
128
129
130  TYPE(v_pot) :: vp,vpi
131  TYPE(polaw) :: ww!dressed polarization
132  REAL(kind=DP) :: inv_epsi,v_head
133  INTEGER :: l_blk, nbegin,nend, nsize, l_blk_freq, nbegin_freq,nend_freq
134  REAL(kind=DP), ALLOCATABLE:: wtemp(:,:)
135  INTEGER :: iw
136  TYPE(wannier_u) :: uu
137  REAL(kind=DP) :: offset
138  TYPE(lanczos_chain) :: lc
139  REAL(kind=DP), ALLOCATABLE :: re_e_mat(:,:,:),im_e_mat(:,:,:)
140  COMPLEX(kind=DP), ALLOCATABLE :: e_mat_tmp(:,:,:)
141  COMPLEX(kind=DP) :: af(1)
142  REAL(kind=DP), ALLOCATABLE :: pw_mat(:,:,:),pw_mat_t(:,:,:)
143  INTEGER :: numpw,numt,numl
144  INTEGER :: it, ii,jj
145  REAL(kind=DP) :: time,factor
146  COMPLEX(kind=DP) :: cfactor
147  REAL(kind=DP), ALLOCATABLE :: pw_tmp(:,:),pw_dumm(:,:)
148  INTEGER :: iproc_time,ierr
149
150  TYPE(vt_mat_lanczos) :: vtl,vtl_j
151  TYPE(tt_mat_lanczos) :: ttl,ttl_j
152
153  REAL(kind=DP), ALLOCATABLE :: re_g_mat(:,:,:), im_g_mat(:,:,:),re_g_mat_t(:,:,:), im_g_mat_t(:,:,:)
154  REAL(kind=DP), ALLOCATABLE :: tmp_mat(:,:),tmp_mat1(:,:),tmp_mat2(:,:)
155  REAL(kind=DP), ALLOCATABLE :: g_tmp(:,:), g_dumm(:,:), re_h_mat(:,:),im_h_mat(:,:)
156  REAL(kind=DP), EXTERNAL :: DDOT
157
158  LOGICAL :: l_single=.true.!if true e_mat is saved in single precision
159
160  REAL(kind=4), ALLOCATABLE :: re_e_mat_single(:,:,:),im_e_mat_single(:,:,:)
161  REAL(kind=DP), ALLOCATABLE :: e_mat_double(:,:)
162  INTEGER :: l_blk_t, nbegin_t,nend_t, nsize_t,in
163  INTEGER, PARAMETER :: ndivt=1!10
164  INTEGER :: l_blk_g, nbegin_g,nend_g, nsize_g!paremter for optional dedicated frequency grid for G
165  INTEGER :: j_min, j_max, is
166
167  TYPE(semicore) :: sc
168  REAL(kind=DP), ALLOCATABLE :: tmp_vec_sc(:)
169  INTEGER :: iv_sc
170
171  LOGICAL, PARAMETER :: l_distribute_sm=.true.!if true the S matrices are distributed among mpi tasks instead of being read from disk
172                                              !it requires the parameter l_single==false
173  INTEGER :: l_blk_sm, nbegin_sm,nend_sm, nsize_sm,iproc
174  REAL(kind=DP), ALLOCATABLE :: st_save(:,:,:), sl_save(:,:,:)
175  COMPLEX(kind=DP), ALLOCATABLE :: exp_table(:,:)
176  REAL(kind=DP), ALLOCATABLE :: re_e_mat_t(:,:,:),im_e_mat_t(:,:,:)!in time for fourier trasform
177  REAL(kind=DP), ALLOCATABLE :: re_e_mat_part(:,:,:),im_e_mat_part(:,:,:)!in time for storing partial calculations
178  REAL(kind=DP), ALLOCATABLE :: pw_part_t(:,:,:)!in time for storing partial calculations
179  INTEGER :: n_cycles, i_cycles,i_min_cycles,i_max_cycles
180  INTEGER :: n_list(2),iun,iun2
181  INTEGER, ALLOCATABLE :: i_list(:,:)
182  INTEGER, EXTERNAL :: find_free_unit
183
184  if(options%whole_s) then
185     l_single=.false.
186  endif
187  write(stdout,*) 'Routine do_self_lanczos_time'
188  FLUSH(stdout)
189
190  if(options%l_big_system) then
191     n_cycles=options%i_max-options%i_min+1
192  else
193     n_cycles=1
194  endif
195
196  if(options%l_list) then
197     if(ionode) then
198        iun =  find_free_unit()
199        open( unit=iun, file=trim(tmp_dir)//trim(prefix)//'-'//'list_1.dat', status='old')
200        read(iun,*) n_list(1)
201        if(uu%nspin==2) then
202           iun2 =  find_free_unit()
203           open( unit=iun2, file=trim(tmp_dir)//trim(prefix)//'-'//'list_2.dat', status='old')
204           read(iun,*) n_list(2)
205        else
206           n_list(2)=0
207        endif
208     endif
209     call mp_bcast(n_list,ionode_id,world_comm)
210     allocate(i_list(max(n_list(1),n_list(2)),2))
211     i_list=0
212     if(ionode) then
213        do ii=1,n_list(1)
214           read(iun,*) i_list(ii,1)
215        enddo
216        close(iun)
217        if(uu%nspin==2) then
218           do ii=1,n_list(2)
219              read(iun2,*) i_list(ii,2)
220           enddo
221           close(iun2)
222        endif
223     endif
224     call mp_bcast(i_list,ionode_id,world_comm)
225     n_cycles=n_list(1)
226  endif
227
228
229
230!keeps in memory G, P(i\tau)
231
232
233  nullify(vp%vmat)
234  nullify(vpi%vmat)
235  call initialize_polaw(ww)
236
237
238  call initialize_memory(sc)
239
240
241!calculate offset
242!read in DFT energies
243
244  call read_data_pw_u(uu,options%prefix)
245
246  ss%ontime=.true.
247  ss%max_i=options%max_i
248  ss%i_min=options%i_min
249  ss%i_max=options%i_max
250  ss%n=tf%n
251  ss%tau=options%tau
252  ss%whole_s=options%whole_s
253  ss%n_grid_fit=tf%n_grid_fit
254  if(ss%whole_s) then
255     ss%i_min_whole=options%i_min_whole
256     ss%i_max_whole=options%i_max_whole
257  endif
258
259  ss%nspin=uu%nspin
260  if(ss%whole_s) then
261     allocate(ss%whole(ss%i_min_whole:ss%i_max_whole,ss%max_i,2*ss%n+1,ss%nspin))
262     ss%whole(:,:,:,:)=(0.d0,0.d0)
263     allocate(ss%whole_freq_fit(ss%i_min_whole:ss%i_max_whole,ss%max_i,2*ss%n_grid_fit+1,ss%nspin))
264     ss%whole_freq_fit(:,:,:,:)=(0.d0,0.d0)
265     allocate(ss%diag(ss%max_i,2*ss%n+1,ss%nspin))
266     ss%diag(:,:,:)=(0.d0,0.d0)
267     allocate(ss%diag_freq_fit(ss%max_i,2*ss%n_grid_fit+1,ss%nspin))
268     ss%diag_freq_fit(:,:,:)=(0.d0,0.d0)
269  else
270     allocate(ss%diag(ss%max_i,2*ss%n+1,ss%nspin))
271     ss%diag(:,:,:)=(0.d0,0.d0)
272     nullify(ss%whole)
273     allocate(ss%diag_freq_fit(ss%max_i,2*ss%n_grid_fit+1,ss%nspin))
274     ss%diag_freq_fit(:,:,:)=(0.d0,0.d0)
275     nullify(ss%whole_freq_fit)
276  endif
277!for compatibility
278  allocate(ss%ene_remainder(ss%max_i,1))
279  ss%ene_remainder(:,1)=0.d0
280
281!loop on spin
282  do is=1,ss%nspin
283
284
285!NOT_TO_BE_INCLUDED_START
286     if(options%l_semicore) call read_data_pw_semicore(sc, options%prefix, is)
287!NOT_TO_BE_INCLUDED_END
288     if(.not.l_real_axis) then
289        if(uu%nums > uu%nums_occ(is)) then
290           offset=-(uu%ene(uu%nums_occ(is)+1,is)+uu%ene(uu%nums_occ(is),is))/2.d0! CUSSI XE GIUSTO DEBUG
291           !offset=-(uu%ene(uu%nums_occ(2)+1,2)+uu%ene(uu%nums_occ(1),1))/2.d0
292        else
293           offset=-uu%ene(uu%nums_occ(is),is)
294        endif
295     else
296        offset=-energy
297     endif
298
299
300!!!!!!!!
301     l_blk= (tf%n+1)/nproc
302     if(l_blk*nproc < (tf%n+1)) l_blk = l_blk+1
303     nbegin=mpime*l_blk
304     nend=nbegin+l_blk-1
305     if(nend > tf%n) nend=tf%n
306     nsize=nend-nbegin+1
307
308
309 !read polarizability matrices
310
311     if( options%l_verbose) write(stdout,*) 'Read Pgreek'
312     FLUSH(stdout)
313
314     do iw=nbegin,nend
315        call read_polaw(iw,ww,options%debug,options%l_verbose)
316        if(iw==nbegin) allocate(pw_mat(ww%numpw,ww%numpw,l_blk))
317        pw_mat(:,:,iw-nbegin+1)=ww%pw(:,:)
318        call free_memory_polaw(ww)
319     enddo
320     numpw=ww%numpw
321     call mp_bcast(numpw, ionode_id,world_comm)
322     if(nbegin > tf%n) allocate(pw_mat(numpw,numpw,l_blk))
323
324!Fourier trasform reducible polarizability matrices to imaginary time
325
326
327     write(stdout,*) 'Fourier trasform Pgreek'
328     FLUSH(stdout)
329
330
331     allocate(pw_tmp(numpw,numpw))
332     allocate(pw_dumm(numpw,numpw))
333     allocate(pw_mat_t(numpw,numpw,l_blk))
334!loop on time
335     do it=0,tf%n
336!each procs sums up its matrices in frequency with opportune factor
337        time=tf%times(it)
338        pw_tmp(:,:)=0.d0
339        do iw=nbegin,nend
340           factor=2.d0*dble(tf%weights_freq(iw)*exp((0.d0,1.d0)*tf%times(it)*tf%freqs_eff(iw)))/(2.d0*pi)
341           pw_tmp(:,:)=pw_tmp(:,:)+pw_mat(:,:,iw-nbegin+1)*factor
342        enddo
343#if defined(__MPI)
344!the distribution of times on procs is the same of that for frequecies
345        iproc_time=it/l_blk
346!all processors sums to iproc_time
347        if(iproc_time==mpime) then
348           call MPI_REDUCE(pw_tmp,pw_mat_t(1,1,it-nbegin+1),numpw*numpw,MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr)
349        else
350           call MPI_REDUCE(pw_tmp,pw_dumm,numpw*numpw,MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr)
351        endif
352
353#else
354        pw_mat_t(:,:,it+1)=pw_tmp
355
356#endif
357
358
359!mp_sum to processer owing that time
360!this processor put on opportune array
361     enddo
362     deallocate(pw_tmp)
363     deallocate(pw_dumm)
364     deallocate(pw_mat)
365
366     l_blk_g= (tf%n_g+1)/nproc
367     if(l_blk_g*nproc < (tf%n_g+1)) l_blk_g = l_blk_g+1
368     nbegin_g=mpime*l_blk_g
369     nend_g=nbegin_g+l_blk_g-1
370     if(nend_g > tf%n_g) nend_g=tf%n_g
371     nsize_g=nend_g-nbegin_g+1
372!allocate and compute table for Fourier trasform
373     allocate(exp_table(tf%n+1,l_blk_g))
374
375     do it=0,tf%n
376        do iw=nbegin_g,nend_g
377           exp_table(it+1,iw-nbegin_g+1)=exp((0.d0,1.d0)*tf%times(it)*tf%freqs_g_eff(iw))
378        enddo
379     enddo
380
381
382
383
384     do i_cycles=1,n_cycles
385!calculates G
386
387     call initialize_memory(lc)
388
389     if(.not.options%l_list) then
390        call read_data_pw_lanczos_chain(lc, i_cycles, options%prefix, .false.,is)
391     else
392        call read_data_pw_lanczos_chain(lc, i_list(i_cycles,is), options%prefix, .false.,is)
393     endif
394     write(stdout,*) 'Lanczos dimensions', lc%numt,lc%num_steps,is
395     FLUSH(stdout)
396
397
398
399
400
401     if(.not.l_single) then
402        allocate(re_e_mat(lc%numt,lc%numt,l_blk_g))
403        allocate(im_e_mat(lc%numt,lc%numt,l_blk_g))
404
405        allocate(e_mat_tmp(lc%numt,lc%numt,1))
406        do iw=nbegin_g,nbegin_g+l_blk_g-1
407           if(iw <= tf%n_g) then
408              af(1)=dcmplx(offset,-tf%freqs_g(iw))
409              call solve_lanczos_complex(1,af,e_mat_tmp,lc)
410              re_e_mat(:,:,iw-nbegin_g+1)=dble(e_mat_tmp(:,:,1))
411              im_e_mat(:,:,iw-nbegin_g+1)=dimag(e_mat_tmp(:,:,1))
412           else
413              call solve_lanczos_fake_complex(lc)
414           endif
415
416        end do
417        deallocate(e_mat_tmp)
418!for entire self-energy store
419!!!!!!!!!!!!!!
420        if( ss%whole_s .and.l_distribute_sm ) then
421           if( options%l_verbose) write(stdout,*) 'before allocate',lc%numt,l_blk
422           FLUSH(stdout)
423           allocate(re_e_mat_t(lc%numt,lc%numt,l_blk), im_e_mat_t(lc%numt,lc%numt,l_blk))
424           if( options%l_verbose) write(stdout,*) 'after '
425           allocate(g_tmp(lc%numt,lc%numt))
426           allocate(g_dumm(lc%numt,lc%numt))
427           if( options%l_verbose) write(stdout,*) 'ATT1'
428           FLUSH(stdout)
429
430           numt=lc%numt
431!loop on time
432           do it=0,tf%n
433!each procs sums up its matrices in frequency with opportune factor
434              time=tf%times(it)
435              g_tmp(:,:)=0.d0
436              do iw=nbegin_g,nend_g
437                 factor=2.d0*dble(tf%weights_freq_g(iw)*exp_table(it+1,iw-nbegin_g+1))/(2.d0*pi)
438                 g_tmp(1:numt,1:numt)=g_tmp(1:numt,1:numt)+re_e_mat(1:numt,1:numt,iw-nbegin_g+1)*factor
439              enddo
440#if defined(__MPI)
441!the distribution of times on procs is the same of that for frequecies
442              iproc_time=it/l_blk
443!all processors sums to iproc_time
444              if(iproc_time==mpime) then
445                 call MPI_REDUCE(g_tmp,re_e_mat_t(1,1,it-nbegin+1),numt*numt,&
446                      &MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr)
447              else
448                 call MPI_REDUCE(g_tmp,g_dumm,numt*numt,MPI_DOUBLE_PRECISION,&
449                      &MPI_SUM,iproc_time,world_comm,ierr)
450              endif
451
452#else
453              re_e_mat_t(1:numt,1:numt,it+1)=g_tmp(1:numt,1:numt)
454
455#endif
456
457           enddo
458!loop on time
459           do it=0,tf%n
460!each procs sums up its matrices in frequency with opportune factor
461              time=tf%times(it)
462              g_tmp(:,:)=0.d0
463              do iw=nbegin_g,nend_g
464                 factor=-2.d0*dimag(tf%weights_freq_g(iw)*exp_table(it+1,iw-nbegin_g+1))/(2.d0*pi)
465                 g_tmp(1:numt,1:numt)=g_tmp(1:numt,1:numt)+im_e_mat(1:numt,1:numt,iw-nbegin_g+1)*factor
466              enddo
467#if defined(__MPI)
468!the distribution of times on procs is the same of that for frequecies
469              iproc_time=it/l_blk
470!all processors sums to iproc_time
471              if(iproc_time==mpime) then
472                 call MPI_REDUCE(g_tmp,im_e_mat_t(1,1,it-nbegin+1),numt*numt,&
473                      &MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr)
474              else
475                 call MPI_REDUCE(g_tmp,g_dumm,numt*numt,MPI_DOUBLE_PRECISION,&
476                      &MPI_SUM,iproc_time,world_comm,ierr)
477              endif
478
479#else
480              im_e_mat_t(1:numt,1:numt,it+1)=g_tmp(1:numt,1:numt)
481
482#endif
483
484           enddo
485
486           deallocate(g_tmp,g_dumm)
487        endif
488        if( options%l_verbose) write(stdout,*) 'ATT2' !DEBUG
489        FLUSH(stdout)
490
491     else
492       if(i_cycles==1) then
493          allocate(re_e_mat_single(lc%numt,lc%numt,l_blk_g))
494          allocate(im_e_mat_single(lc%numt,lc%numt,l_blk_g))
495       endif
496        do iw=nbegin_g,nbegin_g+l_blk_g-1
497           if(iw <= tf%n_g) then
498              af(1)=dcmplx(offset,-tf%freqs_g(iw))
499              call solve_lanczos_single(af(1),re_e_mat_single(1,1,iw-nbegin_g+1),im_e_mat_single(1,1,iw-nbegin_g+1),lc)
500           else
501              call solve_lanczos_fake_single(lc)
502           endif
503
504        end do
505        l_blk_t= (lc%numt)/ndivt
506        if(l_blk_t*ndivt < (lc%numt)) l_blk_t = l_blk_t+1
507
508
509        if(i_cycles==1) allocate(e_mat_double(lc%numt,l_blk_t))
510
511     endif
512     if( options%l_verbose) write(stdout,*) 'Done'
513     FLUSH(stdout)
514
515     call initialize_memory(vtl)
516     call initialize_memory(ttl)
517     call initialize_memory(vtl_j)
518     call initialize_memory(ttl_j)
519
520!if required read all S matrices and distribute among mpi tasks
521
522     if( ss%whole_s .and.l_distribute_sm ) then
523        l_blk_sm= (ss%max_i)/nproc
524        if(l_blk_sm*nproc < (ss%max_i)) l_blk_sm = l_blk_sm+1
525        nbegin_sm=mpime*l_blk_sm+1
526        nend_sm=nbegin_sm+l_blk_sm-1
527        if(nend_sm > ss%max_i) nend_sm=ss%max_i
528        nsize_sm=nend_sm-nbegin_sm+1
529        do jj=1,ss%max_i
530           call  read_data_pw_vt_mat_lanczos(vtl_j, jj, options%prefix, .false.,is)
531           call  read_data_pw_tt_mat_lanczos(ttl_j, jj, options%prefix, .false.,is)
532           if(jj==1) then
533              numt=ttl_j%numt
534              numl=ttl_j%numl
535              allocate(sl_save(numpw,numl,l_blk_sm))
536              allocate(st_save(numt,numl,l_blk_sm))
537           endif
538           if(jj>=nbegin_sm .and. jj<=nend_sm) then
539              st_save(1:numt,1:numl,jj-nbegin_sm+1)=ttl_j%tt_mat(1:numt,1:numl)
540              sl_save(1:numpw,1:numl,jj-nbegin_sm+1)=vtl_j%vt_mat(1:numpw,1:numl)
541           endif
542           call free_memory(vtl_j)
543           call free_memory(ttl_j)
544        enddo
545        allocate(re_e_mat_part(numt,numl,l_blk),im_e_mat_part(numt,numl,l_blk))
546        allocate(pw_part_t(numpw,numl,l_blk))
547     endif
548
549
550
551
552
553 !loop on KS states
554     if(options%l_big_system) then
555        if(.not.options%l_list) then
556           i_min_cycles=ss%i_min+i_cycles-1
557           i_max_cycles=i_min_cycles
558        else
559           i_min_cycles=i_list(i_cycles,is)
560           i_max_cycles=i_min_cycles
561        endif
562     else
563        i_min_cycles=ss%i_min
564        i_max_cycles=ss%i_max
565     endif
566
567     do ii=i_min_cycles,i_max_cycles
568        write(stdout,*) 'Loop on KS:',ii, is
569        FLUSH(stdout)
570
571
572
573!calculates G on partial basis
574!read vtl ttl
575        call  read_data_pw_vt_mat_lanczos(vtl, ii, options%prefix, .false.,is)
576        call  read_data_pw_tt_mat_lanczos(ttl, ii, options%prefix, .false.,is)
577        if(ii==i_min_cycles.and.i_cycles==1) then
578           numt=ttl%numt
579           numl=ttl%numl
580           allocate(re_g_mat(numl,numl,l_blk_g),im_g_mat(numl,numl,l_blk_g))
581           allocate(re_g_mat_t(numl,numl,l_blk),im_g_mat_t(numl,numl,l_blk))
582        endif
583!multiply and  put on array
584!loop on frequency
585        if(.not.ss%whole_s) then
586           j_min=ii
587           j_max=ii
588        else
589           j_min=ss%i_min_whole
590           j_max=ss%i_max_whole
591        endif
592        if( options%l_verbose) write(stdout,*) 'Doing dgemms',numl,numt,numl,numpw
593
594        if(ss%whole_s .and. l_distribute_sm) then!if required calculates partial products
595           do it=nbegin,nend
596              call dgemm('N','N',numt,numl,numt,1.d0,re_e_mat_t(1,1,it-nbegin+1),numt,&
597                   &ttl%tt_mat,numt,0.d0,re_e_mat_part(1,1,it-nbegin+1),numt)
598              call dgemm('N','N',numt,numl,numt,1.d0,im_e_mat_t(1,1,it-nbegin+1),numt,&
599                   &ttl%tt_mat,numt,0.d0,im_e_mat_part(1,1,it-nbegin+1),numt)
600              call dgemm('N','N',numpw,numl,numpw,1.d0,pw_mat_t(1,1,it-nbegin+1) ,numpw, &
601                   & vtl%vt_mat,numpw,0.d0,pw_part_t(1,1,it-nbegin+1),numpw)
602           enddo
603
604
605        endif
606        FLUSH(stdout)
607        do jj=j_min,j_max
608           if(ss%whole_s .and. l_distribute_sm) then
609              allocate(ttl_j%tt_mat(numt,numl))
610              allocate(vtl_j%vt_mat(numpw,numl))
611              if(jj>=nbegin_sm .and. jj<=nend_sm) then
612                 vtl_j%vt_mat(1:numpw,1:numl)=sl_save(1:numpw,1:numl,jj-nbegin_sm+1)
613                 ttl_j%tt_mat(1:numt,1:numl)=st_save(1:numt,1:numl,jj-nbegin_sm+1)
614
615              endif
616             iproc=(jj-1)/l_blk_sm
617             call mp_bcast(vtl_j%vt_mat, iproc,world_comm)
618             call mp_bcast(ttl_j%tt_mat, iproc,world_comm)
619           else
620              call  read_data_pw_vt_mat_lanczos(vtl_j, jj, options%prefix, .false.,is)
621              call  read_data_pw_tt_mat_lanczos(ttl_j, jj, options%prefix, .false.,is)
622           endif
623           if(.not.(ss%whole_s .and. l_distribute_sm)) then
624              allocate(tmp_mat(numl,numt))
625              do iw=nbegin_g,nend_g
626                 if( options%l_verbose) write(stdout,*) 'Doing dgemms',numl,numt,numpw,l_blk,iw
627                 FLUSH(stdout)
628
629                 if(.not.l_single) then
630                    call dgemm('T','N',numl,numt,numt,1.d0,ttl_j%tt_mat,numt,re_e_mat(1,1,iw-nbegin_g+1),numt,0.d0,tmp_mat,numl)
631                    call dgemm('N','N',numl,numl,numt,1.d0,tmp_mat,numl,ttl%tt_mat,numt,0.d0,re_g_mat(1,1,iw-nbegin_g+1),numl)
632                    call dgemm('T','N',numl,numt,numt,1.d0,ttl_j%tt_mat,numt,im_e_mat(1,1,iw-nbegin_g+1),numt,0.d0,tmp_mat,numl)
633                    call dgemm('N','N',numl,numl,numt,1.d0,tmp_mat,numl,ttl%tt_mat,numt,0.d0,im_g_mat(1,1,iw-nbegin_g+1),numl)
634                 else
635                    do in=0,ndivt-1
636                       nbegin_t=in*l_blk_t+1
637                       nend_t=min(nbegin_t+l_blk_t-1,lc%numt)
638                       nsize_t=nend_t-nbegin_t+1
639                       if(nsize_t >= 1) then
640                          e_mat_double(1:lc%numt, 1:nsize_t)=dble(re_e_mat_single(1:lc%numt, nbegin_t:nend_t, iw-nbegin_g+1))
641
642                          call dgemm('T','N',numl,nsize_t,numt,1.d0,ttl_j%tt_mat,numt,e_mat_double,numt,0.d0,&
643                           &tmp_mat(1,nbegin_t),numl)
644                       endif
645                    enddo
646
647                    if( options%l_verbose) write(stdout,*) 'ATT1'!DEBUG
648                    FLUSH(stdout)
649                    call dgemm('N','N',numl,numl,numt,1.d0,tmp_mat,numl,ttl%tt_mat,numt,0.d0,&
650                         &re_g_mat(1,1,iw-nbegin_g+1),numl)
651                    if( options%l_verbose) write(stdout,*) 'ATT2'!DEBUG
652                    FLUSH(stdout)
653
654                    do in=0,ndivt-1
655                       nbegin_t=in*l_blk_t+1
656                       nend_t=min(nbegin_t+l_blk_t-1,lc%numt)
657                       nsize_t=nend_t-nbegin_t+1
658                       e_mat_double(1:lc%numt, 1:nsize_t)=dble(im_e_mat_single(1:lc%numt, nbegin_t:nend_t, iw-nbegin_g+1))
659                       call dgemm('T','N',numl,nsize_t,numt,1.d0,ttl_j%tt_mat,numt,e_mat_double,numt,0.d0,tmp_mat(1,nbegin_t),numl)
660                    enddo
661                    if( options%l_verbose) write(stdout,*) 'ATT3'!DEBUG
662                    FLUSH(stdout)
663
664                    call dgemm('N','N',numl,numl,numt,1.d0,tmp_mat,numl,ttl%tt_mat,numt,0.d0,im_g_mat(1,1,iw-nbegin_g+1),numl)
665                 endif
666
667
668
669              enddo
670
671              deallocate(tmp_mat)
672
673              write(stdout,*) 'Fourier trasform:'
674              FLUSH(stdout)
675
676
677
678!Fourier trasform
679              allocate(g_tmp(numl,numl))
680              allocate(g_dumm(numl,numl))
681              if( options%l_verbose) write(stdout,*) 'ATT1'
682              FLUSH(stdout)
683
684!loop on time
685              do it=0,tf%n
686!each procs sums up its matrices in frequency with opportune factor
687                 time=tf%times(it)
688                 g_tmp(:,:)=0.d0
689                 do iw=nbegin_g,nend_g
690                    !factor=2.d0*dble(tf%weights_freq_g(iw)*exp((0.d0,1.d0)*tf%times(it)*tf%freqs_g_eff(iw)))/(2.d0*pi)
691                    factor=2.d0*dble(tf%weights_freq_g(iw)*exp_table(it+1,iw-nbegin_g+1))/(2.d0*pi)
692                    g_tmp(:,:)=g_tmp(:,:)+re_g_mat(:,:,iw-nbegin_g+1)*factor
693                 enddo
694#if defined(__MPI)
695!the distribution of times on procs is the same of that for frequecies
696                 iproc_time=it/l_blk
697!all processors sums to iproc_time
698                 if(iproc_time==mpime) then
699                    call MPI_REDUCE(g_tmp,re_g_mat_t(1,1,it-nbegin+1),numl*numl,&
700                         &MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr)
701                 else
702                    call MPI_REDUCE(g_tmp,g_dumm,numl*numl,MPI_DOUBLE_PRECISION,&
703                         &MPI_SUM,iproc_time,world_comm,ierr)
704                 endif
705
706#else
707                 re_g_mat_t(:,:,it+1)=g_tmp(:,:)
708
709#endif
710                 g_tmp(:,:)=0.d0
711                 do iw=nbegin_g,nend_g
712                    !factor=-2.d0*dimag(tf%weights_freq_g(iw)*exp((0.d0,1.d0)*tf%times(it)*tf%freqs_g_eff(iw)))/(2.d0*pi)
713                    factor=-2.d0*dimag(tf%weights_freq_g(iw)*exp_table(it+1,iw-nbegin_g+1))/(2.d0*pi)
714                    g_tmp(:,:)=g_tmp(:,:)+im_g_mat(:,:,iw-nbegin_g+1)*factor
715                 enddo
716#if defined(__MPI)
717!the distribution of times on procs is the same of that for frequecies
718                 iproc_time=it/l_blk
719!all processors sums to iproc_time
720                 if(iproc_time==mpime) then
721                    call MPI_REDUCE(g_tmp,im_g_mat_t(1,1,it-nbegin+1),numl*numl,&
722                         &MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr)
723                 else
724                    call MPI_REDUCE(g_tmp,g_dumm,numl*numl,MPI_DOUBLE_PRECISION,MPI_SUM,iproc_time,world_comm,ierr)
725                 endif
726
727#else
728                 im_g_mat_t(:,:,it+1)=g_tmp(:,:)
729
730#endif
731
732
733              enddo
734              deallocate(g_tmp,g_dumm)
735           else!for whole self-energy calculates directly in time domain
736              do it=nbegin,nend
737                 call dgemm('T','N',numl,numl,numt,1.d0,ttl_j%tt_mat,numt,re_e_mat_part(1,1,it-nbegin+1),&
738                      &numt,0.d0,re_g_mat_t(1,1,it-nbegin+1),numl)
739                 call dgemm('T','N',numl,numl,numt,1.d0,ttl_j%tt_mat,numt,im_e_mat_part(1,1,it-nbegin+1),&
740                      &numt,0.d0,im_g_mat_t(1,1,it-nbegin+1),numl)
741              enddo
742           endif
743           if( options%l_verbose) write(stdout,*) 'done'
744           FLUSH(stdout)
745
746
747
748
749!loop on frequency
750
751
752           write(stdout,*) 'Products in imaginary time:'
753           FLUSH(stdout)
754
755
756           if( .not.(ss%whole_s .and.l_distribute_sm )) then
757              allocate(tmp_mat(numpw,numl),re_h_mat(numpw,numpw),im_h_mat(numpw,numpw))
758              do it=nbegin,nend
759
760
761!matrix multiply
762                 call dgemm('N','N',numpw,numl,numl,1.d0,vtl_j%vt_mat,numpw,re_g_mat_t(1,1,it-nbegin+1),numl,0.d0,tmp_mat,numpw)
763                 call dgemm('N','T',numpw,numpw,numl,1.d0,tmp_mat,numpw, vtl%vt_mat,numpw,0.d0,re_h_mat,numpw)
764
765                 call dgemm('N','N',numpw,numl,numl,1.d0,vtl_j%vt_mat,numpw,im_g_mat_t(1,1,it-nbegin+1),numl,0.d0,tmp_mat,numpw)
766                 call dgemm('N','T',numpw,numpw,numl,1.d0,tmp_mat,numpw, vtl%vt_mat,numpw,0.d0,im_h_mat,numpw)
767
768!product
769
770                 if(ii==jj) then
771                    ss%diag(ii,it+ss%n+1,is)=DDOT(numpw*numpw,re_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)+&
772                         &DDOT(numpw*numpw,im_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)
773                    ss%diag(ii,ss%n+1-it,is)=DDOT(numpw*numpw,re_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)-&
774                         &DDOT(numpw*numpw,im_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)
775
776!if required add semicore terms
777                    if(options%l_semicore) then
778!NOT_TO_BE_INCLUDED_START
779                       allocate(tmp_vec_sc(numpw))
780                       do iv_sc=1,sc%n_semicore
781                          if(it==0) write(stdout,*) 'SEMICORE PRIMA',iv_sc,ii, ss%diag(ii,it+ss%n+1,is)
782                          call dgemv('N',numpw,numpw,1.d0,pw_mat_t(1,1,it-nbegin+1),numpw,&
783                               &sc%ppw_mat(1,iv_sc,ii),1,0.d0,tmp_vec_sc,1)
784                          ss%diag(ii,it+ss%n+1,is)= ss%diag(ii,it+ss%n+1,is)- &!ATTENZIONE ERA +
785                               &DDOT(numpw,tmp_vec_sc,1,sc%ppw_mat(1,iv_sc,ii),1)*exp((offset+sc%en_sc(iv_sc))*tf%times(it))
786                          if(it==0) write(stdout,*) 'SEMICORE',iv_sc,ii, ss%diag(ii,it+ss%n+1,is)
787                          if((offset+sc%en_sc(iv_sc))*tf%times(it)>0) write(stdout,*) 'OCIO!!!'!DEBUG ATTENZIONE
788                       enddo
789                       deallocate(tmp_vec_sc)
790!NOT_TO_BE_INCLUDED_END
791                    endif
792                 endif
793                 if(ss%whole_s) then
794                    ss%whole(jj,ii,it+ss%n+1,is)=DDOT(numpw*numpw,re_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)+&
795                         &DDOT(numpw*numpw,im_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)
796                    ss%whole(jj,ii,ss%n+1-it,is)=DDOT(numpw*numpw,re_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)-&
797                         &DDOT(numpw*numpw,im_h_mat,1,pw_mat_t(1,1,it-nbegin+1),1)
798
799                 endif
800              enddo
801              deallocate(re_h_mat,im_h_mat)
802              deallocate(tmp_mat)
803           else
804              allocate(tmp_mat(numl,numl))
805              do it=nbegin,nend
806                 call dgemm('T','N',numl,numl,numpw,1.d0,vtl_j%vt_mat,numpw,pw_part_t(1,1,it-nbegin+1),numpw,0.d0,tmp_mat,numl)
807                 if(ii==jj) then
808                    ss%diag(ii,it+ss%n+1,is)=DDOT(numl*numl,re_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)+&
809                         &DDOT(numl*numl,im_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)
810                    ss%diag(ii,ss%n+1-it,is)=DDOT(numl*numl,re_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)-&
811                         &DDOT(numl*numl,im_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)
812                 endif
813
814                 ss%whole(jj,ii,it+ss%n+1,is)=DDOT(numl*numl,re_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)+&
815                      &DDOT(numl*numl,im_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)
816                 ss%whole(jj,ii,ss%n+1-it,is)=DDOT(numl*numl,re_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)-&
817                      &DDOT(numl*numl,im_g_mat_t(1,1,it-nbegin+1),1,tmp_mat,1)
818
819
820              enddo
821              deallocate(tmp_mat)
822           endif
823           if( options%l_verbose) write(stdout,*) 'done'
824           FLUSH(stdout)
825
826
827
828!mp_sum for distributing on all processors
829           if(ii==jj) then
830              call mp_sum(ss%diag(ii,1:2*ss%n+1,is),world_comm)
831           endif
832           if(ss%whole_s) then
833              call mp_sum(ss%whole(jj,ii,1:2*ss%n+1,is),world_comm)
834           endif
835
836
837!FT of diagonal part of self-nergy
838
839
840
841           !deallocate(re_g_mat_t, im_g_mat_t)
842
843           call free_memory(vtl_j)
844           call free_memory(ttl_j)
845
846
847        enddo !jj
848
849        call free_memory(vtl)
850        call free_memory(ttl)
851
852     enddo!on i_min, i_max
853
854  enddo!on i_cycles
855
856
857!put factor due to FFT on imaginary axis
858     ss%diag(:,:,is)=ss%diag(:,:,is)*(0.d0,1.d0)
859
860     if(ss%whole_s) then
861        ss%whole(:,:,:,is)=ss%whole(:,:,:,is)*(0.d0,1.d0)
862     endif
863     deallocate(re_g_mat,im_g_mat)
864     deallocate(re_g_mat_t,im_g_mat_t)
865
866     deallocate(pw_mat_t)
867     if(.not.l_single) then
868        deallocate(re_e_mat,im_e_mat)
869     else
870        deallocate(re_e_mat_single, im_e_mat_single, e_mat_double)
871     endif
872     if( ss%whole_s .and.l_distribute_sm ) then
873        deallocate(st_save,sl_save)
874        if(.not.l_single)  deallocate(re_e_mat_t,im_e_mat_t)
875        if(.not.l_single)  deallocate(re_e_mat_part,im_e_mat_part,pw_part_t)
876     endif
877     deallocate(exp_table)
878     call free_memory(lc)
879  enddo!on spin
880  call free_memory(uu)
881  call free_memory(sc)
882  return
883end subroutine do_self_lanczos_time
884
885
886
887subroutine solve_lanczos_single(alpha,re_e_mat,im_e_mat,lc)
888!this subroutine sums to  the matrix E_{no}=<t_n|(H-alpha)^-1|t_o>
889
890  USE kinds,            ONLY : DP
891  USE basic_structures, ONLY : lanczos_chain, initialize_memory,free_memory
892  USE io_global,        ONLY : stdout
893  USE mp,               ONLY : mp_sum,mp_bcast
894  USE mp_world,         ONLY : nproc, mpime, world_comm
895
896  implicit none
897
898
899  COMPLEX(kind=DP) :: alpha!constant for Ev+iw
900  TYPE(lanczos_chain) :: lc!lanczos chain descriptor
901  REAL(kind=4) :: re_e_mat(lc%numt,lc%numt) !real part of matrix to be calculated
902  REAL(kind=4) :: im_e_mat(lc%numt,lc%numt) !imaginary part of matrix to be calculated
903
904
905
906  INTEGER :: io,info,ii,jj
907  COMPLEX(kind=DP), ALLOCATABLE :: dl(:),du(:),d(:),t(:)
908  COMPLEX(kind=DP), ALLOCATABLE :: omat(:,:)
909  REAL(kind=DP), ALLOCATABLE :: tmp_mat(:,:)
910  INTEGER :: l_blk,nbegin,nend, iproc
911  COMPLEX(kind=DP), ALLOCATABLE :: e_mat(:)
912
913
914
915  l_blk= (lc%numt)/nproc
916  if(l_blk*nproc < (lc%numt)) l_blk = l_blk+1
917  nbegin=mpime*l_blk+1
918  nend=nbegin+l_blk-1
919
920  allocate(dl(lc%num_steps-1),du(lc%num_steps-1),d(lc%num_steps),t(lc%num_steps))
921  re_e_mat(:,:)=0.0
922  im_e_mat(:,:)=0.0
923  allocate(omat(lc%numt,lc%num_steps))
924  allocate(tmp_mat(lc%numt,lc%num_steps))
925  allocate(e_mat(lc%numt))
926!loop on o
927  do io=1,lc%numt
928!!set up vectors for lapack routine
929     !recover matrix from processor
930     tmp_mat(:,:)=0.d0
931     if(io >= nbegin .and. io <= nend) then
932        tmp_mat(1:lc%numt,1:lc%num_steps)=lc%o_mat(1:lc%numt,1:lc%num_steps,io-nbegin+1)
933     endif
934     iproc=(io-1)/l_blk
935     call mp_bcast(tmp_mat(:,:), iproc, world_comm)
936     omat(:,:)=dcmplx(tmp_mat(:,:),0.d0)
937
938
939
940     dl(1:lc%num_steps-1)=cmplx(lc%f(1:lc%num_steps-1,io),0.d0)
941     du(1:lc%num_steps-1)=cmplx(lc%f(1:lc%num_steps-1,io),0.d0)
942     d(1:lc%num_steps)=cmplx(lc%d(1:lc%num_steps,io),0.d0)+alpha
943     t(:)=(0.d0,0.d0)
944     t(1)=(1.d0,0.d0)
945
946!!call lapack
947     call zgtsv(lc%num_steps,1,dl,d,du,t,lc%num_steps,info)
948     if(info /= 0) then
949        write(stdout,*) 'ZGTSV info:', info
950        FLUSH(stdout)
951        stop
952     endif
953
954
955
956!!calculate term
957
958     call zgemm('N','N',lc%numt,1,lc%num_steps,(1.d0,0.d0),omat,lc%numt,t,lc%num_steps,(0.d0,0.d0),e_mat,lc%numt)
959     re_e_mat(1:lc%numt,io)=re_e_mat(1:lc%numt,io)+real(e_mat(1:lc%numt))
960     im_e_mat(1:lc%numt,io)=im_e_mat(1:lc%numt,io)+imag(e_mat(1:lc%numt))
961  enddo
962
963
964  do ii=1,lc%numt
965     do jj=ii+1,lc%numt
966        re_e_mat(ii,jj)=0.5*(re_e_mat(ii,jj)+re_e_mat(jj,ii))
967        re_e_mat(jj,ii)=re_e_mat(ii,jj)
968        im_e_mat(ii,jj)=0.5*(im_e_mat(ii,jj)+im_e_mat(jj,ii))
969        im_e_mat(jj,ii)=im_e_mat(ii,jj)
970
971     enddo
972  enddo
973
974  deallocate(dl,du,d,t)
975  deallocate(omat,tmp_mat)
976  deallocate(e_mat)
977  return
978end  subroutine solve_lanczos_single
979
980
981subroutine solve_lanczos_fake_single(lc)
982
983!this subroutine is a parallel fake routine for the solve lanczos routine
984
985  USE kinds,            ONLY : DP
986  USE basic_structures, ONLY : lanczos_chain, initialize_memory,free_memory
987  USE io_global,        ONLY : stdout
988  USE mp,               ONLY : mp_sum,mp_bcast
989  USE mp_world,         ONLY : nproc,mpime,world_comm
990
991  implicit none
992
993  TYPE(lanczos_chain) :: lc!lanczos chain descriptor
994
995  INTEGER :: l_blk,nbegin,nend, iproc
996  REAL(kind=DP), ALLOCATABLE :: o_mat(:,:)
997  INTEGER :: io
998
999
1000  allocate(o_mat(lc%numt,lc%num_steps))
1001  l_blk= (lc%numt)/nproc
1002  if(l_blk*nproc < (lc%numt)) l_blk = l_blk+1
1003  nbegin=mpime*l_blk+1
1004  nend=nbegin+l_blk-1
1005
1006
1007!loop on io
1008  do io=1,lc%numt
1009!recover matrix from processor
1010     o_mat(:,:)=0.d0
1011     if(io >= nbegin .and. io <= nend) then
1012        o_mat(:,:)=lc%o_mat(:,:,io-nbegin+1)
1013     endif
1014     iproc=(io-1)/l_blk
1015     call mp_bcast(o_mat(:,:), iproc, world_comm)
1016
1017
1018  enddo
1019
1020  deallocate(o_mat)
1021end subroutine solve_lanczos_fake_single
1022
1023
1024