! Tangled code
  MODULE m_pexsi_local_dos
#ifdef SIESTA__PEXSI
  private
  public :: pexsi_local_dos

  CONTAINS
  subroutine pexsi_local_dos( )
    use m_energies

    use sparse_matrices
    USE siesta_options
    use siesta_geom
    use atomlist,       only: indxuo, indxua
    use atomlist,       only: qtot, no_u, no_l
    use atomlist,       only: iphorb
    use atomlist,       only: datm, no_s, iaorb
    use fdf
    use files,          only : slabel
    use files,          only : filesOut_t ! type for output file names
    use parallel,       only: SIESTA_worker
    use m_ntm
    use m_forces,       only: fa
    use m_spin,         only: nspin
    use atomlist,       only: qs => qtots
    use m_energies,     only: efs
    use m_dhscf,        only: dhscf

    implicit none

    integer   :: dummy_iscf = 1
    real(dp)  :: dummy_str(3,3), dummy_strl(3,3)  ! for dhscf call
    real(dp)  :: dummy_dipol(3)

    real(dp)  :: factor, g2max
    real(dp)  :: energy, broadening

    type(filesOut_t)     :: filesOut  ! blank output file names

    energy = fdf_get('PEXSI.LDOS.Energy',0.0_dp,"Ry")
    broadening = fdf_get('PEXSI.LDOS.Broadening',0.01_dp,"Ry")
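    ! Example fdf input block (values are illustrative only):
    !   PEXSI.LDOS.Energy      -0.25 Ry
    !   PEXSI.LDOS.Broadening   0.01 Ry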

    ! Note that we re-use Dscf, so it will be obliterated
    call get_LDOS_SI( no_u, no_l, nspin,  &
         maxnh, numh, listh, H, S,  &
         Dscf, energy, broadening)
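    ! On return, Dscf no longer holds the SCF density matrix: it contains
    ! the energy-resolved (LDOS) density matrix at the requested energy,
    ! which dhscf below projects onto the real-space grid and writes to
    ! the .LDSI file.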

    if (SIESTA_worker) then
       ! Find the LDOS in the real space mesh
       filesOut%rho = trim(slabel) // '.LDSI'
       g2max = g2cut

       ! There is too much clutter here, because dhscf is
       ! a "kitchen-sink" routine that does too many things

       call dhscf( nspin, no_s, iaorb, iphorb, no_l, &
            no_u, na_u, na_s, isa, xa_last, indxua,  &
            ntm, 0, 0, 0, filesOut,                  &
            maxnh, numh, listhptr, listh, Dscf, Datm, maxnh, H, &
            Enaatm, Enascf, Uatm, Uscf, DUscf, DUext, Exc, Dxc, &
            dummy_dipol, dummy_str, fa, dummy_strl )
    endif

  END subroutine pexsi_local_dos

  subroutine get_LDOS_SI( no_u, no_l, nspin_in,  &
       maxnh, numh, listh, H, S,  &
       LDOS_DM, energy, broadening)

  use precision, only  : dp
  use fdf
  use units,       only: eV, pi
  use sys,         only: die
  use m_mpi_utils, only: broadcast
  use parallel, only   : SIESTA_worker, BlockSize
  use parallel, only   : SIESTA_Group, SIESTA_Comm
  use m_redist_spmatrix, only: aux_matrix, redistribute_spmatrix
  use class_Distribution
  use alloc,             only: re_alloc, de_alloc
  use mpi_siesta
  use iso_c_binding
  use f_ppexsi_interface, only: f_ppexsi_options
  use f_ppexsi_interface, only: f_ppexsi_plan_finalize
  use f_ppexsi_interface, only: f_ppexsi_plan_initialize
  use f_ppexsi_interface, only: f_ppexsi_selinv_complex_symmetric_matrix
  use f_ppexsi_interface, only: f_ppexsi_load_real_symmetric_hs_matrix
  use f_ppexsi_interface, only: f_ppexsi_set_default_options
  use f_ppexsi_interface, &
        only: f_ppexsi_symbolic_factorize_complex_symmetric_matrix

  implicit none

  integer, intent(in)          :: maxnh, no_u, no_l, nspin_in
  integer, intent(in), target  :: listh(maxnh), numh(no_l)
  real(dp), intent(in), target :: H(maxnh,nspin_in), S(maxnh)
  real(dp), intent(in)         :: energy, broadening
  real(dp), intent(out)        :: LDOS_DM(maxnh,nspin_in)
  integer        :: ih, i
  integer        :: info
  logical        :: write_ok
  !------------
  external         :: timer
  integer          :: World_Comm, mpirank, ierr
  !
  integer   :: norbs
  integer   :: nspin
  integer :: PEXSI_Pole_Group, PEXSI_Spatial_Group, World_Group
  integer, allocatable :: pexsi_pole_ranks_in_world(:)
  integer  :: nworkers_SIESTA
  integer, allocatable :: siesta_ranks_in_world(:)
  integer :: PEXSI_Pole_Group_in_World
  integer, allocatable :: PEXSI_Pole_ranks_in_World_Spin(:,:)
  integer :: PEXSI_Pole_Comm, PEXSI_Spatial_Comm, PEXSI_Spin_Comm
  integer :: numNodesTotal
  integer :: npPerPole
  logical  :: PEXSI_worker
  !
  type(Distribution)   :: dist1
  type(Distribution), allocatable, target   :: dist2_spin(:)
  type(Distribution), pointer :: dist2
  integer  :: pbs, color, spatial_rank, spin_rank
  type(aux_matrix), allocatable, target :: m1_spin(:)
  type(aux_matrix) :: m2
  type(aux_matrix), pointer :: m1
  integer :: nrows, nnz, nnzLocal, numColLocal
  integer, pointer, dimension(:) :: colptrLocal => null(), rowindLocal => null()
  !
  real(dp), pointer, dimension(:) :: &
          HnzvalLocal => null(), SnzvalLocal => null(),  &
          DMnzvalLocal => null()
  !
  integer :: ispin, pexsi_spin
  type(f_ppexsi_options) :: options
  !
  integer                :: isSIdentity
  integer                :: verbosity
  integer(c_intptr_t)    :: plan
  integer :: numProcRow, numProcCol
  integer :: outputFileIndex
  real(dp), pointer, dimension(:) :: AnzvalLocal => null()
  real(dp), pointer, dimension(:) :: AinvnzvalLocal => null()
  integer :: loc
  !  --------  for serial compilation
  !
  ! Our global communicator is a duplicate of the passed communicator
  !
  call MPI_Comm_Dup(true_MPI_Comm_World, World_Comm, ierr)
  call mpi_comm_rank( World_Comm, mpirank, ierr )

  call timer("pexsi-ldos", 1)

  if (SIESTA_worker) then
     ! rename some intent(in) variables, which are only
     ! defined for the Siesta subset
     norbs = no_u
     nspin = nspin_in
  endif
  !
  call broadcast(norbs,comm=World_Comm)
  call broadcast(nspin,World_Comm)
  call MPI_Comm_Group(World_Comm,World_Group, ierr)
  call MPI_Group_Size(SIESTA_Group, nworkers_SIESTA, ierr)
  allocate(siesta_ranks_in_world(nworkers_SIESTA))
  call MPI_Group_translate_ranks( SIESTA_Group, nworkers_SIESTA, &
       (/ (i,i=0,nworkers_SIESTA-1) /), &
       World_Group, siesta_ranks_in_world, ierr )
  call newDistribution(dist1,World_Comm,siesta_ranks_in_world, &
                       TYPE_BLOCK_CYCLIC,BlockSize,"bc dist")
  deallocate(siesta_ranks_in_world)
  call MPI_Barrier(World_Comm,ierr)

  call mpi_comm_size( World_Comm, numNodesTotal, ierr )

  npPerPole  = fdf_get("PEXSI.np-per-pole",4)
  npPerPole  = fdf_get("PEXSI.LDOS.np-per-pole",npPerPole)
  if (nspin*npPerPole > numNodesTotal) &
       call die("PEXSI.np-per-pole is too big for MPI size")

  ! "Row" communicator for independent PEXSI operations on each spin
  ! The name refers to "spatial" degrees of freedom.
  color = mod(mpirank,nspin)    ! {0,1} for nspin = 2, or {0} for nspin = 1
  call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spatial_Comm, ierr)

  ! "Column" communicator for spin reductions
  color = mpirank/nspin
  call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spin_Comm, ierr)
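  ! Illustration: with nspin = 2 and 8 MPI ranks, the "spatial" split puts
  ! even world ranks {0,2,4,6} in the spin-1 set and odd ranks {1,3,5,7}
  ! in the spin-2 set, while the "spin" split pairs ranks {0,1}, {2,3},
  ! {4,5}, {6,7} for reductions across the two spins.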

  ! Group and Communicator for first-pole team of PEXSI workers
  !
  call MPI_Comm_Group(PEXSI_Spatial_Comm, PEXSI_Spatial_Group, Ierr)
  call MPI_Group_incl(PEXSI_Spatial_Group, npPerPole,   &
       (/ (i,i=0,npPerPole-1) /),&
       PEXSI_Pole_Group, Ierr)
  call MPI_Comm_create(PEXSI_Spatial_Comm, PEXSI_Pole_Group,&
       PEXSI_Pole_Comm, Ierr)

  call mpi_comm_rank( PEXSI_Spatial_Comm, spatial_rank, ierr )
  call mpi_comm_rank( PEXSI_Spin_Comm, spin_rank, ierr )
  PEXSI_worker = (spatial_rank < npPerPole)   ! Could be spin up or spin down

  ! PEXSI blocksize
  pbs = norbs/npPerPole
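  ! Integer division: if norbs is not a multiple of npPerPole, the remainder
  ! is presumably absorbed by the last process of the pole team through the
  ! TYPE_PEXSI distribution created below.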

  ! Careful with this. For the purposes of matrix transfers,
  ! we need the ranks of the Pole group
  ! in the "bridge" communicator/group (World)

  allocate(pexsi_pole_ranks_in_world(npPerPole))
  call MPI_Comm_Group(World_Comm, World_Group, Ierr)

  call MPI_Group_translate_ranks( PEXSI_Pole_Group, npPerPole, &
       (/ (i,i=0,npPerPole-1) /), &
       World_Group, pexsi_pole_ranks_in_world, ierr )

  ! What we need is to include the actual world ranks
  ! in the distribution object
  allocate (PEXSI_Pole_ranks_in_World_Spin(npPerPole,nspin))
  call MPI_AllGather(pexsi_pole_ranks_in_world,npPerPole,MPI_integer,&
       PEXSI_Pole_Ranks_in_World_Spin(1,1),npPerPole, &
       MPI_integer,PEXSI_Spin_Comm,ierr)

  ! Create distributions known to all nodes
  allocate(dist2_spin(nspin))
  do ispin = 1, nspin
     call newDistribution(dist2_spin(ispin), World_Comm, &
          PEXSI_Pole_Ranks_in_World_Spin(:,ispin),  &
          TYPE_PEXSI, pbs, "px dist")
  enddo
  deallocate(pexsi_pole_ranks_in_world,PEXSI_Pole_Ranks_in_World_Spin)
  call MPI_Barrier(World_Comm,ierr)

  pexsi_spin = spin_rank+1  ! {1,2}
  ! This is done serially on the Siesta side, each time
  ! filling in the structures in one PEXSI set

  allocate(m1_spin(nspin))
  do ispin = 1, nspin

     m1 => m1_spin(ispin)

     if (SIESTA_worker) then
        m1%norbs = norbs
        m1%no_l  = no_l
        m1%nnzl  = sum(numH(1:no_l))
        m1%numcols => numH
        m1%cols    => listH
        allocate(m1%vals(2))
        m1%vals(1)%data => S(:)
        m1%vals(2)%data => H(:,ispin)

     endif  ! SIESTA_worker

     call timer("redist_orbs_fwd", 1)

     ! Note that we cannot simply wrap this in a pexsi_spin test, as
     ! there are Siesta nodes in both spin sets.
     ! We must discriminate the PEXSI workers by the distribution info
     dist2 => dist2_spin(ispin)
     call redistribute_spmatrix(norbs,m1,dist1,m2,dist2,World_Comm)

     call timer("redist_orbs_fwd", 2)

     if (PEXSI_worker .and. (pexsi_spin == ispin) ) then

        nrows = m2%norbs          ! or simply 'norbs'
        numColLocal = m2%no_l
        nnzLocal    = m2%nnzl
        call MPI_AllReduce(nnzLocal,nnz,1,MPI_integer,MPI_sum,PEXSI_Pole_Comm,ierr)

        call re_alloc(colptrLocal,1,numColLocal+1,"colptrLocal","pexsi_ldos")
        colptrLocal(1) = 1
        do ih = 1,numColLocal
           colptrLocal(ih+1) = colptrLocal(ih) + m2%numcols(ih)
        enddo
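        ! colptrLocal now holds 1-based CSC column pointers for the local
        ! block of columns: column ih owns entries
        ! colptrLocal(ih) .. colptrLocal(ih+1)-1 of rowindLocal and of the
        ! *nzvalLocal arrays.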

        rowindLocal => m2%cols
        SnzvalLocal => m2%vals(1)%data
        HnzvalLocal => m2%vals(2)%data

        call re_alloc(DMnzvalLocal,1,nnzLocal,"DMnzvalLocal","pexsi_ldos")

        call memory_all("after transferring H+S for PEXSI-LDOS",PEXSI_Pole_Comm)

     endif ! PEXSI worker
  enddo

  ! Make these available to all
  ! (Note that the values are those on process 0, which is in the spin=1 set.
  ! In fact, they are only needed for calls to the interface, so the broadcast
  ! could be over PEXSI_Spatial_Comm only.)

  call MPI_Bcast(nrows,1,MPI_integer,0,World_Comm,ierr)
  call MPI_Bcast(nnz,1,MPI_integer,0,World_Comm,ierr)

  call memory_all("after setting up H+S for PEXSI LDOS",World_comm)

  ! --
  isSIdentity = 0
  !
  call f_ppexsi_set_default_options( options )
  ! Ordering flag:
  !   1: Use METIS
  !   0: Use PARMETIS/PTSCOTCH
  options%ordering = fdf_get("PEXSI.ordering",1)
  ! Number of processors for symbolic factorization
  ! Only relevant for PARMETIS/PT_SCOTCH
  options%npSymbFact = fdf_get("PEXSI.np-symbfact",1)
  options%verbosity = fdf_get("PEXSI.verbosity",1)
  call get_row_col(npPerPole,numProcRow,numProcCol)

  ! Set the outputFileIndex to be the pole index.
  ! Starting from PEXSI v0.8.0, the first processor for each pole outputs
  ! information.

  if( mod( mpirank, npPerPole ) .eq. 0 ) then
    outputFileIndex = mpirank / npPerPole
  else
    outputFileIndex = -1
  endif
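  ! Example: with npPerPole = 4, world ranks 0, 4, 8, ... are assigned
  ! outputFileIndex 0, 1, 2, ..., and all other ranks get -1 (no PEXSI
  ! log file for them).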
  !
  ! Note that even though we only use one pole's worth of processors, we
  ! still use the full spatial PEXSI communicator in the plan.
  ! Failing to do so leads to an error. This is not sufficiently documented.
  !
  plan = f_ppexsi_plan_initialize(&
        PEXSI_Spatial_Comm,&
        numProcRow,&
        numProcCol,&
        outputFileIndex,&
        info)

  call check_info(info,"plan_initialize in LDOS")
  call f_ppexsi_load_real_symmetric_hs_matrix(&
        plan,&
        options,&
        nrows,&
        nnz,&
        nnzLocal,&
        numColLocal,&
        colptrLocal,&
        rowindLocal,&
        HnzvalLocal,&
        isSIdentity,&
        SnzvalLocal,&
        info)

  call check_info(info,"load_real_sym_hs_matrix in LDOS")

  call f_ppexsi_symbolic_factorize_complex_symmetric_matrix(&
       plan, &
       options,&
       info)
  call check_info(info,"factorize complex matrix in LDOS")

  options%isSymbolicFactorize = 0 ! We do not need it anymore
  if (PEXSI_worker) then

     if(mpirank == 0) then
        write(6,"(a,f16.5,f10.5)") &
             'Calling PEXSI LDOS routine. Energy and broadening (eV) ', &
             energy/eV, broadening/eV
        write(6,"(a,i4)") &
             'Processors working on selected inversion: ', npPerPole
     endif

     call timer("pexsi-ldos-selinv", 1)

     ! Build AnzvalLocal as A = H - z*S, with z = energy + i*broadening,
     ! stored as a one-dimensional real array of 2*nnzLocal entries with
     ! interleaved (real, imaginary) pairs.

     call re_alloc(AnzvalLocal,1,2*nnzLocal,"AnzvalLocal","pexsi_ldos")
     call re_alloc(AinvnzvalLocal,1,2*nnzLocal,"AinvnzvalLocal","pexsi_ldos")

     loc = 1
     do i = 1, nnzLocal
        AnzvalLocal(loc)   = HnzvalLocal(i) - energy*SnzvalLocal(i)
        AnzvalLocal(loc+1) = - broadening*SnzvalLocal(i)
        loc = loc + 2
     enddo

     call f_ppexsi_selinv_complex_symmetric_matrix(&
          plan,&
          options,&
          AnzvalLocal,&
          AinvnzvalLocal,&
          info)

     call check_info(info,"selinv complex matrix in LDOS")

     ! Get DMnzvalLocal as 1/pi * Imag(Ainv...)

     loc = 1
     do i = 1, nnzLocal
        DMnzvalLocal(i) = (1.0_dp/pi) * AinvnzvalLocal(loc+1)
        loc = loc + 2
     enddo
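     ! Since A = H - z*S = -(z*S - H), this is DM = -(1/pi) * Im G(z), with
     ! G(z) = (z*S - H)^-1 and z = energy + i*broadening: the energy-resolved
     ! density matrix, Lorentzian-broadened by 'broadening'. Back in the
     ! caller, dhscf projects this matrix onto the real-space grid, giving
     ! the local density of states at the requested energy.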
     call de_alloc(AnzvalLocal,"AnzvalLocal","pexsi_ldos")
     call de_alloc(AinvnzvalLocal,"AinvnzvalLocal","pexsi_ldos")

     call timer("pexsi-ldos-selinv", 2)
     !
  endif ! PEXSI_worker

  do ispin = 1, nspin

     m1 => m1_spin(ispin)

     if (PEXSI_worker .and. (pexsi_spin == ispin)) then
        ! Prepare m2 to transfer

        call de_alloc(colptrLocal,"colptrLocal","pexsi_ldos")

        call de_alloc(m2%vals(1)%data,"m2%vals(1)%data","pexsi_ldos")
        call de_alloc(m2%vals(2)%data,"m2%vals(2)%data","pexsi_ldos")

        deallocate(m2%vals)
        allocate(m2%vals(1))
        m2%vals(1)%data => DMnzvalLocal(1:nnzLocal)

     endif

     ! Prepare m1 to receive the results
     if (SIESTA_worker) then
        nullify(m1%vals(1)%data)    ! formerly pointing to S
        nullify(m1%vals(2)%data)    ! formerly pointing to H
        deallocate(m1%vals)
        nullify(m1%numcols)         ! formerly pointing to numH
        nullify(m1%cols)            ! formerly pointing to listH
     endif

     call timer("redist_orbs_bck", 1)
     dist2 => dist2_spin(ispin)
     call redistribute_spmatrix(norbs,m2,dist2,m1,dist1,World_Comm)
     call timer("redist_orbs_bck", 2)

     if (PEXSI_worker .and. (pexsi_spin == ispin)) then
        call de_alloc(DMnzvalLocal, "DMnzvalLocal", "pexsi_ldos")

        nullify(m2%vals(1)%data)    ! formerly pointing to DM
        deallocate(m2%vals)
        ! allocated in the direct transfer
        call de_alloc(m2%numcols,"m2%numcols","pexsi_ldos")
        call de_alloc(m2%cols,   "m2%cols",   "pexsi_ldos")
     endif

     if (SIESTA_worker) then

        LDOS_DM(:,ispin) = m1%vals(1)%data(:)
        ! Check no_l
        if (no_l /= m1%no_l) then
           call die("Mismatch in no_l")
        endif
        ! Check listH
        if (any(listH(:) /= m1%cols(:))) then
           call die("Mismatch in listH")
        endif

        call de_alloc(m1%vals(1)%data,"m1%vals(1)%data","pexsi_ldos")
        deallocate(m1%vals)
        ! allocated in the direct transfer
        call de_alloc(m1%numcols,"m1%numcols","pexsi_ldos")
        call de_alloc(m1%cols,   "m1%cols",   "pexsi_ldos")

     endif
  enddo
  call timer("pexsi-ldos", 2)

  call delete(dist1)
  do ispin = 1, nspin
     call delete(dist2_spin(ispin))
  enddo
  deallocate(dist2_spin)
  deallocate(m1_spin)

  call MPI_Comm_Free(PEXSI_Spatial_Comm, ierr)
  call MPI_Comm_Free(PEXSI_Spin_Comm, ierr)
  call MPI_Comm_Free(World_Comm, ierr)

  ! This communicator was created from a subgroup.
  ! As such, it is MPI_COMM_NULL for those processes
  ! not in the subgroup (non PEXSI_workers). Only
  ! defined communicators can be freed.
  if (PEXSI_worker) then
     call MPI_Comm_Free(PEXSI_Pole_Comm, ierr)
  endif

  ! We finalize the plan here
  call f_ppexsi_plan_finalize( plan, info )

  call MPI_Group_Free(PEXSI_Spatial_Group, ierr)
  call MPI_Group_Free(PEXSI_Pole_Group, ierr)
  call MPI_Group_Free(World_Group, ierr)

  CONTAINS

  subroutine get_row_col(np,nrow,ncol)
  integer, intent(in)  :: np
  integer, intent(out) :: nrow, ncol
  !
  ! Finds factors nrow and ncol with nrow*ncol = np that are as close
  ! to each other as possible, with nrow >= ncol.
  ! For prime np, ncol = 1 and nrow = np.

  ncol  = floor(sqrt(dble(np)))
  do
    nrow = np/ncol
    if (nrow*ncol == np) exit
    ncol = ncol - 1
  enddo
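  ! Examples: np = 12 gives nrow = 4, ncol = 3; np = 9 gives nrow = ncol = 3;
  ! np = 7 (prime) gives nrow = 7, ncol = 1.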
  end subroutine get_row_col

  subroutine check_info(info,str)
  integer, intent(in) :: info
  character(len=*), intent(in) :: str

  if (mpirank == 0) then
     if (info /= 0) then
        write(6,*) trim(str) // " info : ", info
        call die("Error exit from " // trim(str) // " routine")
     endif
     call pxfflush(6)
  endif
  end subroutine check_info

  end subroutine get_LDOS_SI
#endif
  end module m_pexsi_local_dos
! End of tangled code