-*- mode: Org -*-
#+TITLE: Literate version of PEXSI LDOS calculator
#+AUTHOR: Alberto Garcia

* Introduction

This is a spin-polarized version of the SIESTA-PEXSI interface for the
calculation of the LDOS (which is a "partial" DM computed in a
restricted energy window).
Module structure:

#+BEGIN_SRC f90 :noweb-ref module-structure
  MODULE m_pexsi_local_dos
#ifdef SIESTA__PEXSI
  private
  public :: pexsi_local_dos

  CONTAINS
  <<siesta-side-parent-routine>>
  <<pexsi-ldos-routine>>
#endif
  end module m_pexsi_local_dos
#+END_SRC

#+BEGIN_SRC f90 :noweb yes :tangle m_pexsi_local_dos.F90 :exports none
    ! Tangled code
    <<module-structure>>
    ! End of tangled code
#+END_SRC

* Parent routine with dispatch to Siesta post-processing

This routine defines the energy window (through =energy= and
=broadening=, read from the fdf input) and calls the PEXSI routine to
compute the partial DM. The result is then passed to the =dhscf=
machinery, which computes the corresponding real-space charge: this is
precisely the LDOS. The output file has the extension =.LDSI=.
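
For reference, a minimal fdf fragment selecting the window could look
like the lines below (the keyword names come from the =fdf_get= calls
in the code; the numerical values are placeholders, not
recommendations):

#+BEGIN_EXAMPLE
PEXSI.LDOS.Energy      -3.0 eV
PEXSI.LDOS.Broadening   0.2 eV
#+END_EXAMPLE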

#+BEGIN_SRC f90 :noweb-ref siesta-side-parent-routine
  subroutine pexsi_local_dos( )
    use m_energies

    use sparse_matrices
    USE siesta_options
    use siesta_geom
    use atomlist,       only: indxuo, indxua
    use atomlist,       only: qtot, no_u, no_l
    use atomlist,       only: iphorb
    use atomlist,       only: datm, no_s, iaorb
    use fdf
    use files,          only: slabel
    use files,          only: filesOut_t ! type for output file names
    use parallel,       only: SIESTA_worker
    use m_ntm
    use m_forces,       only: fa
    use m_spin,         only: nspin
    use m_dhscf,        only: dhscf

    implicit none

    integer   :: dummy_iscf = 1
    real(dp)  :: dummy_str(3,3), dummy_strl(3,3)  ! for dhscf call
    real(dp)  :: dummy_dipol(3)

    real(dp)  :: factor, g2max
    real(dp)  :: energy, broadening

    type(filesOut_t)     :: filesOut  ! blank output file names

    energy = fdf_get('PEXSI.LDOS.Energy',0.0_dp,"Ry")
    broadening = fdf_get('PEXSI.LDOS.Broadening',0.01_dp,"Ry")

    ! Note that we re-use Dscf, so it will be obliterated
    call get_LDOS_SI( no_u, no_l, nspin,  &
         maxnh, numh, listh, H, S,  &
         Dscf, energy, broadening)

    if (SIESTA_worker) then
       ! Find the LDOS in the real-space mesh
       filesOut%rho = trim(slabel) // '.LDSI'
       g2max = g2cut

       ! There is too much clutter here, because dhscf is
       ! a "kitchen-sink" routine that does too many things

       call dhscf( nspin, no_s, iaorb, iphorb, no_l, &
            no_u, na_u, na_s, isa, xa_last, indxua,  &
            ntm, 0, 0, 0, filesOut,                  &
            maxnh, numh, listhptr, listh, Dscf, Datm, maxnh, H, &
            Enaatm, Enascf, Uatm, Uscf, DUscf, DUext, Exc, Dxc, &
            dummy_dipol, dummy_str, fa, dummy_strl )
    endif

  END subroutine pexsi_local_dos
#+end_src


* PEXSI LDOS routine

** Main structure

This is the overall structure of the LDOS routine; each noweb chunk is
expanded in the sections that follow.

#+begin_src f90 :noweb-ref pexsi-ldos-routine
<<routine-header>>
<<routine-variables>>
!  --------  for serial compilation
<<define-communicators>>
<<re-distribute-matrices>>
<<set-options>>
<<prepare-plan>>
<<load-hs-matrices>>
<<factorization>>
<<compute-ldos>>
<<copy-to-siesta-side>>
<<clean-up>>

CONTAINS

<<support-routines>>

end subroutine get_LDOS_SI
#+end_src


** Routine header

#+BEGIN_SRC f90 :noweb-ref routine-header
    subroutine get_LDOS_SI( no_u, no_l, nspin_in,  &
         maxnh, numh, listh, H, S,  &
         LDOS_DM, energy, broadening)

    <<used-modules>>

    implicit none

    integer, intent(in)          :: maxnh, no_u, no_l, nspin_in
    integer, intent(in), target  :: listh(maxnh), numh(no_l)
    real(dp), intent(in), target :: H(maxnh,nspin_in), S(maxnh)
    real(dp), intent(in)         :: energy, broadening
    real(dp), intent(out)        :: LDOS_DM(maxnh,nspin_in)
#+END_SRC

*** Used modules
#+BEGIN_SRC f90 :noweb-ref used-modules
    use precision,   only: dp
    use fdf
    use units,       only: eV, pi
    use sys,         only: die
    use m_mpi_utils, only: broadcast
    use parallel,    only: SIESTA_worker, BlockSize
    use parallel,    only: SIESTA_Group, SIESTA_Comm
    use m_redist_spmatrix, only: aux_matrix, redistribute_spmatrix
    use class_Distribution
    use alloc,       only: re_alloc, de_alloc
    use mpi_siesta
    use iso_c_binding
    use f_ppexsi_interface, only: f_ppexsi_options
    use f_ppexsi_interface, only: f_ppexsi_plan_finalize
    use f_ppexsi_interface, only: f_ppexsi_plan_initialize
    use f_ppexsi_interface, only: f_ppexsi_selinv_complex_symmetric_matrix
    use f_ppexsi_interface, only: f_ppexsi_load_real_symmetric_hs_matrix
    use f_ppexsi_interface, only: f_ppexsi_set_default_options
    use f_ppexsi_interface, &
          only: f_ppexsi_symbolic_factorize_complex_symmetric_matrix
#+END_SRC

** Routine variables

The local variables of the routine must be declared in a specific
place for the compiler, but it is clearer to introduce them as they
are needed. The =routine-variables= noweb-ref is used for this purpose
throughout this document.

#+BEGIN_SRC f90 :noweb-ref routine-variables
integer        :: ih, i
integer        :: info
logical        :: write_ok
!------------
external         :: timer
#+END_SRC

** Define communicators

=World_Comm=, which is in principle set to Siesta's copy of
=MPI_Comm_World=, is the global communicator.

#+BEGIN_SRC f90 :noweb-ref routine-variables
integer          :: World_Comm, mpirank, ierr
!
integer   :: norbs
integer   :: nspin
#+END_SRC

#+BEGIN_SRC f90 :noweb-ref define-communicators
!
! Our global communicator is a duplicate of the passed communicator
!
call MPI_Comm_Dup(true_MPI_Comm_World, World_Comm, ierr)
call mpi_comm_rank( World_Comm, mpirank, ierr )

call timer("pexsi-ldos", 1)

if (SIESTA_worker) then
   ! Rename some intent(in) variables, which are only
   ! defined for the Siesta subset of processes
   norbs = no_u
   nspin = nspin_in
endif
!
call broadcast(norbs,comm=World_Comm)
call broadcast(nspin,comm=World_Comm)
#+END_SRC

Now we need to define the Siesta distribution object, as well as the
communicator and distribution object for the first team of PEXSI
workers, for the purposes of re-distributing the relevant matrices.

For spin, things are a bit more involved: the distributions must be
defined and known to all processors (via actual ranks) with respect to
the same reference "bridge" communicator, which for now is
=World_Comm=.

#+BEGIN_SRC f90 :noweb-ref routine-variables
integer :: PEXSI_Pole_Group, PEXSI_Spatial_Group, World_Group
integer, allocatable :: pexsi_pole_ranks_in_world(:)
integer  :: nworkers_SIESTA
integer, allocatable :: siesta_ranks_in_world(:)
integer :: PEXSI_Pole_Group_in_World
integer, allocatable :: PEXSI_Pole_ranks_in_World_Spin(:,:)
integer :: PEXSI_Pole_Comm, PEXSI_Spatial_Comm, PEXSI_Spin_Comm
integer :: numNodesTotal
integer :: npPerPole
logical  :: PEXSI_worker
!
type(Distribution)   :: dist1
type(Distribution), allocatable, target   :: dist2_spin(:)
type(Distribution), pointer :: dist2
integer  :: pbs, color, spatial_rank, spin_rank
#+END_SRC

Define the Siesta distribution. Note that this is known to all nodes.

#+BEGIN_SRC f90 :noweb-ref define-communicators
  call MPI_Comm_Group(World_Comm,World_Group, ierr)
  call MPI_Group_Size(SIESTA_Group, nworkers_SIESTA, ierr)
  allocate(siesta_ranks_in_world(nworkers_SIESTA))
  call MPI_Group_translate_ranks( SIESTA_Group, nworkers_SIESTA, &
       (/ (i,i=0,nworkers_SIESTA-1) /), &
       World_Group, siesta_ranks_in_world, ierr )
  call newDistribution(dist1,World_Comm,siesta_ranks_in_world, &
                       TYPE_BLOCK_CYCLIC,BlockSize,"bc dist")
  deallocate(siesta_ranks_in_world)
  call MPI_Barrier(World_Comm,ierr)
#+end_src

For possibly spin-polarized calculations, we split the communicator.
Note that the user can request a different number of processors per
pole for the LDOS calculation: =PEXSI.LDOS.np-per-pole= overrides the
global =PEXSI.np-per-pole= default.
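
As an illustration of the split performed below (a hypothetical layout,
not tied to any particular run): with 8 MPI ranks, =nspin = 2= and
=npPerPole = 2=, the "spatial" and "spin" communicators come out as

#+BEGIN_EXAMPLE
World ranks                 : 0 1 2 3 4 5 6 7
Spatial comm, spin 1        : 0 2 4 6   (spatial ranks 0 1 2 3)
Spatial comm, spin 2        : 1 3 5 7   (spatial ranks 0 1 2 3)
Spin comms (pairs)          : {0,1} {2,3} {4,5} {6,7}
First-pole (PEXSI) workers  : 0, 2 (spin 1)  and  1, 3 (spin 2)
#+END_EXAMPLE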

#+BEGIN_SRC f90 :noweb-ref define-communicators

    call mpi_comm_size( World_Comm, numNodesTotal, ierr )

    npPerPole  = fdf_get("PEXSI.np-per-pole",4)
    npPerPole  = fdf_get("PEXSI.LDOS.np-per-pole",npPerPole)
    if (nspin*npPerPole > numNodesTotal) &
         call die("PEXSI.np-per-pole is too big for MPI size")

    ! "Row" communicator for independent PEXSI operations on each spin
    ! The name refers to "spatial" degrees of freedom.
    color = mod(mpirank,nspin)    ! {0,1} for nspin = 2, or {0} for nspin = 1
    call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spatial_Comm, ierr)

    ! "Column" communicator for spin reductions
    color = mpirank/nspin
    call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spin_Comm, ierr)

    ! Group and communicator for the first-pole team of PEXSI workers
    !
    call MPI_Comm_Group(PEXSI_Spatial_Comm, PEXSI_Spatial_Group, Ierr)
    call MPI_Group_incl(PEXSI_Spatial_Group, npPerPole,   &
         (/ (i,i=0,npPerPole-1) /),&
         PEXSI_Pole_Group, Ierr)
    call MPI_Comm_create(PEXSI_Spatial_Comm, PEXSI_Pole_Group,&
         PEXSI_Pole_Comm, Ierr)

    call mpi_comm_rank( PEXSI_Spatial_Comm, spatial_rank, ierr )
    call mpi_comm_rank( PEXSI_Spin_Comm, spin_rank, ierr )
    PEXSI_worker = (spatial_rank < npPerPole)   ! Could be spin up or spin down

    ! PEXSI blocksize
    pbs = norbs/npPerPole

    ! Careful with this. For the purposes of matrix transfers,
    ! we need the ranks of the Pole group
    ! in the "bridge" communicator/group (World)

    allocate(pexsi_pole_ranks_in_world(npPerPole))
    call MPI_Comm_Group(World_Comm, World_Group, Ierr)

    call MPI_Group_translate_ranks( PEXSI_Pole_Group, npPerPole, &
         (/ (i,i=0,npPerPole-1) /), &
         World_Group, pexsi_pole_ranks_in_world, ierr )

    ! What we need is to include the actual world ranks
    ! in the distribution object
    allocate (PEXSI_Pole_ranks_in_World_Spin(npPerPole,nspin))
    call MPI_AllGather(pexsi_pole_ranks_in_world,npPerPole,MPI_integer,&
         PEXSI_Pole_Ranks_in_World_Spin(1,1),npPerPole, &
         MPI_integer,PEXSI_Spin_Comm,ierr)

    ! Create distributions known to all nodes
    allocate(dist2_spin(nspin))
    do ispin = 1, nspin
       call newDistribution(dist2_spin(ispin), World_Comm, &
            PEXSI_Pole_Ranks_in_World_Spin(:,ispin),  &
            TYPE_PEXSI, pbs, "px dist")
    enddo
    deallocate(pexsi_pole_ranks_in_world,PEXSI_Pole_Ranks_in_World_Spin)
    call MPI_Barrier(World_Comm,ierr)

#+end_src

** Re-distribute matrices

This is slightly unseemly, but it works. The =aux_matrix= derived
types are used to store and retrieve the matrices on either side. The
code lives in external auxiliary modules.
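
The actual type definition lives in =m_redist_spmatrix=; purely as an
orientation aid, the sketch below lists the fields as they are used in
this routine. The field names are taken from the usage below, but the
container type for =vals= (here called =val_ptr=) is a hypothetical
name, not the real declaration.

#+BEGIN_SRC f90 :noweb no :tangle no
  ! Sketch only -- inferred from usage; see m_redist_spmatrix for the
  ! authoritative declaration.
  type :: val_ptr                              ! hypothetical name
     real(dp), pointer :: data(:) => null()    ! one value array (S, H, or DM)
  end type val_ptr

  type :: aux_matrix
     integer :: norbs = 0                      ! global number of orbitals
     integer :: no_l  = 0                      ! locally stored rows
     integer :: nnzl  = 0                      ! local number of non-zeros
     integer,       pointer :: numcols(:) => null()  ! non-zeros per local row
     integer,       pointer :: cols(:)    => null()  ! column indices
     type(val_ptr), pointer :: vals(:)    => null()  ! the value arrays
  end type aux_matrix
#+END_SRC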

#+BEGIN_SRC f90 :noweb-ref routine-variables
type(aux_matrix), allocatable, target :: m1_spin(:)
type(aux_matrix) :: m2
type(aux_matrix), pointer :: m1
integer :: nrows, nnz, nnzLocal, numColLocal
integer, pointer, dimension(:) :: colptrLocal=>null(), rowindLocal=>null()
!
real(dp), pointer, dimension(:) :: &
        HnzvalLocal=>null(), SnzvalLocal=>null(),  &
        DMnzvalLocal => null()
!
integer :: ispin, pexsi_spin
#+END_SRC
#+BEGIN_SRC f90 :noweb-ref re-distribute-matrices

  pexsi_spin = spin_rank+1  ! {1,2}
  ! This is done serially on the Siesta side, each time
  ! filling in the structures of one PEXSI spin set

  allocate(m1_spin(nspin))
  do ispin = 1, nspin

     m1 => m1_spin(ispin)

     if (SIESTA_worker) then
        m1%norbs = norbs
        m1%no_l  = no_l
        m1%nnzl  = sum(numH(1:no_l))
        m1%numcols => numH
        m1%cols    => listH
        allocate(m1%vals(2))
        m1%vals(1)%data => S(:)
        m1%vals(2)%data => H(:,ispin)

     endif  ! SIESTA_worker

     call timer("redist_orbs_fwd", 1)

     ! Note that we cannot simply wrap this in a pexsi_spin test, as
     ! there are Siesta nodes in both spin sets.
     ! We must discriminate the PEXSI workers by the distribution info
     dist2 => dist2_spin(ispin)
     call redistribute_spmatrix(norbs,m1,dist1,m2,dist2,World_Comm)

     call timer("redist_orbs_fwd", 2)

     if (PEXSI_worker .and. (pexsi_spin == ispin) ) then

        nrows = m2%norbs          ! or simply 'norbs'
        numColLocal = m2%no_l
        nnzLocal    = m2%nnzl
        call MPI_AllReduce(nnzLocal,nnz,1,MPI_integer,MPI_sum,PEXSI_Pole_Comm,ierr)

        call re_alloc(colptrLocal,1,numColLocal+1,"colptrLocal","pexsi_ldos")
        colptrLocal(1) = 1
        do ih = 1,numColLocal
           colptrLocal(ih+1) = colptrLocal(ih) + m2%numcols(ih)
        enddo

        rowindLocal => m2%cols
        SnzvalLocal => m2%vals(1)%data
        HnzvalLocal => m2%vals(2)%data

        call re_alloc(DMnzvalLocal,1,nnzLocal,"DMnzvalLocal","pexsi_ldos")

        call memory_all("after transferring H+S for PEXSI-LDOS",PEXSI_Pole_Comm)

     endif ! PEXSI worker
  enddo

  ! Make these available to all processes.
  ! (Note that the values are those on process 0, which is in the spin=1 set.
  ! In fact, they are only needed for calls to the interface, so the broadcast
  ! could be over PEXSI_Spatial_Comm only.)

  call MPI_Bcast(nrows,1,MPI_integer,0,World_Comm,ierr)
  call MPI_Bcast(nnz,1,MPI_integer,0,World_Comm,ierr)

  call memory_all("after setting up H+S for PEXSI LDOS",World_Comm)

#+END_SRC

** Set options

We use the options interface to get a template with default values,
and then override a few of them from fdf variables.

#+BEGIN_SRC f90 :noweb-ref routine-variables
type(f_ppexsi_options) :: options
!
integer                :: isSIdentity
integer                :: verbosity
#+end_src

#+BEGIN_SRC f90 :noweb-ref set-options
! --
  isSIdentity = 0
!
  call f_ppexsi_set_default_options( options )
  ! Ordering flag:
  !   1: Use METIS
  !   0: Use PARMETIS/PT-SCOTCH
  options%ordering = fdf_get("PEXSI.ordering",1)
  ! Number of processors for symbolic factorization
  ! Only relevant for PARMETIS/PT-SCOTCH
  options%npSymbFact = fdf_get("PEXSI.np-symbfact",1)
  options%verbosity = fdf_get("PEXSI.verbosity",1)
#+END_SRC

** Prepare plan

Each spin set of PEXSI processors has its own plan, even though only
the first-pole group of nodes will do actual work here.

#+BEGIN_SRC f90 :noweb-ref routine-variables
integer(c_intptr_t)    :: plan
integer :: numProcRow, numProcCol
integer :: outputFileIndex
#+END_SRC

#+BEGIN_SRC f90 :noweb-ref prepare-plan
  call get_row_col(npPerPole,numProcRow,numProcCol)

  ! Set the outputFileIndex to be the pole index.
  ! Starting from PEXSI v0.8.0, the first processor for each pole outputs
  ! information

  if( mod( mpirank, npPerPole ) .eq. 0 ) then
    outputFileIndex = mpirank / npPerPole
  else
    outputFileIndex = -1
  endif
!
! Note that even though we only use one pole's worth of processors, we
! still use the full spatial PEXSI communicator in the plan.
! Failing to do so leads to an error. This is not sufficiently documented.
!
  plan = f_ppexsi_plan_initialize(&
      PEXSI_Spatial_Comm,&
      numProcRow,&
      numProcCol,&
      outputFileIndex,&
      info)

call check_info(info,"plan_initialize in LDOS")
#+END_SRC

** Load H and S matrices

In this version H and S are real symmetric. We associate them with the
plan (I do not know in detail what happens behind the scenes;
presumably no copy is made).

#+BEGIN_SRC f90 :noweb-ref load-hs-matrices
call f_ppexsi_load_real_symmetric_hs_matrix(&
      plan,&
      options,&
      nrows,&
      nnz,&
      nnzLocal,&
      numColLocal,&
      colptrLocal,&
      rowindLocal,&
      HnzvalLocal,&
      isSIdentity,&
      SnzvalLocal,&
      info)

call check_info(info,"load_real_sym_hs_matrix in LDOS")

#+END_SRC

** Factorization

This is a bit ambiguous, as we have loaded a "symmetric" (real) matrix
pair (H and S), but I believe that internally (and in the plan) the
complex structures are set up and filled in.

#+BEGIN_SRC f90 :noweb-ref factorization
    call f_ppexsi_symbolic_factorize_complex_symmetric_matrix(&
         plan, &
         options,&
         info)
    call check_info(info,"factorize complex matrix in LDOS")

  options%isSymbolicFactorize = 0 ! We do not need it anymore
#+END_SRC

** Compute the LDOS

#+BEGIN_SRC f90 :noweb-ref routine-variables
real(dp), pointer, dimension(:) :: AnzvalLocal => null()
real(dp), pointer, dimension(:) :: AinvnzvalLocal => null()
integer :: loc
#+END_SRC

Note that only the first-pole team does this.

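What the code below computes, for each spin component, is the
energy-resolved (partial) density matrix obtained from the selected
elements of the inverse of $H - zS$ at the complex energy $z = E +
i\eta$ (with =energy= $= E$ and =broadening= $= \eta$):

\[
  \mathrm{DM}_{\mu\nu}(E,\eta) \;=\; \frac{1}{\pi}\,
  \mathrm{Im}\,\Big[ \big( H - (E + i\eta)\, S \big)^{-1} \Big]_{\mu\nu},
\]

evaluated only on the sparsity pattern of $H$ and $S$, which is
exactly what the selected-inversion routine provides.
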
#+BEGIN_SRC f90 :noweb-ref compute-ldos
    if (PEXSI_worker) then

       if(mpirank == 0) then
          write(6,"(a,f16.5,f10.5)") &
               'Calling PEXSI LDOS routine. Energy and broadening (eV) ', &
               energy/eV, broadening/eV
          write(6,"(a,i4)") &
               'Processors working on selected inversion: ', npPerPole
       endif

       call timer("pexsi-ldos-selinv", 1)

       ! Build AnzvalLocal as H - zS, where z = (energy, broadening), and
       ! treat it as a one-dimensional real array with 2*nnzLocal entries
       ! (interleaved real and imaginary parts)

       call re_alloc(AnzvalLocal,1,2*nnzLocal,"AnzvalLocal","pexsi_ldos")
       call re_alloc(AinvnzvalLocal,1,2*nnzLocal,"AinvnzvalLocal","pexsi_ldos")

       loc = 1
       do i = 1, nnzLocal
          AnzvalLocal(loc)   = HnzvalLocal(i) - energy*SnzvalLocal(i)
          AnzvalLocal(loc+1) = - broadening*SnzvalLocal(i)
          loc = loc + 2
       enddo

       call f_ppexsi_selinv_complex_symmetric_matrix(&
            plan,&
            options,&
            AnzvalLocal,&
            AinvnzvalLocal,&
            info)

       call check_info(info,"selinv complex matrix in LDOS")

       ! Get DMnzvalLocal as (1/pi) * Imag(Ainv)

       loc = 1
       do i = 1, nnzLocal
          DMnzvalLocal(i) = (1.0_dp/pi) * AinvnzvalLocal(loc+1)
          loc = loc + 2
       enddo
       call de_alloc(AnzvalLocal,"AnzvalLocal","pexsi_ldos")
       call de_alloc(AinvnzvalLocal,"AinvnzvalLocal","pexsi_ldos")

       call timer("pexsi-ldos-selinv", 2)
       !
    endif ! PEXSI_worker
#+END_SRC
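
As an aside on the storage convention: the interleaved real array built
above is just the flat (re, im) layout of a complex matrix. A
complex-typed equivalent of the fill loop would look like the sketch
below (=zA= is a hypothetical local array used only for illustration;
the PEXSI interface itself expects the flat real array):

#+BEGIN_SRC f90 :noweb no :tangle no
  ! Sketch only: complex view of the interleaved AnzvalLocal array.
  complex(dp), allocatable :: zA(:)
  allocate(zA(nnzLocal))
  do i = 1, nnzLocal
     ! A = H - (E + i*eta) S, with E = energy and eta = broadening
     zA(i) = cmplx( HnzvalLocal(i) - energy*SnzvalLocal(i), &
                    -broadening*SnzvalLocal(i), kind=dp )
  enddo
  ! Correspondence with the flat array:
  !   AnzvalLocal(2*i-1) = real(zA(i))
  !   AnzvalLocal(2*i)   = aimag(zA(i))
#+END_SRC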

** Copy information to Siesta side

#+BEGIN_SRC f90 :noweb-ref copy-to-siesta-side

  do ispin = 1, nspin

     m1 => m1_spin(ispin)

     if (PEXSI_worker .and. (pexsi_spin == ispin)) then
        ! Prepare m2 to transfer

        call de_alloc(colptrLocal,"colptrLocal","pexsi_ldos")

        call de_alloc(m2%vals(1)%data,"m2%vals(1)%data","pexsi_ldos")
        call de_alloc(m2%vals(2)%data,"m2%vals(2)%data","pexsi_ldos")

        deallocate(m2%vals)
        allocate(m2%vals(1))
        m2%vals(1)%data => DMnzvalLocal(1:nnzLocal)

     endif

     ! Prepare m1 to receive the results
     if (SIESTA_worker) then
        nullify(m1%vals(1)%data)    ! formerly pointing to S
        nullify(m1%vals(2)%data)    ! formerly pointing to H
        deallocate(m1%vals)
        nullify(m1%numcols)         ! formerly pointing to numH
        nullify(m1%cols)            ! formerly pointing to listH
     endif

     call timer("redist_orbs_bck", 1)
     dist2 => dist2_spin(ispin)
     call redistribute_spmatrix(norbs,m2,dist2,m1,dist1,World_Comm)
     call timer("redist_orbs_bck", 2)

     if (PEXSI_worker .and. (pexsi_spin == ispin)) then
        call de_alloc(DMnzvalLocal, "DMnzvalLocal", "pexsi_ldos")

        nullify(m2%vals(1)%data)    ! formerly pointing to DM
        deallocate(m2%vals)
        ! allocated in the direct transfer
        call de_alloc(m2%numcols,"m2%numcols","pexsi_ldos")
        call de_alloc(m2%cols,   "m2%cols",   "pexsi_ldos")
     endif

     if (SIESTA_worker) then

        LDOS_DM(:,ispin)  = m1%vals(1)%data(:)
        ! Check no_l
        if (no_l /= m1%no_l) then
           call die("Mismatch in no_l")
        endif
        ! Check listH
        if (any(listH(:) /= m1%cols(:))) then
           call die("Mismatch in listH")
        endif

        call de_alloc(m1%vals(1)%data,"m1%vals(1)%data","pexsi_ldos")
        deallocate(m1%vals)
        ! allocated in the direct transfer
        call de_alloc(m1%numcols,"m1%numcols","pexsi_ldos")
        call de_alloc(m1%cols,   "m1%cols",   "pexsi_ldos")

     endif
  enddo
  call timer("pexsi-ldos", 2)

#+END_SRC

** Clean up

#+BEGIN_SRC f90 :noweb-ref clean-up

    call delete(dist1)
    do ispin = 1, nspin
       call delete(dist2_spin(ispin))
    enddo
    deallocate(dist2_spin)
    deallocate(m1_spin)

    call MPI_Comm_Free(PEXSI_Spatial_Comm, ierr)
    call MPI_Comm_Free(PEXSI_Spin_Comm, ierr)
    call MPI_Comm_Free(World_Comm, ierr)

    ! The Pole communicator was created from a subgroup.
    ! As such, it is MPI_COMM_NULL for those processes
    ! not in the subgroup (non-PEXSI workers). Only
    ! defined communicators can be freed
    if (PEXSI_worker) then
       call MPI_Comm_Free(PEXSI_Pole_Comm, ierr)
    endif

    ! Finalize the plan
    call f_ppexsi_plan_finalize( plan, info )

    call MPI_Group_Free(PEXSI_Spatial_Group, ierr)
    call MPI_Group_Free(PEXSI_Pole_Group, ierr)
    call MPI_Group_Free(World_Group, ierr)
#+END_SRC

** Support routines

A couple of helper routines follow.

#+BEGIN_SRC f90 :noweb-ref support-routines
 <<get-row-col>>
 <<check-info>>
#+END_SRC

*** Row and column partition of npPerPole
#+BEGIN_SRC f90 :noweb-ref get-row-col
subroutine get_row_col(np,nrow,ncol)
integer, intent(in)  :: np
integer, intent(out) :: nrow, ncol
!
! Finds the factors nrow and ncol such that nrow*ncol = np,
! as similar to each other as possible, with nrow >= ncol.
! For prime np, ncol = 1 and nrow = np.
!
ncol  = floor(sqrt(dble(np)))
do
  nrow = np/ncol
  if (nrow*ncol == np) exit
  ncol = ncol - 1
enddo
end subroutine get_row_col
#+END_SRC
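
For example, =np = 12= gives =nrow = 4=, =ncol = 3=; =np = 10= gives
=nrow = 5=, =ncol = 2=; and a prime such as =np = 7= falls through to
=nrow = 7=, =ncol = 1=.
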
*** Error dispatcher
#+BEGIN_SRC f90 :noweb-ref check-info

subroutine check_info(info,str)
integer, intent(in) :: info
character(len=*), intent(in) :: str

    if(mpirank == 0) then
       if (info /= 0) then
          write(6,*) trim(str) // " info : ", info
          call die("Error exit from " // trim(str) // " routine")
       endif
       call pxfflush(6)
    endif
end subroutine check_info
#+END_SRC