-*- mode: Org -*-
#+TITLE: Literate version of PEXSI LDOS calculator
#+AUTHOR: Alberto Garcia

* Introduction

This is a spin-polarized version of the SIESTA-PEXSI interface for the
calculation of the LDOS (which is a "partial" DM computed in a
restricted energy window).
Module structure:

#+BEGIN_SRC f90 :noweb-ref module-structure
  MODULE m_pexsi_local_dos
#ifdef SIESTA__PEXSI
    private
    public :: pexsi_local_dos

  CONTAINS
    <<siesta-side-parent-routine>>
    <<pexsi-ldos-routine>>
#endif
  end module m_pexsi_local_dos
#+END_SRC

#+BEGIN_SRC f90 :noweb yes :tangle m_pexsi_local_dos.F90 :exports none
  ! Tangled code
  <<module-structure>>
  ! End of tangled code
#+END_SRC

* Parent routine with dispatch to Siesta post-processing

This routine defines the energy window (=energy= and =broadening=)
and calls the PEXSI routine to compute the partial DM, which is then
passed to the =dhscf= machinery for the calculation of the
corresponding charge, which is precisely the LDOS we need. The output
file will have the extension =.LDSI=.

#+BEGIN_SRC f90 :noweb-ref siesta-side-parent-routine
  subroutine pexsi_local_dos( )
    use m_energies

    use sparse_matrices
    USE siesta_options
    use siesta_geom
    use atomlist, only: indxuo, indxua
    use atomlist, only: qtot, no_u, no_l
    use atomlist, only: iphorb
    use atomlist, only: datm, no_s, iaorb
    use fdf
    use files, only : slabel
    use files, only : filesOut_t   ! type for output file names
    use parallel, only: SIESTA_worker
    use m_ntm
    use m_forces, only: fa
    use m_spin, only: nspin
    use m_dhscf, only: dhscf

    implicit none

    integer  :: dummy_iscf = 1
    real(dp) :: dummy_str(3,3), dummy_strl(3,3)  ! for dhscf call
    real(dp) :: dummy_dipol(3)

    real(dp) :: factor, g2max
    real(dp) :: energy, broadening

    type(filesOut_t) :: filesOut   ! blank output file names

    energy = fdf_get('PEXSI.LDOS.Energy',0.0_dp,"Ry")
    broadening = fdf_get('PEXSI.LDOS.Broadening',0.01_dp,"Ry")

    ! Note that we re-use Dscf, so it will be obliterated
    call get_LDOS_SI( no_u, no_l, nspin,  &
         maxnh, numh, listh, H, S,        &
         Dscf, energy, broadening)

    if (SIESTA_worker) then
       ! Find the LDOS in the real-space mesh
       filesOut%rho = trim(slabel) // '.LDSI'
       g2max = g2cut

       ! There is too much clutter here, because dhscf is
       ! a "kitchen-sink" routine that does too many things

       call dhscf( nspin, no_s, iaorb, iphorb, no_l,            &
            no_u, na_u, na_s, isa, xa_last, indxua,             &
            ntm, 0, 0, 0, filesOut,                             &
            maxnh, numh, listhptr, listh, Dscf, Datm, maxnh, H, &
            Enaatm, Enascf, Uatm, Uscf, DUscf, DUext, Exc, Dxc, &
            dummy_dipol, dummy_str, fa, dummy_strl )
    endif

  END subroutine pexsi_local_dos
#+end_src

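For reference, these are the fdf options read above. The values below
are only an illustration (the defaults are 0.0 Ry for the energy,
0.01 Ry for the broadening, and =PEXSI.np-per-pole= for the number of
processors per pole used later in the LDOS step):

#+BEGIN_EXAMPLE
  # Energy at which to evaluate the LDOS, and its broadening
  PEXSI.LDOS.Energy       -0.30 Ry
  PEXSI.LDOS.Broadening    0.01 Ry
  # Optional: processors per pole for the selected inversion
  PEXSI.LDOS.np-per-pole   4
#+END_EXAMPLE
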
* PEXSI LDOS routine

** Main structure

This is the main structure of the LDOS routine.

#+begin_src f90 :noweb-ref pexsi-ldos-routine
<<routine-header>>
<<routine-variables>>
! -------- for serial compilation
<<define-communicators>>
<<re-distribute-matrices>>
<<set-options>>
<<prepare-plan>>
<<load-hs-matrices>>
<<factorization>>
<<compute-ldos>>
<<copy-to-siesta-side>>
<<clean-up>>

CONTAINS

<<support-routines>>

end subroutine get_LDOS_SI
#+end_src

** Routine header

#+BEGIN_SRC f90 :noweb-ref routine-header
  subroutine get_LDOS_SI( no_u, no_l, nspin_in,  &
       maxnh, numh, listh, H, S,                 &
       LDOS_DM, energy, broadening)

    <<used-modules>>

    implicit none

    integer, intent(in)          :: maxnh, no_u, no_l, nspin_in
    integer, intent(in), target  :: listh(maxnh), numh(no_l)
    real(dp), intent(in), target :: H(maxnh,nspin_in), S(maxnh)
    real(dp), intent(in)         :: energy, broadening
    real(dp), intent(out)        :: LDOS_DM(maxnh,nspin_in)
#+END_SRC

*** Used modules
#+BEGIN_SRC f90 :noweb-ref used-modules
  use precision, only: dp
  use fdf
  use units, only: eV, pi
  use sys, only: die
  use m_mpi_utils, only: broadcast
  use parallel, only: SIESTA_worker, BlockSize
  use parallel, only: SIESTA_Group, SIESTA_Comm
  use m_redist_spmatrix, only: aux_matrix, redistribute_spmatrix
  use class_Distribution
  use alloc, only: re_alloc, de_alloc
  use mpi_siesta
  use iso_c_binding
  use f_ppexsi_interface, only: f_ppexsi_options
  use f_ppexsi_interface, only: f_ppexsi_plan_finalize
  use f_ppexsi_interface, only: f_ppexsi_plan_initialize
  use f_ppexsi_interface, only: f_ppexsi_selinv_complex_symmetric_matrix
  use f_ppexsi_interface, only: f_ppexsi_load_real_symmetric_hs_matrix
  use f_ppexsi_interface, only: f_ppexsi_set_default_options
  use f_ppexsi_interface, &
       only: f_ppexsi_symbolic_factorize_complex_symmetric_matrix
#+END_SRC

** Routine variables

The local variables of the routine must all be declared in one place
for the compiler, but it is clearer to introduce them as they are
needed. The =routine-variables= noweb-ref will be used for this
throughout this document.

#+BEGIN_SRC f90 :noweb-ref routine-variables
integer  :: ih, i
integer  :: info
logical  :: write_ok
!------------
external :: timer
#+END_SRC

** Define communicators

=World_Comm=, which is in principle set to Siesta's copy of
=MPI_Comm_World=, is the global communicator.

#+BEGIN_SRC f90 :noweb-ref routine-variables
integer :: World_Comm, mpirank, ierr
!
integer :: norbs
integer :: nspin
#+END_SRC

#+BEGIN_SRC f90 :noweb-ref define-communicators
!
! Our global communicator is a duplicate of the passed communicator
!
call MPI_Comm_Dup(true_MPI_Comm_World, World_Comm, ierr)
call mpi_comm_rank( World_Comm, mpirank, ierr )

call timer("pexsi-ldos", 1)

if (SIESTA_worker) then
   ! rename some intent(in) variables, which are only
   ! defined for the Siesta subset
   norbs = no_u
   nspin = nspin_in
endif
!
call broadcast(norbs,comm=World_Comm)
call broadcast(nspin,World_Comm)
#+END_SRC

Now we need to define the Siesta distribution object, and the
communicator and distribution object for the first team of PEXSI
workers, for the purposes of re-distribution of the relevant
matrices.

For spin, things are a bit more complicated. We need to make sure
that the distributions are defined and known to all processors (via
actual ranks) with respect to the same reference "bridge"
communicator. For now, this is =World_Comm=.

#+BEGIN_SRC f90 :noweb-ref routine-variables
integer :: PEXSI_Pole_Group, PEXSI_Spatial_Group, World_Group
integer, allocatable :: pexsi_pole_ranks_in_world(:)
integer :: nworkers_SIESTA
integer, allocatable :: siesta_ranks_in_world(:)
integer :: PEXSI_Pole_Group_in_World
integer, allocatable :: PEXSI_Pole_ranks_in_World_Spin(:,:)
integer :: PEXSI_Pole_Comm, PEXSI_Spatial_Comm, PEXSI_Spin_Comm
integer :: numNodesTotal
integer :: npPerPole
logical :: PEXSI_worker
!
type(Distribution) :: dist1
type(Distribution), allocatable, target :: dist2_spin(:)
type(Distribution), pointer :: dist2
integer :: pbs, color, spatial_rank, spin_rank
#+END_SRC

Define the Siesta distribution. Note that this is known to all nodes.

#+BEGIN_SRC f90 :noweb-ref define-communicators
  call MPI_Comm_Group(World_Comm,World_Group, ierr)
  call MPI_Group_Size(SIESTA_Group, nworkers_SIESTA, ierr)
  allocate(siesta_ranks_in_world(nworkers_SIESTA))
  call MPI_Group_translate_ranks( SIESTA_Group, nworkers_SIESTA, &
       (/ (i,i=0,nworkers_SIESTA-1) /),                          &
       World_Group, siesta_ranks_in_world, ierr )
  call newDistribution(dist1,World_Comm,siesta_ranks_in_world, &
                       TYPE_BLOCK_CYCLIC,BlockSize,"bc dist")
  deallocate(siesta_ranks_in_world)
  call MPI_Barrier(World_Comm,ierr)
#+end_src

For possibly spin-polarized calculations, we split the communicator.
Note that we give the user the option of requesting more processors
per pole for the LDOS calculation than for the rest of the PEXSI
operations.

#+BEGIN_SRC f90 :noweb-ref define-communicators

  call mpi_comm_size( World_Comm, numNodesTotal, ierr )

  npPerPole = fdf_get("PEXSI.np-per-pole",4)
  npPerPole = fdf_get("PEXSI.LDOS.np-per-pole",npPerPole)
  if (nspin*npPerPole > numNodesTotal) &
       call die("PEXSI.np-per-pole is too big for MPI size")

  ! "Row" communicator for independent PEXSI operations on each spin.
  ! The name refers to the "spatial" degrees of freedom.
  color = mod(mpirank,nspin)   ! {0,1} for nspin = 2, or {0} for nspin = 1
  call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spatial_Comm, ierr)

  ! "Column" communicator for spin reductions
  color = mpirank/nspin
  call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spin_Comm, ierr)

  ! Group and communicator for the first-pole team of PEXSI workers
  !
  call MPI_Comm_Group(PEXSI_Spatial_Comm, PEXSI_Spatial_Group, Ierr)
  call MPI_Group_incl(PEXSI_Spatial_Group, npPerPole, &
                      (/ (i,i=0,npPerPole-1) /),      &
                      PEXSI_Pole_Group, Ierr)
  call MPI_Comm_create(PEXSI_Spatial_Comm, PEXSI_Pole_Group, &
                       PEXSI_Pole_Comm, Ierr)

  call mpi_comm_rank( PEXSI_Spatial_Comm, spatial_rank, ierr )
  call mpi_comm_rank( PEXSI_Spin_Comm, spin_rank, ierr )
  PEXSI_worker = (spatial_rank < npPerPole)   ! Could be spin up or spin down

  ! PEXSI blocksize
  pbs = norbs/npPerPole

  ! Careful with this. For the purposes of matrix transfers,
  ! we need the ranks of the Pole group
  ! in the "bridge" communicator/group (World)

  allocate(pexsi_pole_ranks_in_world(npPerPole))
  call MPI_Comm_Group(World_Comm, World_Group, Ierr)

  call MPI_Group_translate_ranks( PEXSI_Pole_Group, npPerPole, &
       (/ (i,i=0,npPerPole-1) /),                              &
       World_Group, pexsi_pole_ranks_in_world, ierr )

  ! What we need is to include the actual world ranks
  ! in the distribution object
  allocate (PEXSI_Pole_ranks_in_World_Spin(npPerPole,nspin))
  call MPI_AllGather(pexsi_pole_ranks_in_world,npPerPole,MPI_integer,&
                     PEXSI_Pole_Ranks_in_World_Spin(1,1),npPerPole,  &
                     MPI_integer,PEXSI_Spin_Comm,ierr)

  ! Create distributions known to all nodes
  allocate(dist2_spin(nspin))
  do ispin = 1, nspin
     call newDistribution(dist2_spin(ispin), World_Comm,           &
                          PEXSI_Pole_Ranks_in_World_Spin(:,ispin), &
                          TYPE_PEXSI, pbs, "px dist")
  enddo
  deallocate(pexsi_pole_ranks_in_world,PEXSI_Pole_Ranks_in_World_Spin)
  call MPI_Barrier(World_Comm,ierr)

#+end_src

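For concreteness, here is an illustration (not part of the tangled
code) of the layout produced by the two =MPI_Comm_Split= calls above,
assuming 8 MPI ranks, =nspin= = 2, and =npPerPole= = 2. The first-pole
team of each spin set consists of the first =npPerPole= ranks of its
spatial communicator:

| World rank | Spatial comm (spin) | Rank in spatial comm | Spin comm ranks | PEXSI_worker |
|------------+---------------------+----------------------+-----------------+--------------|
|          0 | spin 1              |                    0 | {0,1}           | yes          |
|          1 | spin 2              |                    0 | {0,1}           | yes          |
|          2 | spin 1              |                    1 | {2,3}           | yes          |
|          3 | spin 2              |                    1 | {2,3}           | yes          |
|          4 | spin 1              |                    2 | {4,5}           | no           |
|          5 | spin 2              |                    2 | {4,5}           | no           |
|          6 | spin 1              |                    3 | {6,7}           | no           |
|          7 | spin 2              |                    3 | {6,7}           | no           |
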
** Re-distribute matrices

This is slightly unseemly, but it works. The =aux_matrix= derived
types are used to store and retrieve the matrices on either side. The
code is in external auxiliary modules.

#+BEGIN_SRC f90 :noweb-ref routine-variables
type(aux_matrix), allocatable, target :: m1_spin(:)
type(aux_matrix) :: m2
type(aux_matrix), pointer :: m1
integer :: nrows, nnz, nnzLocal, numColLocal
integer, pointer, dimension(:) :: colptrLocal=> null(), rowindLocal=>null()
!
real(dp), pointer, dimension(:) ::                &
     HnzvalLocal=>null(), SnzvalLocal=>null(),    &
     DMnzvalLocal => null()
!
integer :: ispin, pexsi_spin
#+END_SRC

#+BEGIN_SRC f90 :noweb-ref re-distribute-matrices

  pexsi_spin = spin_rank+1  ! {1,2}
  ! This is done serially on the Siesta side, each time
  ! filling in the structures of one PEXSI spin set

  allocate(m1_spin(nspin))
  do ispin = 1, nspin

     m1 => m1_spin(ispin)

     if (SIESTA_worker) then
        m1%norbs = norbs
        m1%no_l  = no_l
        m1%nnzl  = sum(numH(1:no_l))
        m1%numcols => numH
        m1%cols    => listH
        allocate(m1%vals(2))
        m1%vals(1)%data => S(:)
        m1%vals(2)%data => H(:,ispin)

     endif  ! SIESTA_worker

     call timer("redist_orbs_fwd", 1)

     ! Note that we cannot simply wrap this in a pexsi_spin test, as
     ! there are Siesta nodes in both spin sets.
     ! We must discriminate the PEXSI workers by the distribution info
     dist2 => dist2_spin(ispin)
     call redistribute_spmatrix(norbs,m1,dist1,m2,dist2,World_Comm)

     call timer("redist_orbs_fwd", 2)

     if (PEXSI_worker .and. (pexsi_spin == ispin) ) then

        nrows       = m2%norbs   ! or simply 'norbs'
        numColLocal = m2%no_l
        nnzLocal    = m2%nnzl
        call MPI_AllReduce(nnzLocal,nnz,1,MPI_integer,MPI_sum,PEXSI_Pole_Comm,ierr)

        call re_alloc(colptrLocal,1,numColLocal+1,"colptrLocal","pexsi_ldos")
        colptrLocal(1) = 1
        do ih = 1,numColLocal
           colptrLocal(ih+1) = colptrLocal(ih) + m2%numcols(ih)
        enddo

        rowindLocal => m2%cols
        SnzvalLocal => m2%vals(1)%data
        HnzvalLocal => m2%vals(2)%data

        call re_alloc(DMnzvalLocal,1,nnzLocal,"DMnzvalLocal","pexsi_ldos")

        call memory_all("after transferring H+S for PEXSI-LDOS",PEXSI_Pole_Comm)

     endif ! PEXSI worker
  enddo

  ! Make these available to all nodes.
  ! (Note that the values are those on process 0, which is in the spin=1 set.
  !  In fact, they are only needed for calls to the interface, so the broadcast
  !  could be over PEXSI_Spatial_Comm only.)

  call MPI_Bcast(nrows,1,MPI_integer,0,World_Comm,ierr)
  call MPI_Bcast(nnz,1,MPI_integer,0,World_Comm,ierr)

  call memory_all("after setting up H+S for PEXSI LDOS",World_comm)

#+END_SRC

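As a tiny worked example of the column-pointer construction in the
code above (illustrative only, not part of the tangled code): three
local columns holding 2, 0, and 3 nonzeros produce =colptrLocal= =
(1, 3, 3, 6), so the local number of nonzeros is the last entry minus
one.

#+BEGIN_SRC f90 :tangle no
  program csc_demo
    implicit none
    integer :: numcols(3) = (/ 2, 0, 3 /)   ! nonzeros in each local column
    integer :: colptr(4), j
    colptr(1) = 1
    do j = 1, 3
       colptr(j+1) = colptr(j) + numcols(j)
    end do
    print *, colptr     ! prints 1 3 3 6; nnzLocal = colptr(4) - 1 = 5
  end program csc_demo
#+END_SRC
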
** Set options

We use the options interface to get a template with default values,
and then fill in a few custom options based on fdf variables.

#+BEGIN_SRC f90 :noweb-ref routine-variables
type(f_ppexsi_options) :: options
!
integer :: isSIdentity
integer :: verbosity
#+end_src

#+BEGIN_SRC f90 :noweb-ref set-options
! --
  isSIdentity = 0
!
  call f_ppexsi_set_default_options( options )
  ! Ordering flag:
  !   1: Use METIS
  !   0: Use PARMETIS/PTSCOTCH
  options%ordering = fdf_get("PEXSI.ordering",1)
  ! Number of processors for symbolic factorization
  ! Only relevant for PARMETIS/PT_SCOTCH
  options%npSymbFact = fdf_get("PEXSI.np-symbfact",1)
  options%verbosity = fdf_get("PEXSI.verbosity",1)
#+END_SRC

** Prepare plan

Each spin set of PEXSI processors has its own plan. Note that,
although only the first-pole team of nodes does the actual selected
inversion, the plan is initialized over the full spatial communicator
(see the comment in the code below). The =get_row_col= helper (defined
with the support routines) chooses the most nearly square processor
grid: for example, =npPerPole= = 4 gives a 2x2 grid and 6 gives 3x2.

#+BEGIN_SRC f90 :noweb-ref routine-variables
integer(c_intptr_t) :: plan
integer :: numProcRow, numProcCol
integer :: outputFileIndex
#+END_SRC

#+BEGIN_SRC f90 :noweb-ref prepare-plan
  call get_row_col(npPerPole,numProcRow,numProcCol)

  ! Set the outputFileIndex to be the pole index.
  ! Starting from PEXSI v0.8.0, the first processor for each pole outputs
  ! information

  if( mod( mpirank, npPerPole ) .eq. 0 ) then
     outputFileIndex = mpirank / npPerPole;
  else
     outputFileIndex = -1;
  endif
!
! Note that even though we only use one pole's worth of processors, we
! still use the full spatial PEXSI communicator in the plan.
! Failing to do so leads to an error. This is not sufficiently documented.
!
  plan = f_ppexsi_plan_initialize(&
       PEXSI_Spatial_Comm,&
       numProcRow,&
       numProcCol,&
       outputFileIndex,&
       info)

  call check_info(info,"plan_initialize in LDOS")
#+END_SRC

** Load H and S matrices

In this version H and S are real symmetric. We associate them with the
plan (I do not know exactly what happens behind the scenes; presumably
no copy is made).

#+BEGIN_SRC f90 :noweb-ref load-hs-matrices
  call f_ppexsi_load_real_symmetric_hs_matrix(&
       plan,&
       options,&
       nrows,&
       nnz,&
       nnzLocal,&
       numColLocal,&
       colptrLocal,&
       rowindLocal,&
       HnzvalLocal,&
       isSIdentity,&
       SnzvalLocal,&
       info)

  call check_info(info,"load_real_sym_hs_matrix in LDOS")

#+END_SRC

** Factorization

This may look inconsistent: we have loaded a *real* symmetric pair
(actually H and S), but the symbolic factorization is done for the
complex symmetric matrix (H - zS) that will be inverted below. I
believe the complex structures are handled and filled in internally
(and in the plan).

#+BEGIN_SRC f90 :noweb-ref factorization
  call f_ppexsi_symbolic_factorize_complex_symmetric_matrix(&
       plan, &
       options,&
       info)
  call check_info(info,"factorize complex matrix in LDOS")

  options%isSymbolicFactorize = 0 ! We do not need it anymore
#+END_SRC

** Compute the LDOS

#+BEGIN_SRC f90 :noweb-ref routine-variables
real(dp), pointer, dimension(:) :: AnzvalLocal => null()
real(dp), pointer, dimension(:) :: AinvnzvalLocal => null()
integer :: loc
#+END_SRC

Note that only the first-pole team does this.
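In formulas, the quantity computed below is the broadened,
energy-resolved density matrix

\[
  \mathrm{DM}_{\mu\nu}(E) \;=\; \frac{1}{\pi}\,\mathrm{Im}\,\bigl[(H - zS)^{-1}\bigr]_{\mu\nu},
  \qquad z = E + i\,\eta ,
\]

where $\eta$ is the broadening. This is the same as
$-\tfrac{1}{\pi}\,\mathrm{Im}\,G_{\mu\nu}(E+i\eta)$ for the usual
Green's function $G = (zS - H)^{-1}$. The loop below packs $H - zS$
into =AnzvalLocal= as interleaved (real, imaginary) pairs, and after
the selected inversion the imaginary part of the result is scaled by
$1/\pi$ into =DMnzvalLocal=.
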
#+BEGIN_SRC f90 :noweb-ref compute-ldos
  if (PEXSI_worker) then

     if(mpirank == 0) then
        write(6,"(a,f16.5,f10.5)") &
             'Calling PEXSI LDOS routine. Energy and broadening (eV): ', &
             energy/eV, broadening/eV
        write(6,"(a,i4)") &
             'Processors working on selected inversion: ', npPerPole
     endif

     call timer("pexsi-ldos-selinv", 1)

     ! Build AnzvalLocal as H - zS, where z = (energy, broadening), and
     ! treat it as a one-dimensional real array with 2*nnzLocal entries

     call re_alloc(AnzvalLocal,1,2*nnzLocal,"AnzvalLocal","pexsi_ldos")
     call re_alloc(AinvnzvalLocal,1,2*nnzLocal,"AinvnzvalLocal","pexsi_ldos")

     loc = 1
     do i = 1, nnzLocal
        AnzvalLocal(loc)   = Hnzvallocal(i) - energy*Snzvallocal(i)
        AnzvalLocal(loc+1) =               - broadening*Snzvallocal(i)
        loc = loc + 2
     enddo

     call f_ppexsi_selinv_complex_symmetric_matrix(&
          plan,&
          options,&
          AnzvalLocal,&
          AinvnzvalLocal,&
          info)

     call check_info(info,"selinv complex matrix in LDOS")

     ! Get DMnzvalLocal as (1/pi) * Imag(Ainv)

     loc = 1
     do i = 1, nnzLocal
        DMnzvalLocal(i) = (1.0_dp/pi) * AinvnzvalLocal(loc+1)
        loc = loc + 2
     enddo
     call de_alloc(AnzvalLocal,"AnzvalLocal","pexsi_ldos")
     call de_alloc(AinvnzvalLocal,"AinvnzvalLocal","pexsi_ldos")

     call timer("pexsi-ldos-selinv", 2)
     !
  endif ! PEXSI_worker
#+END_SRC

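A possible sanity check at this point (a sketch only, not tangled into
the module): since $-\tfrac{1}{\pi}\,\mathrm{Im}\,\mathrm{Tr}[G(E+i\eta)S]$
is the Lorentzian-broadened density of states, the trace of the freshly
computed =DMnzvalLocal= against =SnzvalLocal= should give the broadened
DOS at =energy= (per spin channel). The scalars =dos_local= and
=dos_at_e= are hypothetical and would have to be declared with the
other routine variables.

#+BEGIN_SRC f90 :tangle no
  ! Sketch only. Both arrays share the same sparsity pattern, so the
  ! trace Tr[DM*S] reduces to a dot product of the nonzero values,
  ! summed over the pole communicator.
  if (PEXSI_worker) then
     dos_local = dot_product(DMnzvalLocal(1:nnzLocal), SnzvalLocal(1:nnzLocal))
     call MPI_AllReduce(dos_local, dos_at_e, 1, MPI_double_precision, &
                        MPI_sum, PEXSI_Pole_Comm, ierr)
     if (spatial_rank == 0) write(6,*) "Broadened DOS at E (states/Ry): ", dos_at_e
  endif
#+END_SRC
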
** Copy information to Siesta side

#+BEGIN_SRC f90 :noweb-ref copy-to-siesta-side

  do ispin = 1, nspin

     m1 => m1_spin(ispin)

     if (PEXSI_worker .and. (pexsi_spin == ispin)) then
        ! Prepare m2 to transfer

        call de_alloc(colPtrLocal,"colPtrLocal","pexsi_ldos")

        call de_alloc(m2%vals(1)%data,"m2%vals(1)%data","pexsi_ldos")
        call de_alloc(m2%vals(2)%data,"m2%vals(2)%data","pexsi_ldos")

        deallocate(m2%vals)
        allocate(m2%vals(1))
        m2%vals(1)%data => DMnzvalLocal(1:nnzLocal)

     endif

     ! Prepare m1 to receive the results
     if (SIESTA_worker) then
        nullify(m1%vals(1)%data)   ! formerly pointing to S
        nullify(m1%vals(2)%data)   ! formerly pointing to H
        deallocate(m1%vals)
        nullify(m1%numcols)        ! formerly pointing to numH
        nullify(m1%cols)           ! formerly pointing to listH
     endif

     call timer("redist_orbs_bck", 1)
     dist2 => dist2_spin(ispin)
     call redistribute_spmatrix(norbs,m2,dist2,m1,dist1,World_Comm)
     call timer("redist_orbs_bck", 2)

     if (PEXSI_worker .and. (pexsi_spin == ispin)) then
        call de_alloc(DMnzvalLocal, "DMnzvalLocal", "pexsi_ldos")

        nullify(m2%vals(1)%data)   ! formerly pointing to DM
        deallocate(m2%vals)
        ! allocated in the direct transfer
        call de_alloc(m2%numcols,"m2%numcols","pexsi_ldos")
        call de_alloc(m2%cols,   "m2%cols",   "pexsi_ldos")
     endif

     if (SIESTA_worker) then

        LDOS_DM(:,ispin) = m1%vals(1)%data(:)
        ! Check no_l
        if (no_l /= m1%no_l) then
           call die("Mismatch in no_l")
        endif
        ! Check listH
        if (any(listH(:) /= m1%cols(:))) then
           call die("Mismatch in listH")
        endif

        call de_alloc(m1%vals(1)%data,"m1%vals(1)%data","pexsi_ldos")
        deallocate(m1%vals)
        ! allocated in the direct transfer
        call de_alloc(m1%numcols,"m1%numcols","pexsi_ldos")
        call de_alloc(m1%cols,   "m1%cols",   "pexsi_ldos")

     endif
  enddo
  call timer("pexsi-ldos", 2)

#+END_SRC

** Clean up

#+BEGIN_SRC f90 :noweb-ref clean-up

  call delete(dist1)
  do ispin = 1, nspin
     call delete(dist2_spin(ispin))
  enddo
  deallocate(dist2_spin)
  deallocate(m1_spin)

  call MPI_Comm_Free(PEXSI_Spatial_Comm, ierr)
  call MPI_Comm_Free(PEXSI_Spin_Comm, ierr)
  call MPI_Comm_Free(World_Comm, ierr)

  ! This communicator was created from a subgroup.
  ! As such, it is MPI_COMM_NULL for those processes
  ! not in the subgroup (non PEXSI workers). Only
  ! defined communicators can be freed.
  if (PEXSI_worker) then
     call MPI_Comm_Free(PEXSI_Pole_Comm, ierr)
  endif

  ! We finalize the plan here
  call f_ppexsi_plan_finalize( plan, info )

  call MPI_Group_Free(PEXSI_Spatial_Group, ierr)
  call MPI_Group_Free(PEXSI_Pole_Group, ierr)
  call MPI_Group_Free(World_Group, ierr)
#+END_SRC

** Support routines

A couple of helper routines used above: the processor-grid
factorization and the error dispatcher.

#+BEGIN_SRC f90 :noweb-ref support-routines
  <<get-row-col>>
  <<check-info>>
#+END_SRC

*** Row and column partition of npPerPole
#+BEGIN_SRC f90 :noweb-ref get-row-col
subroutine get_row_col(np,nrow,ncol)
integer, intent(in)  :: np
integer, intent(out) :: nrow, ncol
!
! Finds factors nrow and ncol such that nrow*ncol = np,
! as close to each other as possible, with nrow >= ncol.
! For prime np, ncol = 1 and nrow = np.
! (For example, np = 12 gives nrow = 4, ncol = 3.)

ncol = floor(sqrt(dble(np)))
do
  nrow = np/ncol
  if (nrow*ncol == np) exit
  ncol = ncol - 1
enddo
end subroutine get_row_col
#+END_SRC

*** Error dispatcher
#+BEGIN_SRC f90 :noweb-ref check-info
subroutine check_info(info,str)
integer, intent(in) :: info
character(len=*), intent(in) :: str

   if (mpirank == 0) then
      if (info /= 0) then
         write(6,*) trim(str) // " info: ", info
         call die("Error exit from " // trim(str) // " routine")
      endif
      call pxfflush(6)
   endif
end subroutine check_info
#+END_SRC