! Tangled code
MODULE m_pexsi_local_dos
#ifdef SIESTA__PEXSI
  private
  public :: pexsi_local_dos

CONTAINS

  subroutine pexsi_local_dos( )
    use m_energies

    use sparse_matrices
    USE siesta_options
    use siesta_geom
    use atomlist,   only: indxuo, indxua
    use atomlist,   only: qtot, no_u, no_l
    use atomlist,   only: iphorb
    use atomlist,   only: datm, no_s, iaorb
    use fdf
    use files,      only: slabel
    use files,      only: filesOut_t    ! type for output file names
    use parallel,   only: SIESTA_worker
    use m_ntm
    use m_forces,   only: fa
    use m_spin,     only: nspin
    use atomlist,   only: qs => qtots
    use m_energies, only: efs
    use m_dhscf,    only: dhscf

    implicit none

    integer  :: dummy_iscf = 1
    real(dp) :: dummy_str(3,3), dummy_strl(3,3)   ! for dhscf call
    real(dp) :: dummy_dipol(3)

    real(dp) :: factor, g2max
    real(dp) :: energy, broadening

    type(filesOut_t) :: filesOut   ! blank output file names

    energy     = fdf_get('PEXSI.LDOS.Energy', 0.0_dp, "Ry")
    broadening = fdf_get('PEXSI.LDOS.Broadening', 0.01_dp, "Ry")

    ! Note that we re-use Dscf, so it will be obliterated
    call get_LDOS_SI( no_u, no_l, nspin,        &
                      maxnh, numh, listh, H, S, &
                      Dscf, energy, broadening)

    if (SIESTA_worker) then
      ! Find the LDOS in the real space mesh
      filesOut%rho = trim(slabel) // '.LDSI'
      g2max = g2cut

      ! There is too much clutter here, because dhscf is
      ! a "kitchen-sink" routine that does too many things

      call dhscf( nspin, no_s, iaorb, iphorb, no_l,                   &
                  no_u, na_u, na_s, isa, xa_last, indxua,             &
                  ntm, 0, 0, 0, filesOut,                             &
                  maxnh, numh, listhptr, listh, Dscf, Datm, maxnh, H, &
                  Enaatm, Enascf, Uatm, Uscf, DUscf, DUext, Exc, Dxc, &
                  dummy_dipol, dummy_str, fa, dummy_strl )
    endif

  END subroutine pexsi_local_dos

  subroutine get_LDOS_SI( no_u, no_l, nspin_in,      &
                          maxnh, numh, listh, H, S,  &
                          LDOS_DM, energy, broadening)

    use precision,    only: dp
    use fdf
    use units,        only: eV, pi
    use sys,          only: die
    use m_mpi_utils,  only: broadcast
    use parallel,     only: SIESTA_worker, BlockSize
    use parallel,     only: SIESTA_Group, SIESTA_Comm
    use m_redist_spmatrix, only: aux_matrix, redistribute_spmatrix
    use class_Distribution
    use alloc,        only: re_alloc, de_alloc
    use mpi_siesta
    use iso_c_binding
    use f_ppexsi_interface, only: f_ppexsi_options
    use f_ppexsi_interface, only: f_ppexsi_plan_finalize
    use f_ppexsi_interface, only: f_ppexsi_plan_initialize
    use f_ppexsi_interface, only: f_ppexsi_selinv_complex_symmetric_matrix
    use f_ppexsi_interface, only: f_ppexsi_load_real_symmetric_hs_matrix
    use f_ppexsi_interface, only: f_ppexsi_set_default_options
    use f_ppexsi_interface, &
         only: f_ppexsi_symbolic_factorize_complex_symmetric_matrix

    implicit none

    integer, intent(in)          :: maxnh, no_u, no_l, nspin_in
    integer, intent(in), target  :: listh(maxnh), numh(no_l)
    real(dp), intent(in), target :: H(maxnh,nspin_in), S(maxnh)
    real(dp), intent(in)         :: energy, broadening
    real(dp), intent(out)        :: LDOS_DM(maxnh,nspin_in)

    integer  :: ih, i
    integer  :: info
    logical  :: write_ok
    !------------
    external :: timer
    integer  :: World_Comm, mpirank, ierr
    !
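    ! Copies of the Siesta-side dimensions (no_u and nspin_in); they are
    ! broadcast over World_Comm below so that ranks outside the Siesta
    ! subset also get their values.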
    integer  :: norbs
    integer  :: nspin
    integer  :: PEXSI_Pole_Group, PEXSI_Spatial_Group, World_Group
    integer, allocatable :: pexsi_pole_ranks_in_world(:)
    integer  :: nworkers_SIESTA
    integer, allocatable :: siesta_ranks_in_world(:)
    integer  :: PEXSI_Pole_Group_in_World
    integer, allocatable :: PEXSI_Pole_ranks_in_World_Spin(:,:)
    integer  :: PEXSI_Pole_Comm, PEXSI_Spatial_Comm, PEXSI_Spin_Comm
    integer  :: numNodesTotal
    integer  :: npPerPole
    logical  :: PEXSI_worker
    !
    type(Distribution) :: dist1
    type(Distribution), allocatable, target :: dist2_spin(:)
    type(Distribution), pointer :: dist2
    integer  :: pbs, color, spatial_rank, spin_rank
    type(aux_matrix), allocatable, target :: m1_spin(:)
    type(aux_matrix) :: m2
    type(aux_matrix), pointer :: m1
    integer  :: nrows, nnz, nnzLocal, numColLocal
    integer, pointer, dimension(:) :: colptrLocal => null(), rowindLocal => null()
    !
    real(dp), pointer, dimension(:) ::                 &
         HnzvalLocal => null(), SnzvalLocal => null(), &
         DMnzvalLocal => null()
    !
    integer :: ispin, pexsi_spin
    type(f_ppexsi_options) :: options
    !
    integer :: isSIdentity
    integer :: verbosity
    integer(c_intptr_t) :: plan
    integer :: numProcRow, numProcCol
    integer :: outputFileIndex
    real(dp), pointer, dimension(:) :: AnzvalLocal => null()
    real(dp), pointer, dimension(:) :: AinvnzvalLocal => null()
    integer :: loc
    ! -------- for serial compilation
    !
    ! Our global communicator is a duplicate of the passed communicator
    !
    call MPI_Comm_Dup(true_MPI_Comm_World, World_Comm, ierr)
    call mpi_comm_rank( World_Comm, mpirank, ierr )

    call timer("pexsi-ldos", 1)

    if (SIESTA_worker) then
      ! Copy some intent(in) variables, which are only
      ! defined on the Siesta subset of processes
      norbs = no_u
      nspin = nspin_in
    endif
    !
    call broadcast(norbs, comm=World_Comm)
    call broadcast(nspin, World_Comm)
    call MPI_Comm_Group(World_Comm, World_Group, ierr)
    call MPI_Group_Size(SIESTA_Group, nworkers_SIESTA, ierr)
    allocate(siesta_ranks_in_world(nworkers_SIESTA))
    call MPI_Group_translate_ranks( SIESTA_Group, nworkers_SIESTA,        &
                                    (/ (i, i=0, nworkers_SIESTA-1) /),    &
                                    World_Group, siesta_ranks_in_world, ierr )
    call newDistribution(dist1, World_Comm, siesta_ranks_in_world,  &
                         TYPE_BLOCK_CYCLIC, BlockSize, "bc dist")
    deallocate(siesta_ranks_in_world)
    call MPI_Barrier(World_Comm, ierr)

    call mpi_comm_size( World_Comm, numNodesTotal, ierr )

    npPerPole = fdf_get("PEXSI.np-per-pole", 4)
    npPerPole = fdf_get("PEXSI.LDOS.np-per-pole", npPerPole)
    if (nspin*npPerPole > numNodesTotal) &
         call die("PEXSI.np-per-pole is too big for MPI size")

    ! "Row" communicator for independent PEXSI operations on each spin.
    ! The name refers to the "spatial" degrees of freedom.
    color = mod(mpirank, nspin)   ! {0,1} for nspin = 2, or {0} for nspin = 1
    call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spatial_Comm, ierr)

    ! "Column" communicator for spin reductions
    color = mpirank/nspin
    call MPI_Comm_Split(World_Comm, color, mpirank, PEXSI_Spin_Comm, ierr)

    ! Group and communicator for the first-pole team of PEXSI workers
    !
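    ! Layout sketch (illustrative example only, assuming 8 MPI ranks,
    ! nspin = 2 and npPerPole = 2; none of these values is enforced here):
    !   spatial ("row") communicator for spin 1: world ranks 0, 2, 4, 6
    !   spatial ("row") communicator for spin 2: world ranks 1, 3, 5, 7
    ! The pole group created below takes the first npPerPole ranks of each
    ! spatial communicator (world ranks {0,2} and {1,3} in this example),
    ! and only those processes take part in the selected inversion.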
    call MPI_Comm_Group(PEXSI_Spatial_Comm, PEXSI_Spatial_Group, Ierr)
    call MPI_Group_incl(PEXSI_Spatial_Group, npPerPole,  &
                        (/ (i, i=0, npPerPole-1) /),     &
                        PEXSI_Pole_Group, Ierr)
    call MPI_Comm_create(PEXSI_Spatial_Comm, PEXSI_Pole_Group,  &
                         PEXSI_Pole_Comm, Ierr)

    call mpi_comm_rank( PEXSI_Spatial_Comm, spatial_rank, ierr )
    call mpi_comm_rank( PEXSI_Spin_Comm, spin_rank, ierr )
    PEXSI_worker = (spatial_rank < npPerPole)   ! Could be spin up or spin down

    ! PEXSI blocksize
    pbs = norbs/npPerPole

    ! Careful with this. For the purposes of matrix transfers,
    ! we need the ranks of the Pole group
    ! in the "bridge" communicator/group (World)

    allocate(pexsi_pole_ranks_in_world(npPerPole))
    call MPI_Comm_Group(World_Comm, World_Group, Ierr)

    call MPI_Group_translate_ranks( PEXSI_Pole_Group, npPerPole,   &
                                    (/ (i, i=0, npPerPole-1) /),   &
                                    World_Group, pexsi_pole_ranks_in_world, ierr )

    ! What we need is to include the actual world ranks
    ! in the distribution object
    allocate(PEXSI_Pole_ranks_in_World_Spin(npPerPole, nspin))
    call MPI_AllGather(pexsi_pole_ranks_in_world, npPerPole, MPI_integer, &
                       PEXSI_Pole_Ranks_in_World_Spin(1,1), npPerPole,    &
                       MPI_integer, PEXSI_Spin_Comm, ierr)

    ! Create distributions known to all nodes
    allocate(dist2_spin(nspin))
    do ispin = 1, nspin
      call newDistribution(dist2_spin(ispin), World_Comm,            &
                           PEXSI_Pole_Ranks_in_World_Spin(:,ispin),  &
                           TYPE_PEXSI, pbs, "px dist")
    enddo
    deallocate(pexsi_pole_ranks_in_world, PEXSI_Pole_Ranks_in_World_Spin)
    call MPI_Barrier(World_Comm, ierr)

    pexsi_spin = spin_rank + 1   ! {1,2}

    ! This is done serially on the Siesta side, each time
    ! filling in the structures of one PEXSI spin set

    allocate(m1_spin(nspin))
    do ispin = 1, nspin

      m1 => m1_spin(ispin)

      if (SIESTA_worker) then
        m1%norbs = norbs
        m1%no_l  = no_l
        m1%nnzl  = sum(numH(1:no_l))
        m1%numcols => numH
        m1%cols    => listH
        allocate(m1%vals(2))
        m1%vals(1)%data => S(:)
        m1%vals(2)%data => H(:,ispin)

      endif ! SIESTA_worker

      call timer("redist_orbs_fwd", 1)

      ! Note that we cannot simply wrap this in a pexsi_spin test, as
      ! there are Siesta nodes in both spin sets.
      ! We must discriminate the PEXSI workers by the distribution info
      dist2 => dist2_spin(ispin)
      call redistribute_spmatrix(norbs, m1, dist1, m2, dist2, World_Comm)

      call timer("redist_orbs_fwd", 2)

      if (PEXSI_worker .and. (pexsi_spin == ispin)) then

        nrows       = m2%norbs   ! or simply 'norbs'
        numColLocal = m2%no_l
        nnzLocal    = m2%nnzl
        call MPI_AllReduce(nnzLocal, nnz, 1, MPI_integer, MPI_sum,  &
                           PEXSI_Pole_Comm, ierr)

        call re_alloc(colptrLocal, 1, numColLocal+1, "colptrLocal", "pexsi_ldos")
        colptrLocal(1) = 1
        do ih = 1, numColLocal
          colptrLocal(ih+1) = colptrLocal(ih) + m2%numcols(ih)
        enddo

        rowindLocal => m2%cols
        SnzvalLocal => m2%vals(1)%data
        HnzvalLocal => m2%vals(2)%data

        call re_alloc(DMnzvalLocal, 1, nnzLocal, "DMnzvalLocal", "pexsi_ldos")

        call memory_all("after transferring H+S for PEXSI-LDOS", PEXSI_Pole_Comm)

      endif ! PEXSI worker
    enddo
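
    ! Summary of what follows: the selected inversion computes, on the
    ! sparse pattern of H and S,
    !
    !     DM_LDOS = (1/pi) * Im [ (H - z*S)^(-1) ],   z = energy + i*broadening
    !
    ! and this "LDOS density matrix" is then shipped back to the Siesta side,
    ! where dhscf puts it on the real-space grid (the .LDSI file).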
    ! Make these available to all nodes.
    ! (Note that the values are those on process 0, which is in the spin=1 set.
    !  In fact, they are only needed for calls to the interface, so the
    !  broadcast could be over PEXSI_Spatial_Comm only.)

    call MPI_Bcast(nrows, 1, MPI_integer, 0, World_Comm, ierr)
    call MPI_Bcast(nnz, 1, MPI_integer, 0, World_Comm, ierr)

    call memory_all("after setting up H+S for PEXSI LDOS", World_comm)

    ! --
    isSIdentity = 0
    !
    call f_ppexsi_set_default_options( options )
    ! Ordering flag:
    !   1: Use METIS
    !   0: Use PARMETIS/PTSCOTCH
    options%ordering = fdf_get("PEXSI.ordering", 1)
    ! Number of processors for symbolic factorization
    ! (only relevant for PARMETIS/PT_SCOTCH)
    options%npSymbFact = fdf_get("PEXSI.np-symbfact", 1)
    options%verbosity  = fdf_get("PEXSI.verbosity", 1)
    call get_row_col(npPerPole, numProcRow, numProcCol)

    ! Set the outputFileIndex to be the pole index.
    ! Starting from PEXSI v0.8.0, the first processor for each pole
    ! outputs information

    if ( mod( mpirank, npPerPole ) .eq. 0 ) then
      outputFileIndex = mpirank/npPerPole
    else
      outputFileIndex = -1
    endif
    !
    ! Note that even though we only use one pole's worth of processors, we
    ! still use the full spatial PEXSI communicator in the plan.
    ! Failing to do so leads to an error. This is not sufficiently documented.
    !
    plan = f_ppexsi_plan_initialize( &
         PEXSI_Spatial_Comm,  &
         numProcRow,          &
         numProcCol,          &
         outputFileIndex,     &
         info)

    call check_info(info, "plan_initialize in LDOS")

    call f_ppexsi_load_real_symmetric_hs_matrix( &
         plan,         &
         options,      &
         nrows,        &
         nnz,          &
         nnzLocal,     &
         numColLocal,  &
         colptrLocal,  &
         rowindLocal,  &
         HnzvalLocal,  &
         isSIdentity,  &
         SnzvalLocal,  &
         info)

    call check_info(info, "load_real_sym_hs_matrix in LDOS")

    call f_ppexsi_symbolic_factorize_complex_symmetric_matrix( &
         plan,     &
         options,  &
         info)
    call check_info(info, "factorize complex matrix in LDOS")

    options%isSymbolicFactorize = 0   ! We do not need it anymore

    if (PEXSI_worker) then

      if (mpirank == 0) then
        write(6,"(a,f16.5,f10.5)")                                       &
             'Calling PEXSI LDOS routine. Energy and broadening (eV) ',  &
             energy/eV, broadening/eV
        write(6,"(a,i4)")                                                &
             'Processors working on selected inversion: ', npPerPole
      endif

      call timer("pexsi-ldos-selinv", 1)

      ! Build AnzvalLocal as H - z*S, with z = energy + i*broadening, and
      ! treat it as a one-dimensional real array with 2*nnzLocal entries
      ! (interleaved real and imaginary parts)

      call re_alloc(AnzvalLocal, 1, 2*nnzLocal, "AnzvalLocal", "pexsi_ldos")
      call re_alloc(AinvnzvalLocal, 1, 2*nnzLocal, "AinvnzvalLocal", "pexsi_ldos")

      loc = 1
      do i = 1, nnzLocal
        AnzvalLocal(loc)   = HnzvalLocal(i) - energy*SnzvalLocal(i)
        AnzvalLocal(loc+1) = - broadening*SnzvalLocal(i)
        loc = loc + 2
      enddo

      call f_ppexsi_selinv_complex_symmetric_matrix( &
           plan,            &
           options,         &
           AnzvalLocal,     &
           AinvnzvalLocal,  &
           info)

      call check_info(info, "selinv complex matrix in LDOS")

      ! Get DMnzvalLocal as (1/pi) * Imag(Ainv)

      loc = 1
      do i = 1, nnzLocal
        DMnzvalLocal(i) = (1.0_dp/pi) * AinvnzvalLocal(loc+1)
        loc = loc + 2
      enddo
      call de_alloc(AnzvalLocal, "AnzvalLocal", "pexsi_ldos")
      call de_alloc(AinvnzvalLocal, "AinvnzvalLocal", "pexsi_ldos")

      call timer("pexsi-ldos-selinv", 2)
      !
    endif ! PEXSI_worker

    do ispin = 1, nspin

      m1 => m1_spin(ispin)

      if (PEXSI_worker .and. (pexsi_spin == ispin)) then
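        ! The selected-inversion result (DMnzvalLocal) lives in the PEXSI
        ! distribution; reuse the m2 container as the source of the reverse
        ! redistribution back to the Siesta block-cyclic distribution.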
        ! Prepare m2 to transfer

        call de_alloc(colPtrLocal, "colPtrLocal", "pexsi_ldos")

        call de_alloc(m2%vals(1)%data, "m2%vals(1)%data", "pexsi_ldos")
        call de_alloc(m2%vals(2)%data, "m2%vals(2)%data", "pexsi_ldos")

        deallocate(m2%vals)
        allocate(m2%vals(1))
        m2%vals(1)%data => DMnzvalLocal(1:nnzLocal)

      endif

      ! Prepare m1 to receive the results
      if (SIESTA_worker) then
        nullify(m1%vals(1)%data)   ! formerly pointing to S
        nullify(m1%vals(2)%data)   ! formerly pointing to H
        deallocate(m1%vals)
        nullify(m1%numcols)        ! formerly pointing to numH
        nullify(m1%cols)           ! formerly pointing to listH
      endif

      call timer("redist_orbs_bck", 1)
      dist2 => dist2_spin(ispin)
      call redistribute_spmatrix(norbs, m2, dist2, m1, dist1, World_Comm)
      call timer("redist_orbs_bck", 2)

      if (PEXSI_worker .and. (pexsi_spin == ispin)) then
        call de_alloc(DMnzvalLocal, "DMnzvalLocal", "pexsi_ldos")

        nullify(m2%vals(1)%data)   ! formerly pointing to DM
        deallocate(m2%vals)
        ! allocated in the direct transfer
        call de_alloc(m2%numcols, "m2%numcols", "pexsi_ldos")
        call de_alloc(m2%cols,    "m2%cols",    "pexsi_ldos")
      endif

      if (SIESTA_worker) then

        LDOS_DM(:,ispin) = m1%vals(1)%data(:)
        ! Check no_l
        if (no_l /= m1%no_l) then
          call die("Mismatch in no_l")
        endif
        ! Check listH
        if (any(listH(:) /= m1%cols(:))) then
          call die("Mismatch in listH")
        endif

        call de_alloc(m1%vals(1)%data, "m1%vals(1)%data", "pexsi_ldos")
        deallocate(m1%vals)
        ! allocated in the direct transfer
        call de_alloc(m1%numcols, "m1%numcols", "pexsi_ldos")
        call de_alloc(m1%cols,    "m1%cols",    "pexsi_ldos")

      endif
    enddo
    call timer("pexsi-ldos", 2)

    call delete(dist1)
    do ispin = 1, nspin
      call delete(dist2_spin(ispin))
    enddo
    deallocate(dist2_spin)
    deallocate(m1_spin)

    call MPI_Comm_Free(PEXSI_Spatial_Comm, ierr)
    call MPI_Comm_Free(PEXSI_Spin_Comm, ierr)
    call MPI_Comm_Free(World_Comm, ierr)

    ! This communicator was created from a subgroup.
    ! As such, it is MPI_COMM_NULL for those processes
    ! not in the subgroup (non-PEXSI workers). Only
    ! defined communicators can be freed.
    if (PEXSI_worker) then
      call MPI_Comm_Free(PEXSI_Pole_Comm, ierr)
    endif

    ! We finalize the plan here
    call f_ppexsi_plan_finalize( plan, info )

    call MPI_Group_Free(PEXSI_Spatial_Group, ierr)
    call MPI_Group_Free(PEXSI_Pole_Group, ierr)
    call MPI_Group_Free(World_Group, ierr)

  CONTAINS

    subroutine get_row_col(np, nrow, ncol)
      integer, intent(in)  :: np
      integer, intent(out) :: nrow, ncol
      !
      ! Finds factors nrow and ncol such that nrow*ncol = np and the two
      ! are as similar as possible, with nrow >= ncol.
      ! For prime np: ncol = 1 and nrow = np.

      ncol = floor(sqrt(dble(np)))
      do
        nrow = np/ncol
        if (nrow*ncol == np) exit
        ncol = ncol - 1
      enddo
    end subroutine get_row_col

    subroutine check_info(info, str)
      integer, intent(in)          :: info
      character(len=*), intent(in) :: str

      if (mpirank == 0) then
        if (info /= 0) then
          write(6,*) trim(str) // " info : ", info
          call die("Error exit from " // trim(str) // " routine")
        endif
        call pxfflush(6)
      endif
    end subroutine check_info

  end subroutine get_LDOS_SI
#endif
end module m_pexsi_local_dos
! End of tangled code