1! 2! Copyright (C) 1996-2016 The SIESTA group 3! This file is distributed under the terms of the 4! GNU General Public License: see COPYING in the top directory 5! or http://www.gnu.org/copyleft/gpl.txt. 6! See Docs/Contributors.txt for a list of contributors. 7! 8module m_rhoofd 9 10 implicit none 11 12 private 13 public :: rhoofd 14 15contains 16 17 18subroutine rhoofd( no, np, maxnd, numd, listdptr, listd, nspin, & 19 Dscf, rhoscf, nuo, nuotot, iaorb, iphorb, isa ) 20! ******************************************************************** 21! Finds the SCF density at the mesh points from the density matrix. 22! Written by P.Ordejon and J.M.Soler. May'95. 23! Re-ordered so that mesh is the outer loop and the orbitals are 24! handled as lower-half triangular. J.D.Gale and J.M.Soler, Feb'99 25! Version of rhoofd that optionally uses a direct algorithm to save 26! memory. Modified by J.D.Gale, November'99 27! *********************** InpUT ************************************** 28! integer no : Number of basis orbitals 29! integer np : Number of mesh points 30! integer maxnd : First dimension of listD and Dscf, and 31! maximum number of nonzero elements in 32! any row of Dscf 33! integer numd(nuo) : Number of nonzero elemts in each row of Dscf 34! integer listdptr(nuo) : Pointer to start of rows in listd 35! integer listd(maxnd) : List of nonzero elements in each row of Dscf 36! integer nspin : Number of spin components 37! real*8 Dscf(maxnd) : Rows of Dscf that are non-zero 38! integer nuo : Number of orbitals in unit cell locally 39! integer nuotot : Number of orbitals in unit cell in total 40! integer iaorb(*) : Pointer to atom to which orbital belongs 41! integer iphorb(*) : Orbital index within each atom 42! integer isa(*) : Species index of all atoms 43! *********************** OUTPUT ************************************** 44! real rhoscf(nsp,np) : SCF density at mesh points 45! ********************************************************************* 46 47! Modules 48 49 use precision, only: dp, grid_p 50 use atmfuncs, only: rcut, all_phi 51 use atm_types, only: nsmax=>nspecies 52 use atomlist, only: indxuo 53 use m_spin, only: SpOrb 54 use listsc_module, only: LISTSC 55 use mesh, only: nsp, dxa, xdop, xdsp, meshLim 56 use meshdscf, only: matrixOtoM 57 use meshdscf, only: nrowsDscfL, listdl, listdlptr, NeedDscfL, & 58 numdl, DscfL 59 use meshphi, only: DirectPhi, endpht, lstpht, listp2, phi 60 use parallel, only: Nodes, node 61 use sys, only: die 62 use alloc, only: re_alloc, de_alloc 63 use parallelsubs, only: GlobalToLocalOrb 64#ifdef MPI 65 use mpi_siesta 66#endif 67 implicit none 68 69! Argument types and dimensions 70 integer, intent(in) :: no, np, nuo, maxnd, numd(nuo), nspin, & 71 nuotot, iaorb(*), iphorb(*), & 72 isa(*), listdptr(nuo), listd(maxnd) 73 real(dp), intent(in) :: Dscf(:,:) 74 real(grid_p), intent(out) :: rhoscf(nsp,np,nspin) 75 external :: memory, timer 76! Internal variables and arrays 77 integer, parameter :: minloc = 1000 ! Min buffer size for local copy of Dscf 78 integer, parameter :: maxoa = 100 ! Max # of orbitals per atom 79 logical :: ParallelLocal 80 integer :: i, ia, ic, ii, ijl, il, imp, ind 81 integer :: ispin, io, iop, ip, iphi, is 82 integer :: isp, iu, iul, j, jc, last, lasta 83 integer :: lastop, maxloc, maxloc2, triang, nc 84 integer :: maxndl, nphiloc, lenx, leny, lenxy, lenz 85 86 ! Total hamiltonian size 87 integer :: h_spin_dim 88 89 real(dp) :: r2sp, dxsp(3) 90 integer, pointer :: ilc(:), ilocal(:), iorb(:) 91 real(dp), pointer :: r2cut(:), Clocal(:,:), Dlocal(:,:), phia(:,:) 92#ifdef _TRACE_ 93 integer :: MPIerror 94#endif 95 96#ifdef DEBUG 97 call write_debug( ' PRE rhoofd' ) 98#endif 99#ifdef _TRACE_ 100 call MPI_Barrier( MPI_Comm_World, MPIerror ) 101 call MPItrace_event( 1000, 1 ) 102#endif 103! Start time counter 104 call timer('rhoofd',1) 105 106 ! Get spin-size 107 h_spin_dim = size(Dscf, 2) 108 109! Set algorithm logical 110 ParallelLocal = (Nodes > 1) 111 112 if (ParallelLocal) then 113 if (nrowsDscfL > 0) then 114 maxndl = listdlptr(nrowsDscfL) + numdl(nrowsDscfL) 115 else 116 maxndl = 1 117 end if 118 nullify(DscfL) 119 call re_alloc( DscfL, 1, maxndl, 1, h_spin_dim, 'DscfL', 'rhoofd' ) 120! Redistribute Dscf to DscfL form 121 call matrixOtoM( maxnd, numd, listdptr, maxndl, nuo, & 122 h_spin_dim, Dscf, DscfL ) 123 end if 124 125! Find atomic cutoff radii 126 nullify(r2cut) 127 call re_alloc( r2cut, 1, nsmax, 'r2cut', 'rhoofd' ) 128 r2cut = 0.0_dp 129 do i = 1,nuotot 130 ia = iaorb(i) 131 is = isa(ia) 132 io = iphorb(i) 133 r2cut(is) = max( r2cut(is), rcut(is,io)**2 ) 134 end do 135 136! Find size of buffers to store partial copies of Dscf and C 137 maxloc2 = maxval(endpht(1:np)-endpht(0:np-1)) 138 maxloc = maxloc2 + minloc 139 maxloc = min( maxloc, no ) 140 triang = (maxloc+1)*(maxloc+2)/2 141 142 lenx = meshLim(2,1) - meshLim(1,1) + 1 143 leny = meshLim(2,2) - meshLim(1,2) + 1 144 lenz = meshLim(2,3) - meshLim(1,3) + 1 145 lenxy = lenx*leny 146 147!$OMP parallel default(shared), & 148!$OMP&shared(rhoscf), & 149!$OMP&private(ilocal,ilc,iorb,Dlocal,Clocal,phia), & 150!$OMP&private(ip,nc,ic,imp,i,il,last,j,iu,iul,ii,ind,io), & 151!$OMP&private(ijl,ispin,lasta,lastop,ia,is,iop,isp,iphi), & 152!$OMP&private(jc,nphiloc,dxsp,r2sp) 153 154! Allocate local memory 155 nullify ( ilocal, ilc, iorb, Dlocal, Clocal, phia ) 156!$OMP critical 157 allocate( ilocal(no), ilc(maxloc2), iorb(maxloc) ) 158 allocate( Dlocal(triang,nspin), Clocal(nsp,maxloc2) ) 159 if ( DirectPhi ) allocate( phia(maxoa,nsp) ) 160!$OMP end critical 161 162! Initializations 163 Dlocal(:,:) = 0.0_dp 164 ilocal(:) = 0 165 iorb(:) = 0 166 167 last = 0 168 169!$OMP do 170 do ip = 1,np 171 172! Initializations 173 rhoscf(:,ip,:) = 0.0_grid_p 174 175! Find number of nonzero orbitals at this point 176 nc = endpht(ip) - endpht(ip-1) 177! iorb(il)>0 means that row il of Dlocal must not be overwritten 178! iorb(il)=0 means that row il of Dlocal is empty 179! iorb(il)<0 means that row il of Dlocal contains a valid row of 180! Dscf, but which is not required at this point 181 do ic = 1,nc 182 imp = endpht(ip-1) + ic 183 i = lstpht(imp) 184 il = ilocal(i) 185 if (il > 0) iorb(il) = i 186 end do 187 188! Look for required rows of Dscf not yet stored in Dlocal 189 do ic = 1,nc 190 imp = endpht(ip-1) + ic 191 i = lstpht(imp) 192 if (ilocal(i) == 0) then 193! Look for an available row in Dlocal 194 do il = 1,maxloc 195! last runs circularly over rows of Dlocal 196 last = last + 1 197 if (last > maxloc) last = 1 198 if (iorb(last) <= 0) goto 10 199 end do 200 call die('rhoofd: no slot available in Dlocal') 20110 continue 202! Copy row i of Dscf into row last of Dlocal 203 j = abs(iorb(last)) 204 if (j /= 0) ilocal(j) = 0 205 ilocal(i) = last 206 iorb(last) = i 207 il = last 208 iu = indxuo(i) 209 if ( ParallelLocal ) then 210 iul = NeedDscfL(iu) 211 if ( i == iu ) then 212 do ii = 1, numdl(iul) 213 ind = listdlptr(iul) + ii 214 j = listdl(ind) 215 ijl = idx_ijl(il,ilocal(j)) 216 if ( SpOrb ) then 217 Dlocal(ijl,1) = DscfL(ind,1) 218 Dlocal(ijl,2) = DscfL(ind,2) 219 Dlocal(ijl,3) = 0.5*(DscfL(ind,3)+DscfL(ind,7)) 220 Dlocal(ijl,4) = 0.5*(DscfL(ind,4)+DscfL(ind,8)) 221 else 222 Dlocal(ijl,:) = DscfL(ind,:) 223 end if 224 end do 225 else 226 do ii = 1, numdl(iul) 227 ind = listdlptr(iul)+ii 228 j = LISTSC( i, iu, listdl(ind) ) 229 ijl = idx_ijl(il,ilocal(j)) 230 if ( SpOrb ) then 231 Dlocal(ijl,1) = DscfL(ind,1) 232 Dlocal(ijl,2) = DscfL(ind,2) 233 Dlocal(ijl,3) = 0.5*(DscfL(ind,3)+DscfL(ind,7)) 234 Dlocal(ijl,4) = 0.5*(DscfL(ind,4)+DscfL(ind,8)) 235 else 236 Dlocal(ijl,:) = DscfL(ind,:) 237 end if 238 end do 239 end if 240 else 241 call GlobalToLocalOrb( iu, Node, Nodes, iul ) 242 if ( i == iu ) then 243 do ii = 1, numd(iul) 244 ind = listdptr(iul)+ii 245 j = listd(ind) 246 ijl = idx_ijl(il,ilocal(j)) 247 if ( SpOrb ) then 248 Dlocal(ijl,1) = Dscf(ind,1) 249 Dlocal(ijl,2) = Dscf(ind,2) 250 Dlocal(ijl,3) = 0.5*(Dscf(ind,3)+Dscf(ind,7)) 251 Dlocal(ijl,4) = 0.5*(Dscf(ind,4)+Dscf(ind,8)) 252 else 253 Dlocal(ijl,:) = Dscf(ind,:) 254 end if 255 end do 256 else 257 do ii = 1, numd(iul) 258 ind = listdptr(iul)+ii 259 j = LISTSC( i, iu, listd(ind) ) 260 ijl = idx_ijl(il,ilocal(j)) 261 if ( SpOrb ) then 262 Dlocal(ijl,1) = Dscf(ind,1) 263 Dlocal(ijl,2) = Dscf(ind,2) 264 Dlocal(ijl,3) = 0.5*(Dscf(ind,3)+Dscf(ind,7)) 265 Dlocal(ijl,4) = 0.5*(Dscf(ind,4)+Dscf(ind,8)) 266 else 267 Dlocal(ijl,:) = Dscf(ind,:) 268 end if 269 end do 270 end if 271 end if 272 end if 273 end do 274 275! Check algorithm 276 if ( DirectPhi ) then 277 lasta = 0 278 lastop = 0 279 do ic = 1,nc 280 imp = endpht(ip-1) + ic 281 i = lstpht(imp) 282 il = ilocal(i) 283 ia = iaorb(i) 284 iop = listp2(imp) 285 ilc(ic) = il 286 287! Generate or retrieve phi values 288 if ( ia /= lasta .or. iop /= lastop ) then 289 lasta = ia 290 lastop = iop 291 is = isa(ia) 292 do isp = 1 , nsp 293 dxsp(:) = xdsp(:,isp) + xdop(:,iop) - dxa(:,ia) 294 r2sp = sum(dxsp**2) 295 if ( r2sp < r2cut(is) ) then 296!$OMP critical 297 call all_phi( is, +1, dxsp, maxoa, nphiloc, phia(:,isp) ) 298!$OMP end critical 299 else 300 phia(:,isp) = 0.0_dp 301 end if 302 end do 303 end if 304 iphi = iphorb(i) 305 306! Retrieve phi values 307 Clocal(:,ic) = dsqrt(2._dp) * phia(iphi,:) 308 309! Loop on second orbital of mesh point 310 do jc = 1, ic - 1 311 ijl = idx_ijl(il,ilc(jc)) 312 313 do ispin = 1,nspin 314! Loop over sub-points 315 do isp = 1,nsp 316 rhoscf(isp,ip,ispin) = rhoscf(isp,ip,ispin) + & 317 Dlocal(ijl,ispin) * Clocal(isp,ic) * Clocal(isp,jc) 318 end do 319 end do 320 321 end do 322 323! ilc(ic) == il 324 ijl = idx_ijl(il,ilc(ic)) 325 326 do ispin = 1,nspin 327! Loop over sub-points 328 do isp = 1,nsp 329 rhoscf(isp,ip,ispin) = rhoscf(isp,ip,ispin) + & 330 Dlocal(ijl,ispin) * 0.5_dp * Clocal(isp,ic) ** 2 331 end do 332 333 end do 334 335 end do 336 337 else 338 339! Store values 340 do ic = 1 , nc 341 imp = endpht(ip-1) + ic 342 il = ilocal(lstpht(imp)) 343 ilc(ic) = il 344 345! Retrieve phi values 346 Clocal(:,ic) = dsqrt(2._dp) * phi(:,imp) 347 348! Loop on second orbital of mesh point 349 do jc = 1, ic - 1 350 ijl = idx_ijl(il,ilc(jc)) 351 352 do ispin = 1,nspin 353! Loop over sub-points 354 do isp = 1,nsp 355 rhoscf(isp,ip,ispin) = rhoscf(isp,ip,ispin) + & 356 Dlocal(ijl,ispin) * Clocal(isp,ic) * Clocal(isp,jc) 357 end do 358 end do 359 360 end do 361 362! ilc(ic) == il 363 ijl = idx_ijl(il,ilc(ic)) 364 365 do ispin = 1,nspin 366! Loop over sub-points 367 do isp = 1,nsp 368 rhoscf(isp,ip,ispin) = rhoscf(isp,ip,ispin) + & 369 Dlocal(ijl,ispin) * 0.5_dp * Clocal(isp,ic) ** 2 370 end do 371 372 end do 373 374 end do 375 376 end if 377 378! Restore iorb for next point 379 do imp = 1+endpht(ip-1), endpht(ip) 380 i = lstpht(imp) 381 il = ilocal(i) 382 iorb(il) = -i 383 end do 384 385 end do 386!$OMP end do 387 388! Free local memory 389!$OMP critical 390 deallocate( ilocal, ilc, iorb, Dlocal, Clocal ) 391 if ( DirectPhi ) deallocate( phia ) 392!$OMP end critical 393 394!$OMP end parallel 395 396 call de_alloc( r2cut, 'r2cut', 'rhoofd' ) 397 if (ParallelLocal) then 398 call de_alloc( DscfL, 'DscfL', 'rhoofd' ) 399 end if 400 401#ifdef _TRACE_ 402 call MPI_Barrier( MPI_Comm_World, MPIerror ) 403 call MPItrace_event( 1000, 0 ) 404#endif 405 call timer('rhoofd',2) 406 407#ifdef DEBUG 408 call write_debug( ' POS rhoofd' ) 409#endif 410 411contains 412 413 ! In any case will the compiler most likely inline this 414 ! small routine. So it should not pose any problem. 415 pure function idx_ijl(i,j) result(ij) 416 integer, intent(in) :: i,j 417 integer :: ij 418 if ( i > j ) then 419 ij = i * (i + 1)/2 + j + 1 420 else 421 ij = j * (j + 1)/2 + i + 1 422 end if 423 end function idx_ijl 424 425end subroutine rhoofd 426 427end module m_rhoofd 428