1! 2! Copyright (C) 1996-2016 The SIESTA group 3! This file is distributed under the terms of the 4! GNU General Public License: see COPYING in the top directory 5! or http://www.gnu.org/copyleft/gpl.txt. 6! See Docs/Contributors.txt for a list of contributors. 7! 8module m_vmatsp 9 10 implicit none 11 12 private 13 public :: vmatsp 14 15contains 16 17 subroutine vmatsp( no, nml, nmpl, dvol, nspin, V, nvmax, & 18 numVs, listVsptr, listVs, Vs, & 19 nuo, nuotot, iaorb, iphorb, isa, q ) 20 21! ******************************************************************** 22! Finds the matrix elements of the potential. 23! First version written by P.Ordejon. 24! Name and interface modified by J.M.Soler. May'95. 25! Re-ordered so that mesh is the outer loop and the orbitals are 26! handled as lower-half triangular. J.D.Gale and J.M.Soler, Feb'99 27! Version of vmat that use a direct algorithm to save memory. 28! Modified by J.D.Gale, November'99 29! Spiral version written by V. M. Garcia-Suarez. June 2002. 30! Fixed for parallel runs by Nick Papior, June 2020. 31! *********************** INPUT ************************************** 32! integer no : Number of basis orbitals 33! integer nml : Local numer of mesh divisions in each cell vector 34! integer nmpl : Local numer of mesh divisions 35! real*8 dvol : Volume per mesh point 36! integer nspin : Number of spin components 37! real*4 V(nsp,nmpl,nspin): Value of the potential at the mesh points 38! integer nvmax : First dimension of listV and Vs, and maxim. 39! number of nonzero elements in any row of Vs 40! integer numVs(nuo) : Number of non-zero elements in a row of Vs 41! integer listVsptr(nuo) : Pointer to the start of rows in listVs 42! integer listVs(nvmax) : List of non-zero elements of Vs 43! integer iaorb(*) : Pointer to atom to which orbital belongs 44! integer iphorb(*) : Orbital index within each atom 45! integer isa(*) : Species index of all atoms 46! real*8 q(3) : Wave vector for spiral configuration. 47! ******************** INPUT AND OUTPUT ******************************* 48! real*8 Vs(nvmax,nspin) : Value of nonzero elements in each row 49! of Vs to which the potential matrix 50! elements are summed up 51! ********************************************************************* 52 53! Modules 54 use precision, only: dp, grid_p 55 use parallel, only: Nodes 56 use atmfuncs, only: rcut, all_phi 57 use atm_types, only: nsmax=>nspecies 58 use atomlist, only: indxuo 59 use listsc_module, only: listsc 60 use mesh, only: dxa, nsp, xdop, xdsp, cmesh, idop, nsm, meshLim 61 use meshdscf 62 use meshphi 63 use alloc, only: re_alloc, de_alloc 64 65! Argument types and dimensions 66 integer :: no, nml(3), nmpl, nvmax, nuo, nuotot, iaorb(*), nspin, & 67 iphorb(*), isa(*), numVs(nuo), listVsptr(nuo), listVs(nvmax) 68 real(grid_p), intent(in) :: V(nsp,nmpl,nspin) 69 real(dp) :: dvol, Vs(nvmax,nspin), q(3) 70 71! Internal variables and arrays 72 integer, parameter :: minloc = 1000 ! Min buffer size for local copy of Dscf 73 integer, parameter :: maxoa = 100 ! Max # of orbitals per atom 74 integer, parameter :: nsd = 2 ! spin-box size 75 integer :: & 76 i, ia, ic, ii, ijl, il, imp, ind, iop, ip, iphi, io, & 77 is, isp, ispin, iu, iul, ix, j, jc, jl, triang, & 78 last, lasta, lastop, maxloc, maxloc2, nc, nlocal, & 79 nphiloc, nvmaxl, iii(3) 80 81 logical :: ParallelLocal 82 real(dp) :: Vij(4), dxsp(3), & 83 r2cut(nsmax), r2sp, xr(3), Rdi, qRdi, cqRdi, sqRdi 84 85 integer, pointer :: ilc(:) => null() 86 integer, pointer :: ilocal(:) => null() 87 integer, pointer :: iorb(:) => null() 88 real(dp), pointer :: phia(:,:) => null() 89 real(dp), pointer :: VClocal(:,:) => null() 90 real(dp), pointer :: Clocal(:,:) => null() 91 real(dp), pointer :: Vlocal(:,:) => null() 92 real(dp), pointer :: VlocalSp(:,:,:) => null() 93 94 ! Start time counter 95 call timer('vmatsp',1) 96 97 ! Find atomic cutoff radiae 98 r2cut(:) = 0.0_dp 99 do i = 1,nuotot 100 ia = iaorb(i) 101 is = isa(ia) 102 io = iphorb(i) 103 r2cut(is) = max( r2cut(is), rcut(is,io)**2 ) 104 end do 105 106 ! Set algorithm logical 107 ParallelLocal = (Nodes > 1) 108 109 ! Find value of maxloc 110 maxloc2 = maxval(endpht(1:nmpl)-endpht(0:nmpl-1)) 111 maxloc = maxloc2 + minloc 112 maxloc = min( maxloc, no ) 113 triang = (maxloc+1)*(maxloc+2)/2 114 if ( ParallelLocal ) then 115 if ( nrowsDscfL > 0 ) then 116 nvmaxl = listdlptr(nrowsDscfL) + numdl(nrowsDscfL) 117 else 118 nvmaxl = 1 119 end if 120 end if 121 122 ! Allocate local memory 123 nullify(ilocal, ilc, iorb, phia, Vlocal, VlocalSp, Clocal, VClocal, DscfL) 124 125 call re_alloc( ilocal, 1, no, 'ilocal', 'vmatsp' ) 126 call re_alloc( ilc, 1, maxloc2, 'ilc', 'vmatsp' ) 127 call re_alloc( iorb, 0, maxloc, 'iorb', 'vmatsp' ) 128 129 call re_alloc( Vlocal, 1, nsd, 1, triang, 'Vlocal', 'vmatsp' ) 130 call re_alloc( VlocalSp, 1, nsd, 0, maxloc, 0, maxloc, 'VlocalSp', 'vmatsp' ) 131 call re_alloc( Clocal, 1, nsp, 1, maxloc2, 'Clocal', 'vmatsp' ) 132 call re_alloc( VClocal, 1, nspin, 1, nsp, 'VClocal', 'vmatsp' ) 133 if ( DirectPhi ) then 134 call re_alloc(phia, 1, maxoa, 1, nsp, "phia", "vmatsp") 135 end if 136 if ( ParallelLocal ) then 137 call re_alloc( DscfL, 1, nvmaxl, 1, nspin, 'DscfL', 'vmatsp' ) 138 DscfL(1:nvmaxl,1:nspin) = 0.0_dp 139 end if 140 141 ! Full initializations done only once 142 ilocal(1:no) = 0 143 iorb(0:maxloc) = 0 144 Vlocal(:,:) = 0.0_dp 145 VlocalSp(:,:,:) = 0.0_dp 146 last = 0 147 148 ! Loop over (big) grid points 149 do ip = 1 , nmpl 150 151 ! Find point coordinates 152 call ipack(-1,3,nml,iii,ip) 153 ! Correct for processor offset 154 iii(:) = iii(:) + meshLim(1,:) - 1 155 do ix = 1,3 156 xr(ix) = iii(1)*cmesh(ix,1) + iii(2)*cmesh(ix,2) + & 157 iii(3)*cmesh(ix,3) 158 end do 159 160 ! Find number of nonzero orbitals at this point 161 nc = endpht(ip) - endpht(ip-1) 162 163 ! Find new required size of Vlocal 164 nlocal = last 165 do ic = 1,nc 166 imp = endpht(ip-1) + ic 167 i = lstpht(imp) 168 if ( ilocal(i) == 0 ) nlocal = nlocal + 1 169 end do 170 171 ! If overflooded, add Vlocal to Vs and reinitialize it 172 if ( nlocal > maxloc .and. last > 0 ) then 173 if ( ParallelLocal ) then 174 do il = 1,last 175 i = iorb(il) 176 iu = indxuo(i) 177 iul = NeedDscfL(iu) 178 if ( i == iu ) then 179 do ii = 1, numdl(iul) 180 ind = listdlptr(iul) + ii 181 j = listdl(ind) 182 jl = ilocal(j) 183 ijl = idx_ijl(il,jl) 184 do ispin = 1, nsd 185 DscfL(ind,ispin) = DscfL(ind,ispin) + dVol * Vlocal(ispin,ijl) 186 DscfL(ind,ispin+2) = DscfL(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il) 187 end do 188 end do 189 else 190 do ii = 1, numdl(iul) 191 ind = listdlptr(iul) + ii 192 j = LISTSC( i, iu, listdl(ind) ) 193 jl = ilocal(j) 194 ijl = idx_ijl(il, jl) 195 do ispin = 1,nsd 196 DscfL(ind,ispin) = DscfL(ind,ispin) + dVol * Vlocal(ispin,ijl) 197 DscfL(ind,ispin+2) = DscfL(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il) 198 end do 199 end do 200 end if 201 end do 202 else ! ParallelLocal 203 do il = 1,last 204 i = iorb(il) 205 iu = indxuo(i) 206 !iul = iu ! Nodes == 1 207 if ( i == iu ) then 208 do ii = 1, numVs(iu) 209 ind = listVsptr(iu) + ii 210 j = listVs(ind) 211 jl = ilocal(j) 212 ijl = idx_ijl(il, jl) 213 do ispin = 1, nsd 214 Vs(ind,ispin) = Vs(ind,ispin) + dVol * Vlocal(ispin,ijl) 215 Vs(ind,ispin+2) = Vs(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il) 216 end do 217 end do 218 else 219 do ii = 1, numVs(iu) 220 ind = listVsptr(iu) + ii 221 j = LISTSC( i, iu, listVs(ind) ) 222 jl = ilocal(j) 223 ijl = idx_ijl(il, jl) 224 do ispin = 1,nsd 225 Vs(ind,ispin) = Vs(ind,ispin) + dVol * Vlocal(ispin,ijl) 226 Vs(ind,ispin+2) = Vs(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il) 227 end do 228 end do 229 end if 230 end do 231 end if 232 ! Reset local arrays 233 do i = 1, last 234 ilocal(iorb(i)) = 0 235 iorb(i) = 0 236 end do 237 ijl = (last+1)*(last+2)/2 238 Vlocal(1:nsd,1:ijl) = 0.0_dp 239 VlocalSp(1:nsd,0:last,0:last) = 0.0_dp 240 last = 0 241 endif 242 243 ! Look for required orbitals not yet in Vlocal 244 if (nlocal > last) then 245 do ic = 1, nc 246 imp = endpht(ip-1) + ic 247 i = lstpht(imp) 248 if ( ilocal(i) == 0 ) then 249 last = last + 1 250 ilocal(i) = last 251 iorb(last) = i 252 end if 253 end do 254 end if 255 256 if ( DirectPhi ) then 257 lasta = 0 258 lastop = 0 259 260 ! Generate or retrieve phi values for all orbitals up to nc 261 do ic = 1,nc 262 imp = endpht(ip-1) + ic 263 i = lstpht(imp) 264 il = ilocal(i) 265 ia = iaorb(i) 266 iop = listp2(imp) 267 ilc(ic) = il 268 269 if ( ia /= lasta .or. iop /= lastop ) then 270 lasta = ia 271 lastop = iop 272 is = isa(ia) 273 do isp = 1,nsp 274 do ix = 1,3 275 dxsp(ix) = xdsp(ix,isp) + xdop(ix,iop) - dxa(ix,ia) 276 enddo 277 r2sp = sum(dxsp**2) 278 if ( r2sp < r2cut(is) ) then 279 call all_phi( is, +1, dxsp, maxoa, nphiloc, phia(:,isp) ) 280 else 281 phia(:,isp) = 0.0_dp 282 end if 283 end do 284 end if 285 iphi = iphorb(i) 286 do isp = 1,nsp 287 Clocal(isp,ic) = phia(iphi,isp) 288 end do 289 290 end do 291 292 else ! DirectPhi 293 ! Generate or retrieve phi values for all orbitals up to nc 294 do ic = 1,nc 295 imp = endpht(ip-1) + ic 296 il = ilocal(lstpht(imp)) 297 ilc(ic) = il 298 299 Clocal(:,ic) = phi(:,imp) 300 end do 301 302 end if 303 304 ! Loop on first orbital of mesh point 305 do ic = 1,nc 306 imp = endpht(ip-1) + ic 307 i = lstpht(imp) 308 il = ilocal(i) 309 iu = indxuo(i) 310 ia = iaorb(i) 311 is = isa(ia) 312 iop = listp2(imp) 313 314 ! Pre-multiply V and Clocal(,ic) 315 do isp = 1,nsp 316 ! Calculate spiral phase 317 qRdi = 0._dp 318 do ix = 1, 3 319 Rdi = xr(ix) + xdsp(ix,isp) - xdop(ix,iop) + dxa(ix,ia) 320 qRdi = qRdi + q(ix) * Rdi 321 end do 322 cqRdi = cos(qRdi) 323 sqRdi = sin(qRdi) 324 325 VClocal(1,isp) = V(isp,ip,1) * Clocal(isp,ic) 326 VClocal(2,isp) = V(isp,ip,2) * Clocal(isp,ic) 327 VClocal(3,isp) = (V(isp,ip,3)*cqRdi - V(isp,ip,4)*sqRdi) * Clocal(isp,ic) 328 VClocal(4,isp) = (V(isp,ip,4)*cqRdi + V(isp,ip,3)*sqRdi) * Clocal(isp,ic) 329 enddo 330 331 ! Loop on second orbital of mesh point (NOT only for jc.le.ic) 332 do jc = 1,nc 333 jl = ilc(jc) 334 335 ! Loop over sub-points 336 Vij(:) = 0.0_dp 337 do isp = 1,nsp 338 Vij(:) = Vij(:) + VClocal(:,isp) * Clocal(isp,jc) 339 end do 340 341 ijl = idx_ijl(il, jl) 342 343 ! Here we need the spiral contribution for jc > ic 344 ! So an explicit check is required 345 if ( jc == ic ) then 346 Vlocal(1,ijl) = Vlocal(1,ijl) + Vij(1) 347 Vlocal(2,ijl) = Vlocal(2,ijl) + Vij(2) 348 else if ( jc < ic ) then 349 if ( il == jl ) then 350 Vlocal(1,ijl) = Vlocal(1,ijl) + 2.0_dp*Vij(1) 351 Vlocal(2,ijl) = Vlocal(2,ijl) + 2.0_dp*Vij(2) 352 else 353 Vlocal(1,ijl) = Vlocal(1,ijl) + Vij(1) 354 Vlocal(2,ijl) = Vlocal(2,ijl) + Vij(2) 355 end if 356 end if 357 358 VlocalSp(1,jl,il) = VlocalSp(1,jl,il) + Vij(3) 359 VlocalSp(2,jl,il) = VlocalSp(2,jl,il) + Vij(4) 360 361 end do 362 363 end do 364 end do 365 366 ! Add final Vlocal to Vs 367 if ( ParallelLocal .and. last > 0 ) then 368 369 do il = 1,last 370 i = iorb(il) 371 iu = indxuo(i) 372 iul = NeedDscfL(iu) 373 if ( i == iu ) then 374 do ii = 1, numdl(iul) 375 ind = listdlptr(iul) + ii 376 j = listdl(ind) 377 jl = ilocal(j) 378 ijl = idx_ijl(il, jl) 379 do ispin = 1, nsd 380 DscfL(ind,ispin) = DscfL(ind,ispin) + dVol * Vlocal(ispin,ijl) 381 DscfL(ind,ispin+2) = DscfL(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il) 382 end do 383 end do 384 else 385 do ii = 1, numdl(iul) 386 ind = listdlptr(iul)+ii 387 j = LISTSC( i, iu, listdl(ind) ) 388 jl = ilocal(j) 389 ijl = idx_ijl(il, jl) 390 do ispin = 1, nsd 391 DscfL(ind,ispin) = DscfL(ind,ispin) + dVol * Vlocal(ispin,ijl) 392 DscfL(ind,ispin+2) = DscfL(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il) 393 end do 394 end do 395 end if 396 end do 397 398 else if ( last > 0 ) then ! ParallelLocal 399 400 do il = 1 , last 401 i = iorb(il) 402 iu = indxuo(i) 403 if ( i == iu ) then 404 do ii = 1, numVs(iu) 405 ind = listVsptr(iu) + ii 406 j = listVs(ind) 407 jl = ilocal(j) 408 ijl = idx_ijl(il, jl) 409 do ispin = 1 , nsd 410 Vs(ind,ispin) = Vs(ind,ispin) + dVol * Vlocal(ispin,ijl) 411 Vs(ind,ispin+2) = Vs(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il) 412 end do 413 end do 414 else 415 do ii = 1, numVs(iu) 416 ind = listVsptr(iu) + ii 417 j = LISTSC( i, iu, listVs(ind) ) 418 jl = ilocal(j) 419 ijl = idx_ijl(il, jl) 420 do ispin = 1,nsd 421 Vs(ind,ispin) = Vs(ind,ispin) + dVol * Vlocal(ispin,ijl) 422 Vs(ind,ispin+2) = Vs(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il) 423 end do 424 end do 425 end if 426 end do 427 end if 428 429 ! Free local memory 430 call de_alloc( ilocal, 'ilocal', 'vmatsp' ) 431 call de_alloc( ilc, 'ilc', 'vmatsp' ) 432 call de_alloc( iorb, 'iorb', 'vmatsp' ) 433 call de_alloc( Vlocal, 'Vlocal', 'vmatsp' ) 434 call de_alloc( VlocalSp, 'VlocalSp', 'vmatsp' ) 435 call de_alloc( Clocal, 'Clocal', 'vmatsp' ) 436 call de_alloc( VClocal, 'VClocal', 'vmatsp' ) 437 if ( DirectPhi ) then 438 call de_alloc( phia, 'phia', 'vmatsp' ) 439 end if 440 441 if ( ParallelLocal ) then 442 ! Redistribute Hamiltonian from mesh to orbital based distribution 443 call matrixMtoO( nvmaxl, nvmax, numVs, listVsptr, nuo, & 444 nspin, DscfL, Vs) 445 ! Free memory 446 call de_alloc( DscfL, 'DscfL', 'vmatsp' ) 447 endif 448 449 call timer('vmatsp',2) 450 451 contains 452 453 ! In any case will the compiler most likely inline this 454 ! small routine. So it should not pose any problem. 455 pure function idx_ijl(i,j) result(ij) 456 integer, intent(in) :: i,j 457 integer :: ij 458 if ( i > j ) then 459 ij = i * (i + 1)/2 + j + 1 460 else 461 ij = j * (j + 1)/2 + i + 1 462 end if 463 end function idx_ijl 464 465 end subroutine vmatsp 466 467end module m_vmatsp 468