1! 2! Copyright (C) 1996-2016 The SIESTA group 3! This file is distributed under the terms of the 4! GNU General Public License: see COPYING in the top directory 5! or http://www.gnu.org/copyleft/gpl.txt. 6! See Docs/Contributors.txt for a list of contributors. 7! 8module m_vmat 9 10 implicit none 11 12 private 13 public :: vmat 14 15contains 16 17 subroutine vmat( no, np, dvol, spin, V, nvmax, & 18 numVs, listVsptr, listVs, Vs, & 19 nuo, nuotot, iaorb, iphorb, isa ) 20 21! ******************************************************************** 22! Finds the matrix elements of the potential. 23! First version written by P.Ordejon. 24! Name and interface modified by J.M.Soler. May'95. 25! Re-ordered so that mesh is the outer loop and the orbitals are 26! handled as lower-half triangular. J.D.Gale and J.M.Soler, Feb'99 27! Version of vmat that use a direct algorithm to save memory. 28! Modified by J.D.Gale, November'99 29! *********************** INPUT ************************************** 30! integer no : Number of basis orbitals 31! integer np : Number of columns in C (local) 32! real*8 dvol : Volume per mesh point 33! type(tSpin) spin : Spin configuration 34! real*4 V(np,spin%Grid) : Value of the potential at the mesh points 35! integer nvmax : First dimension of listV and Vs, and maxim. 36! number of nonzero elements in any row of Vs 37! integer numVs(nuo) : Number of non-zero elements in a row of Vs 38! integer listVsptr(nuo) : Pointer to the start of rows in listVs 39! integer listVs(nvmax) : List of non-zero elements of Vs 40! integer iaorb(*) : Pointer to atom to which orbital belongs 41! integer iphorb(*) : Orbital index within each atom 42! integer isa(*) : Species index of all atoms 43! ******************** INPUT AND OUTPUT ******************************* 44! real*8 Vs(nvmax,spin%H): Value of nonzero elements in each row 45! of Vs to which the potential matrix 46! elements are summed up 47! ********************************************************************* 48 49! Modules 50 use precision, only: dp, grid_p 51 use atmfuncs, only: rcut, all_phi 52 use atm_types, only: nsmax=>nspecies 53 use atomlist, only: indxuo 54 use t_spin, only: tSpin 55 use listsc_module, only: LISTSC 56 use mesh, only: dxa, nsp, xdop, xdsp, meshLim 57 use meshdscf, only: matrixMtoO 58 use meshdscf, only: needdscfl, listdl, numdl, nrowsdscfl, listdlptr 59 use meshphi, only: directphi, endpht, lstpht, listp2, phi 60 use parallel, only: Nodes, Node 61 use alloc, only: re_alloc, de_alloc 62 use parallelsubs, only: GlobalToLocalOrb 63#ifdef MPI 64 use mpi_siesta 65#endif 66#ifdef _OPENMP 67 use omp_lib 68#endif 69 70! Argument types and dimensions 71 integer :: no, np, nvmax, nuo, nuotot, iaorb(*) 72 type(tSpin), intent(in) :: spin 73 integer :: iphorb(*), isa(*), numVs(nuo) 74 integer :: listVsptr(nuo), listVs(nvmax) 75 real(grid_p), intent(in) :: V(nsp,np,spin%Grid) 76 real(dp) :: dvol 77 real(dp), target :: Vs(nvmax,spin%H) 78! Internal variables and arrays 79 integer, parameter :: minloc = 1000 ! Min buffer size 80 integer, parameter :: maxoa = 100 ! Max # of orb/atom 81 integer :: i, ia, ic, ii, ijl, il, imp, ind, iop 82 integer :: ip, iphi, io, is, isp, ispin, iu, iul 83 integer :: j, jc, jl, last, lasta, lastop 84 integer :: maxloc, maxloc2, nc, nlocal, nphiloc 85 integer :: nvmaxl, triang, lenx, leny, lenz,lenxy 86 87 ! Size of Hamiltonian 88 logical :: ParallelLocal 89 real(dp) :: Vij, r2sp, dxsp(3), VClocal(nsp) 90 91 integer, pointer :: ilc(:), ilocal(:), iorb(:) 92 real(dp), pointer :: DscfL(:,:), t_DscfL(:,:,:) 93 real(dp), pointer :: Vss(:,:), t_Vss(:,:,:), Clocal(:,:) 94 real(dp), pointer :: Vlocal(:,:), phia(:,:), r2cut(:) 95 integer :: NTH, TID 96#ifdef _TRACE_ 97 integer :: MPIerror 98#endif 99 100#ifdef DEBUG 101 call write_debug( ' PRE vmat' ) 102#endif 103 104#ifdef _TRACE_ 105 call MPI_Barrier( MPI_Comm_World, MPIerror ) 106 call MPItrace_event( 1000, 4 ) 107#endif 108 109! Start time counter 110 call timer('vmat',1) 111 112! Find atomic cutoff radii 113 nullify(r2cut) 114 call re_alloc( r2cut, 1, nsmax, 'r2cut', 'vmat' ) 115 r2cut = 0.0_dp 116 do i = 1,nuotot 117 ia = iaorb(i) 118 is = isa(ia) 119 io = iphorb(i) 120 r2cut(is) = max( r2cut(is), rcut(is,io)**2 ) 121 end do 122 123! Set algorithm logical 124 ParallelLocal = (Nodes > 1) 125 lenx = meshLim(2,1) - meshLim(1,1) + 1 126 leny = meshLim(2,2) - meshLim(1,2) + 1 127 lenz = meshLim(2,3) - meshLim(1,3) + 1 128 lenxy = lenx*leny 129 130! Find value of maxloc 131 maxloc2 = maxval(endpht(1:np)-endpht(0:np-1)) 132 maxloc = maxloc2 + minloc 133 maxloc = min( maxloc, no ) 134 triang = (maxloc+1)*(maxloc+2)/2 135 if ( ParallelLocal ) then 136 if ( nrowsDscfL > 0 ) then 137 nvmaxl = listdlptr(nrowsDscfL) + numdl(nrowsDscfL) 138 else 139 nvmaxl = 1 140 end if 141 end if 142 143! Allocate local memory 144!$OMP parallel default(shared), & 145!$OMP&shared(NTH,t_DscfL,t_Vss,spin), & 146!$OMP&private(TID,last), & 147!$OMP&private(ip,nc,nlocal,ic,imp,i,il,iu,iul,ii,ind,j,ijl,ispin), & 148!$OMP&private(lasta,lastop,ia,is,iop,isp,dxsp,r2sp,nphiloc,iphi,jc,jl), & 149!$OMP&private(Vij,VClocal,DscfL,Vss,ilocal,ilc,iorb,Vlocal,Clocal,phia) 150 151!$OMP single 152#ifdef _OPENMP 153 NTH = omp_get_num_threads( ) 154#else 155 NTH = 1 156#endif 157!$OMP end single ! implicit barrier, IMPORTANT 158 159#ifdef _OPENMP 160 TID = omp_get_thread_num( ) + 1 161#else 162 TID = 1 163#endif 164 165 nullify(Clocal,phia,ilocal,ilc,iorb,Vlocal) 166!$OMP critical 167 ! Perhaps the critical section is not needed, 168 ! however it "tells" the OS to allocate per 169 ! thread, possibly waiting for each thread to 170 ! place the memory in the best position. 171 allocate( Clocal(nsp,maxloc2) ) 172 allocate( ilocal(no) , ilc(maxloc2) , iorb(maxloc) ) 173 allocate( Vlocal(triang,spin%Grid) ) 174 if ( DirectPhi ) allocate( phia(maxoa,nsp) ) 175!$OMP end critical 176 177!$OMP single 178 if ( ParallelLocal ) then 179 nullify( t_DscfL ) 180 call re_alloc( t_DscfL, 1, nvmaxl, 1, spin%H, 1, NTH, & 181 'DscfL', 'vmat' ) 182 else 183 if ( NTH > 1 ) then 184 nullify( t_Vss ) 185 call re_alloc( t_Vss, 1, nvmax, 1, spin%H, 2, NTH, & 186 'Vss', 'vmat' ) 187 end if 188 end if 189!$OMP end single ! implicit barrier 190 191 if ( ParallelLocal ) then 192 DscfL => t_DscfL(1:nvmaxl,:,TID) 193 DscfL(1:nvmaxl,:) = 0._dp 194 else 195 if ( NTH > 1 ) then 196 if ( TID == 1 ) then 197 Vss => Vs 198 else 199 Vss => t_Vss(1:nvmax,:,TID) 200 Vss(1:nvmax,:) = 0._dp 201 end if 202 else 203 Vss => Vs 204 end if 205 end if 206 207! Full initializations done only once 208 ilocal(1:no) = 0 209 iorb(1:maxloc) = 0 210 Vlocal(1:triang,:) = 0._dp 211 last = 0 212 213! Loop over grid points 214!$OMP do 215 do ip = 1,np 216! Find number of nonzero orbitals at this point 217 nc = endpht(ip) - endpht(ip-1) 218! Find new required size of Vlocal 219 nlocal = last 220 do ic = 1,nc 221 imp = endpht(ip-1) + ic 222 i = lstpht(imp) 223 if (ilocal(i) == 0) nlocal = nlocal + 1 224 end do 225 226! If overflooded, add Vlocal to Vs and reinitialize it 227 if (nlocal > maxloc .and. last > 0 ) then 228 if ( ParallelLocal ) then 229 do il = 1,last 230 i = iorb(il) 231 iu = indxuo(i) 232 iul = NeedDscfL(iu) 233 if ( i == iu ) then 234 do ii = 1, numdl(iul) 235 ind = listdlptr(iul) + ii 236 j = listdl(ind) 237 ijl = idx_ijl(il,ilocal(j)) 238 do ispin = 1, spin%Grid 239 DscfL(ind,ispin) = DscfL(ind,ispin) + Vlocal(ijl,ispin) * dVol 240 end do 241 if ( spin%SO ) then 242 DscfL(ind,7:8) = DscfL(ind,7:8) + & 243 Vlocal(ijl,3:4) * dVol 244 end if 245 end do 246 else 247 do ii = 1, numdl(iul) 248 ind = listdlptr(iul) + ii 249 j = LISTSC( i, iu, listdl(ind) ) 250 ijl = idx_ijl(il,ilocal(j)) 251 do ispin = 1, spin%Grid 252 DscfL(ind,ispin) = DscfL(ind,ispin) + Vlocal(ijl,ispin) * dVol 253 end do 254 if ( spin%SO ) then 255 DscfL(ind,7:8) = DscfL(ind,7:8) + & 256 Vlocal(ijl,3:4) * dVol 257 end if 258 end do 259 end if 260 end do 261 else 262 do il = 1,last 263 i = iorb(il) 264 iu = indxuo(i) 265 call GlobalToLocalOrb( iu, Node, Nodes, iul ) 266 if ( i == iu ) then 267 do ii = 1, numVs(iul) 268 ind = listVsptr(iul) + ii 269 j = listVs(ind) 270 ijl = idx_ijl(il,ilocal(j)) 271 do ispin = 1, spin%Grid 272 Vss(ind,ispin) = Vss(ind,ispin) + Vlocal(ijl,ispin) * dVol 273 end do 274 if ( spin%SO ) then 275 Vss(ind,7:8) = Vss(ind,7:8) + Vlocal(ijl,3:4) * dVol 276 end if 277 end do 278 else 279 do ii = 1, numVs(iul) 280 ind = listVsptr(iul) + ii 281 j = LISTSC( i, iu, listVs(ind) ) 282 ijl = idx_ijl(il,ilocal(j)) 283 do ispin = 1, spin%Grid 284 Vss(ind,ispin) = Vss(ind,ispin) + Vlocal(ijl,ispin) * dVol 285 end do 286 if ( spin%SO ) then 287 Vss(ind,7:8) = Vss(ind,7:8) + Vlocal(ijl,3:4) * dVol 288 end if 289 end do 290 end if 291 end do 292 end if 293! Reset local arrays 294 do i = 1, last 295 ilocal(iorb(i)) = 0 296 end do 297 iorb(1:last) = 0 298 ijl = (last+1)*(last+2)/2 299 do ispin = 1 , spin%Grid 300 do i = 1 , ijl 301 Vlocal(i,ispin) = 0._dp 302 end do 303 end do 304 last = 0 305 end if 306 307! Look for required orbitals not yet in Vlocal 308 if ( nlocal > last ) then 309 do ic = 1,nc 310 imp = endpht(ip-1) + ic 311 i = lstpht(imp) 312 if ( ilocal(i) == 0 ) then 313 last = last + 1 314 ilocal(i) = last 315 iorb(last) = i 316 end if 317 end do 318 end if 319 320! Check algorithm 321 if ( DirectPhi ) then 322 lasta = 0 323 lastop = 0 324 do ic = 1 , nc 325 imp = endpht(ip-1) + ic 326 i = lstpht(imp) 327 il = ilocal(i) 328 ia = iaorb(i) 329 iop = listp2(imp) 330 ilc(ic) = il 331 332! Generate or retrieve phi values 333 if ( ia /= lasta .or. iop /= lastop ) then 334 lasta = ia 335 lastop = iop 336 is = isa(ia) 337 do isp = 1,nsp 338 dxsp(:) = xdsp(:,isp) + xdop(:,iop) - dxa(:,ia) 339 r2sp = sum(dxsp**2) 340 if ( r2sp < r2cut(is) ) then 341!$OMP critical 342 call all_phi( is, +1, dxsp, maxoa, nphiloc, phia(:,isp) ) 343!$OMP end critical 344 else 345 phia(:,isp) = 0.0_dp 346 end if 347 end do 348 end if 349 iphi = iphorb(i) 350 351 Clocal(:,ic) = phia(iphi,:) 352 353 do ispin = 1 , spin%Grid 354 355! Create VClocal for the first orbital of mesh point 356 Vij = 0._dp 357 do isp = 1,nsp 358 VClocal(isp) = V(isp,ip,ispin) * Clocal(isp,ic) 359! This is the jc == ic value 360 Vij = Vij + VClocal(isp) * Clocal(isp,ic) 361 end do 362 363! ic == jc, hence we simply do 364 ijl = idx_ijl(il,ilc(ic)) 365 Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij 366 367! Loop on second orbital of mesh point 368 do jc = 1 , ic - 1 369 jl = ilc(jc) 370 371! Calculate ic * jc 372 Vij = 0._dp 373 do isp = 1,nsp 374 Vij = Vij + VClocal(isp) * Clocal(isp,jc) 375 end do 376 377 ijl = idx_ijl(il,jl) 378 if ( il == jl ) then 379 Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij * 2._dp 380 else 381 Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij 382 end if 383 384 end do 385 end do 386 387 end do 388 389 else 390 391 do ic = 1 , nc 392 imp = endpht(ip-1) + ic 393 il = ilocal(lstpht(imp)) 394 ilc(ic) = il 395 396 Clocal(:,ic) = phi(:,imp) 397 398 do ispin = 1 , spin%Grid 399 400! Create VClocal for the first orbital of mesh point 401 Vij = 0._dp 402 do isp = 1 , nsp 403 VClocal(isp) = V(isp,ip,ispin) * Clocal(isp,ic) 404! This is the jc == ic value 405 Vij = Vij + VClocal(isp) * Clocal(isp,ic) 406 end do 407 408! ic == jc, hence we simply do 409 ijl = idx_ijl(il,ilc(ic)) 410 Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij 411 412! Loop on second orbital of mesh point 413 do jc = 1 , ic - 1 414 jl = ilc(jc) 415 416! Calculate ic * jc 417 Vij = 0._dp 418 do isp = 1 , nsp 419 Vij = Vij + VClocal(isp) * Clocal(isp,jc) 420 end do 421 422 ijl = idx_ijl(il,jl) 423 if ( il == jl ) then 424 Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij * 2._dp 425 else 426 Vlocal(ijl,ispin) = Vlocal(ijl,ispin) + Vij 427 end if 428 429 end do 430 end do 431 432 end do 433 434 end if 435 436 end do 437!$OMP end do nowait 438 439!$OMP barrier 440 441! Note that this is already performed in parallel! 442! Add final Vlocal to Vs 443 if ( ParallelLocal .and. last > 0 ) then 444 do il = 1 , last 445 i = iorb(il) 446 iu = indxuo(i) 447 iul = NeedDscfL(iu) 448 if ( i == iu ) then 449 do ii = 1, numdl(iul) 450 ind = listdlptr(iul) + ii 451 j = listdl(ind) 452 ijl = idx_ijl(il,ilocal(j)) 453 do ispin = 1, spin%Grid 454 DscfL(ind,ispin) = DscfL(ind,ispin) + Vlocal(ijl,ispin) * dVol 455 end do 456 if ( spin%SO ) then 457 DscfL(ind,7:8) = DscfL(ind,7:8) + Vlocal(ijl,3:4) * dVol 458 end if 459 end do 460 else 461 do ii = 1, numdl(iul) 462 ind = listdlptr(iul) + ii 463 j = LISTSC( i, iu, listdl(ind) ) 464 ijl = idx_ijl(il,ilocal(j)) 465 do ispin = 1, spin%Grid 466 DscfL(ind,ispin) = DscfL(ind,ispin) + Vlocal(ijl,ispin) * dVol 467 end do 468 if ( spin%SO ) then 469 DscfL(ind,7:8) = DscfL(ind,7:8) + Vlocal(ijl,3:4) * dVol 470 end if 471 end do 472 end if 473 end do 474 else if ( last > 0 ) then 475 do il = 1 , last 476 i = iorb(il) 477 iu = indxuo(i) 478 if ( i == iu ) then 479 do ii = 1, numVs(iu) 480 ind = listVsptr(iu)+ii 481 j = listVs(ind) 482 ijl = idx_ijl(il,ilocal(j)) 483 do ispin = 1, spin%Grid 484 Vss(ind,ispin) = Vss(ind,ispin) + Vlocal(ijl,ispin) * dVol 485 end do 486 if ( spin%SO ) then 487 Vss(ind,7:8) = Vss(ind,7:8) + Vlocal(ijl,3:4) * dVol 488 end if 489 end do 490 else 491 do ii = 1, numVs(iu) 492 ind = listVsptr(iu)+ii 493 j = LISTSC( i, iu, listVs(ind) ) 494 ijl = idx_ijl(il,ilocal(j)) 495 do ispin = 1, spin%Grid 496 Vss(ind,ispin) = Vss(ind,ispin) + Vlocal(ijl,ispin) * dVol 497 end do 498 if ( spin%SO ) then 499 Vss(ind,7:8) = Vss(ind,7:8) + Vlocal(ijl,3:4) * dVol 500 end if 501 end do 502 end if 503 end do 504 end if 505 506!$OMP barrier 507 508 if ( ParallelLocal .and. NTH > 1 ) then 509 do ispin = 1 , spin%H 510!$OMP do 511 do ind = 1, nvmaxl 512 do ii = 2, NTH 513 t_DscfL(ind,ispin,1) = t_DscfL(ind,ispin,1) + & 514 t_DscfL(ind,ispin,ii) 515 end do 516 end do 517!$OMP end do nowait 518 end do 519!$OMP barrier 520 else if ( NTH > 1 ) then 521 do ispin = 1 , spin%H 522!$OMP do 523 do ind = 1, nvmax 524 do ii = 2, NTH 525 Vs(ind,ispin) = Vs(ind,ispin) + t_Vss(ind,ispin,ii) 526 end do 527 end do 528!$OMP end do nowait 529 end do 530!$OMP barrier 531 end if 532 533! Free memory 534 deallocate(Clocal,ilocal,ilc,iorb,Vlocal) 535 if ( DirectPhi ) deallocate( phia ) 536 537!$OMP master 538 if ( ParallelLocal ) then 539! Redistribute Hamiltonian from mesh to orbital based distribution 540 DscfL => t_DscfL(1:nvmaxl,1:spin%H,1) 541 call matrixMtoO( nvmaxl, nvmax, numVs, listVsptr, nuo, & 542 spin%H, DscfL, Vs ) 543 call de_alloc( t_DscfL, 'DscfL', 'vmat' ) 544 else if ( NTH > 1 ) then 545 call de_alloc( t_Vss, 'Vss', 'vmat' ) 546 end if 547!$OMP end master 548 549!$OMP end parallel 550 551 call de_alloc( r2cut, 'r2cut', 'vmat' ) 552 553#ifdef _TRACE_ 554 call MPI_Barrier( MPI_Comm_World, MPIerror ) 555 call MPItrace_event( 1000, 0 ) 556#endif 557 558 call timer('vmat',2) 559 560#ifdef DEBUG 561 call write_debug( ' POS vmat' ) 562#endif 563 564 contains 565 566 ! In any case will the compiler most likely inline this 567 ! small routine. So it should not pose any problem. 568 pure function idx_ijl(i,j) result(ij) 569 integer, intent(in) :: i,j 570 integer :: ij 571 if ( i > j ) then 572 ij = i * (i + 1)/2 + j + 1 573 else 574 ij = j * (j + 1)/2 + i + 1 575 end if 576 end function idx_ijl 577 578 end subroutine vmat 579 580end module m_vmat 581