1!
2! Copyright (C) 1996-2016	The SIESTA group
3!  This file is distributed under the terms of the
4!  GNU General Public License: see COPYING in the top directory
5!  or http://www.gnu.org/copyleft/gpl.txt.
6! See Docs/Contributors.txt for a list of contributors.
7!
8module m_vmatsp
9
10  implicit none
11
12  private
13  public :: vmatsp
14
15contains
16
17  subroutine vmatsp( no, nml, nmpl, dvol, nspin, V, nvmax, &
18      numVs, listVsptr, listVs, Vs, &
19      nuo, nuotot, iaorb, iphorb, isa, q )
20
21! ********************************************************************
22! Finds the matrix elements of the potential.
23! First version written by P.Ordejon.
24! Name and interface modified by J.M.Soler. May'95.
25! Re-ordered so that mesh is the outer loop and the orbitals are
26! handled as lower-half triangular. J.D.Gale and J.M.Soler, Feb'99
27! Version of vmat that use a direct algorithm to save memory.
28! Modified by J.D.Gale, November'99
29! Spiral version written by V. M. Garcia-Suarez. June 2002.
30! Fixed for parallel runs by Nick Papior, June 2020.
31! *********************** INPUT **************************************
32! integer no              : Number of basis orbitals
33! integer nml             : Local numer of mesh divisions in each cell vector
34! integer nmpl            : Local numer of mesh divisions
35! real*8  dvol            : Volume per mesh point
36! integer nspin           : Number of spin components
37! real*4  V(nsp,nmpl,nspin): Value of the potential at the mesh points
38! integer nvmax           : First dimension of listV and Vs, and maxim.
39!                           number of nonzero elements in any row of Vs
40! integer numVs(nuo)      : Number of non-zero elements in a row of Vs
41! integer listVsptr(nuo)  : Pointer to the start of rows in listVs
42! integer listVs(nvmax)   : List of non-zero elements of Vs
43! integer iaorb(*)        : Pointer to atom to which orbital belongs
44! integer iphorb(*)       : Orbital index within each atom
45! integer isa(*)          : Species index of all atoms
46! real*8 q(3)             : Wave vector for spiral configuration.
47! ******************** INPUT AND OUTPUT *******************************
48! real*8  Vs(nvmax,nspin) : Value of nonzero elements in each row
49!                           of Vs to which the potential matrix
50!                           elements are summed up
51! *********************************************************************
52
53!  Modules
54    use precision, only: dp, grid_p
55    use parallel, only: Nodes
56    use atmfuncs,  only: rcut, all_phi
57    use atm_types, only: nsmax=>nspecies
58    use atomlist,  only: indxuo
59    use listsc_module, only: listsc
60    use mesh, only: dxa, nsp, xdop, xdsp, cmesh, idop, nsm, meshLim
61    use meshdscf
62    use meshphi
63    use alloc, only: re_alloc, de_alloc
64
65! Argument types and dimensions
66    integer :: no, nml(3), nmpl, nvmax, nuo, nuotot, iaorb(*), nspin, &
67        iphorb(*), isa(*), numVs(nuo), listVsptr(nuo), listVs(nvmax)
68    real(grid_p), intent(in) :: V(nsp,nmpl,nspin)
69    real(dp) :: dvol, Vs(nvmax,nspin), q(3)
70
71! Internal variables and arrays
72    integer, parameter :: minloc = 1000 ! Min buffer size for local copy of Dscf
73    integer, parameter :: maxoa  = 100  ! Max # of orbitals per atom
74    integer, parameter :: nsd = 2  ! spin-box size
75    integer :: &
76        i, ia, ic, ii, ijl, il, imp, ind, iop, ip, iphi, io, &
77        is, isp, ispin, iu, iul, ix, j, jc, jl, triang, &
78        last, lasta, lastop, maxloc, maxloc2, nc, nlocal,  &
79        nphiloc, nvmaxl, iii(3)
80
81    logical :: ParallelLocal
82    real(dp) :: Vij(4), dxsp(3), &
83        r2cut(nsmax), r2sp, xr(3), Rdi, qRdi, cqRdi, sqRdi
84
85    integer, pointer :: ilc(:) => null()
86    integer, pointer :: ilocal(:) => null()
87    integer, pointer :: iorb(:) => null()
88    real(dp), pointer :: phia(:,:) => null()
89    real(dp), pointer :: VClocal(:,:) => null()
90    real(dp), pointer :: Clocal(:,:) => null()
91    real(dp), pointer :: Vlocal(:,:) => null()
92    real(dp), pointer :: VlocalSp(:,:,:) => null()
93
94    ! Start time counter
95    call timer('vmatsp',1)
96
97    ! Find atomic cutoff radiae
98    r2cut(:) = 0.0_dp
99    do i = 1,nuotot
100      ia = iaorb(i)
101      is = isa(ia)
102      io = iphorb(i)
103      r2cut(is) = max( r2cut(is), rcut(is,io)**2 )
104    end do
105
106    ! Set algorithm logical
107    ParallelLocal = (Nodes > 1)
108
109    ! Find value of maxloc
110    maxloc2 = maxval(endpht(1:nmpl)-endpht(0:nmpl-1))
111    maxloc = maxloc2 + minloc
112    maxloc = min( maxloc, no )
113    triang  = (maxloc+1)*(maxloc+2)/2
114    if ( ParallelLocal ) then
115      if ( nrowsDscfL > 0 ) then
116        nvmaxl = listdlptr(nrowsDscfL) + numdl(nrowsDscfL)
117      else
118        nvmaxl = 1
119      end if
120    end if
121
122    ! Allocate local memory
123    nullify(ilocal, ilc, iorb, phia, Vlocal, VlocalSp, Clocal, VClocal, DscfL)
124
125    call re_alloc( ilocal, 1, no, 'ilocal', 'vmatsp' )
126    call re_alloc( ilc, 1, maxloc2, 'ilc', 'vmatsp' )
127    call re_alloc( iorb, 0, maxloc, 'iorb', 'vmatsp' )
128
129    call re_alloc( Vlocal, 1, nsd, 1, triang, 'Vlocal', 'vmatsp' )
130    call re_alloc( VlocalSp, 1, nsd, 0, maxloc, 0, maxloc, 'VlocalSp', 'vmatsp' )
131    call re_alloc( Clocal, 1, nsp, 1, maxloc2, 'Clocal', 'vmatsp' )
132    call re_alloc( VClocal, 1, nspin, 1, nsp, 'VClocal', 'vmatsp' )
133    if ( DirectPhi ) then
134      call re_alloc(phia, 1, maxoa, 1, nsp, "phia", "vmatsp")
135    end if
136    if ( ParallelLocal ) then
137      call re_alloc( DscfL, 1, nvmaxl, 1, nspin, 'DscfL', 'vmatsp' )
138      DscfL(1:nvmaxl,1:nspin) = 0.0_dp
139    end if
140
141    ! Full initializations done only once
142    ilocal(1:no) = 0
143    iorb(0:maxloc) = 0
144    Vlocal(:,:) = 0.0_dp
145    VlocalSp(:,:,:) = 0.0_dp
146    last = 0
147
148    ! Loop over (big) grid points
149    do ip = 1 , nmpl
150
151      ! Find point coordinates
152      call ipack(-1,3,nml,iii,ip)
153      ! Correct for processor offset
154      iii(:) = iii(:) + meshLim(1,:) - 1
155      do ix = 1,3
156        xr(ix) = iii(1)*cmesh(ix,1) + iii(2)*cmesh(ix,2) + &
157            iii(3)*cmesh(ix,3)
158      end do
159
160      ! Find number of nonzero orbitals at this point
161      nc = endpht(ip) - endpht(ip-1)
162
163      ! Find new required size of Vlocal
164      nlocal = last
165      do ic = 1,nc
166        imp = endpht(ip-1) + ic
167        i = lstpht(imp)
168        if ( ilocal(i) == 0 ) nlocal = nlocal + 1
169      end do
170
171      ! If overflooded, add Vlocal to Vs and reinitialize it
172      if ( nlocal > maxloc .and. last > 0 ) then
173        if ( ParallelLocal ) then
174          do il = 1,last
175            i = iorb(il)
176            iu = indxuo(i)
177            iul = NeedDscfL(iu)
178            if ( i == iu ) then
179              do ii = 1, numdl(iul)
180                ind = listdlptr(iul) + ii
181                j = listdl(ind)
182                jl = ilocal(j)
183                ijl = idx_ijl(il,jl)
184                do ispin = 1, nsd
185                  DscfL(ind,ispin) = DscfL(ind,ispin) + dVol * Vlocal(ispin,ijl)
186                  DscfL(ind,ispin+2) = DscfL(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il)
187                end do
188              end do
189            else
190              do ii = 1, numdl(iul)
191                ind = listdlptr(iul) + ii
192                j = LISTSC( i, iu, listdl(ind) )
193                jl = ilocal(j)
194                ijl = idx_ijl(il, jl)
195                do ispin = 1,nsd
196                  DscfL(ind,ispin) = DscfL(ind,ispin) + dVol * Vlocal(ispin,ijl)
197                  DscfL(ind,ispin+2) = DscfL(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il)
198                end do
199              end do
200            end if
201          end do
202        else ! ParallelLocal
203          do il = 1,last
204            i = iorb(il)
205            iu = indxuo(i)
206            !iul = iu ! Nodes == 1
207            if ( i == iu ) then
208              do ii = 1, numVs(iu)
209                ind = listVsptr(iu) + ii
210                j = listVs(ind)
211                jl = ilocal(j)
212                ijl = idx_ijl(il, jl)
213                do ispin = 1, nsd
214                  Vs(ind,ispin) = Vs(ind,ispin) + dVol * Vlocal(ispin,ijl)
215                  Vs(ind,ispin+2) = Vs(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il)
216                end do
217              end do
218            else
219              do ii = 1, numVs(iu)
220                ind = listVsptr(iu) + ii
221                j = LISTSC( i, iu, listVs(ind) )
222                jl = ilocal(j)
223                ijl = idx_ijl(il, jl)
224                do ispin = 1,nsd
225                  Vs(ind,ispin) = Vs(ind,ispin) + dVol * Vlocal(ispin,ijl)
226                  Vs(ind,ispin+2) = Vs(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il)
227                end do
228              end do
229            end if
230          end do
231        end if
232        ! Reset local arrays
233        do i = 1, last
234          ilocal(iorb(i)) = 0
235          iorb(i) = 0
236        end do
237        ijl = (last+1)*(last+2)/2
238        Vlocal(1:nsd,1:ijl) = 0.0_dp
239        VlocalSp(1:nsd,0:last,0:last) = 0.0_dp
240        last = 0
241      endif
242
243      !  Look for required orbitals not yet in Vlocal
244      if (nlocal > last) then
245        do ic = 1, nc
246          imp = endpht(ip-1) + ic
247          i = lstpht(imp)
248          if ( ilocal(i) == 0 ) then
249            last = last + 1
250            ilocal(i) = last
251            iorb(last) = i
252          end if
253        end do
254      end if
255
256      if ( DirectPhi ) then
257        lasta = 0
258        lastop = 0
259
260        !  Generate or retrieve phi values for all orbitals up to nc
261        do ic = 1,nc
262          imp = endpht(ip-1) + ic
263          i = lstpht(imp)
264          il = ilocal(i)
265          ia = iaorb(i)
266          iop = listp2(imp)
267          ilc(ic) = il
268
269          if ( ia /= lasta .or. iop /= lastop ) then
270            lasta = ia
271            lastop = iop
272            is = isa(ia)
273            do isp = 1,nsp
274              do ix = 1,3
275                dxsp(ix) = xdsp(ix,isp) + xdop(ix,iop) - dxa(ix,ia)
276              enddo
277              r2sp = sum(dxsp**2)
278              if ( r2sp < r2cut(is) ) then
279                call all_phi( is, +1, dxsp, maxoa, nphiloc, phia(:,isp) )
280              else
281                phia(:,isp) = 0.0_dp
282              end if
283            end do
284          end if
285          iphi = iphorb(i)
286          do isp = 1,nsp
287            Clocal(isp,ic) = phia(iphi,isp)
288          end do
289
290        end do
291
292      else ! DirectPhi
293        !  Generate or retrieve phi values for all orbitals up to nc
294        do ic = 1,nc
295          imp = endpht(ip-1) + ic
296          il = ilocal(lstpht(imp))
297          ilc(ic) = il
298
299          Clocal(:,ic) = phi(:,imp)
300        end do
301
302      end if
303
304      ! Loop on first orbital of mesh point
305      do ic = 1,nc
306        imp = endpht(ip-1) + ic
307        i = lstpht(imp)
308        il = ilocal(i)
309        iu = indxuo(i)
310        ia = iaorb(i)
311        is = isa(ia)
312        iop = listp2(imp)
313
314        ! Pre-multiply V and Clocal(,ic)
315        do isp = 1,nsp
316          ! Calculate spiral phase
317          qRdi = 0._dp
318          do ix = 1, 3
319            Rdi = xr(ix) + xdsp(ix,isp) - xdop(ix,iop) + dxa(ix,ia)
320            qRdi = qRdi + q(ix) * Rdi
321          end do
322          cqRdi = cos(qRdi)
323          sqRdi = sin(qRdi)
324
325          VClocal(1,isp) = V(isp,ip,1) * Clocal(isp,ic)
326          VClocal(2,isp) = V(isp,ip,2) * Clocal(isp,ic)
327          VClocal(3,isp) = (V(isp,ip,3)*cqRdi - V(isp,ip,4)*sqRdi) * Clocal(isp,ic)
328          VClocal(4,isp) = (V(isp,ip,4)*cqRdi + V(isp,ip,3)*sqRdi) * Clocal(isp,ic)
329        enddo
330
331        ! Loop on second orbital of mesh point (NOT only for jc.le.ic)
332        do jc = 1,nc
333          jl = ilc(jc)
334
335          ! Loop over sub-points
336          Vij(:) = 0.0_dp
337          do isp = 1,nsp
338            Vij(:) = Vij(:) + VClocal(:,isp) * Clocal(isp,jc)
339          end do
340
341          ijl = idx_ijl(il, jl)
342
343          ! Here we need the spiral contribution for jc > ic
344          ! So an explicit check is required
345          if ( jc == ic ) then
346            Vlocal(1,ijl) = Vlocal(1,ijl) + Vij(1)
347            Vlocal(2,ijl) = Vlocal(2,ijl) + Vij(2)
348          else if ( jc < ic ) then
349            if ( il == jl ) then
350              Vlocal(1,ijl) = Vlocal(1,ijl) + 2.0_dp*Vij(1)
351              Vlocal(2,ijl) = Vlocal(2,ijl) + 2.0_dp*Vij(2)
352            else
353              Vlocal(1,ijl) = Vlocal(1,ijl) + Vij(1)
354              Vlocal(2,ijl) = Vlocal(2,ijl) + Vij(2)
355            end if
356          end if
357
358          VlocalSp(1,jl,il) = VlocalSp(1,jl,il) + Vij(3)
359          VlocalSp(2,jl,il) = VlocalSp(2,jl,il) + Vij(4)
360
361        end do
362
363      end do
364    end do
365
366    ! Add final Vlocal to Vs
367    if ( ParallelLocal .and. last > 0 ) then
368
369      do il = 1,last
370        i = iorb(il)
371        iu = indxuo(i)
372        iul = NeedDscfL(iu)
373        if ( i == iu ) then
374          do ii = 1, numdl(iul)
375            ind = listdlptr(iul) + ii
376            j = listdl(ind)
377            jl = ilocal(j)
378            ijl = idx_ijl(il, jl)
379            do ispin = 1, nsd
380              DscfL(ind,ispin) = DscfL(ind,ispin) + dVol * Vlocal(ispin,ijl)
381              DscfL(ind,ispin+2) = DscfL(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il)
382            end do
383          end do
384        else
385          do ii = 1, numdl(iul)
386            ind = listdlptr(iul)+ii
387            j = LISTSC( i, iu, listdl(ind) )
388            jl = ilocal(j)
389            ijl = idx_ijl(il, jl)
390            do ispin = 1, nsd
391              DscfL(ind,ispin) = DscfL(ind,ispin) + dVol * Vlocal(ispin,ijl)
392              DscfL(ind,ispin+2) = DscfL(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il)
393            end do
394          end do
395        end if
396      end do
397
398    else if ( last > 0 ) then ! ParallelLocal
399
400      do il = 1 , last
401        i = iorb(il)
402        iu = indxuo(i)
403        if ( i == iu ) then
404          do ii = 1, numVs(iu)
405            ind = listVsptr(iu) + ii
406            j = listVs(ind)
407            jl = ilocal(j)
408            ijl = idx_ijl(il, jl)
409            do ispin = 1 , nsd
410              Vs(ind,ispin) = Vs(ind,ispin) + dVol * Vlocal(ispin,ijl)
411              Vs(ind,ispin+2) = Vs(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il)
412            end do
413          end do
414        else
415          do ii = 1, numVs(iu)
416            ind = listVsptr(iu) + ii
417            j = LISTSC( i, iu, listVs(ind) )
418            jl = ilocal(j)
419            ijl = idx_ijl(il, jl)
420            do ispin = 1,nsd
421              Vs(ind,ispin) = Vs(ind,ispin) + dVol * Vlocal(ispin,ijl)
422              Vs(ind,ispin+2) = Vs(ind,ispin+2) + dVol * VlocalSp(ispin,jl,il)
423            end do
424          end do
425        end if
426      end do
427    end if
428
429    !  Free local memory
430    call de_alloc( ilocal, 'ilocal', 'vmatsp' )
431    call de_alloc( ilc, 'ilc', 'vmatsp' )
432    call de_alloc( iorb, 'iorb', 'vmatsp' )
433    call de_alloc( Vlocal, 'Vlocal', 'vmatsp' )
434    call de_alloc( VlocalSp, 'VlocalSp', 'vmatsp' )
435    call de_alloc( Clocal, 'Clocal', 'vmatsp' )
436    call de_alloc( VClocal, 'VClocal', 'vmatsp' )
437    if ( DirectPhi ) then
438      call de_alloc( phia, 'phia', 'vmatsp' )
439    end if
440
441    if ( ParallelLocal ) then
442      ! Redistribute Hamiltonian from mesh to orbital based distribution
443      call matrixMtoO( nvmaxl, nvmax, numVs, listVsptr, nuo, &
444          nspin, DscfL, Vs)
445      ! Free memory
446      call de_alloc( DscfL, 'DscfL', 'vmatsp' )
447    endif
448
449    call timer('vmatsp',2)
450
451  contains
452
453    ! In any case will the compiler most likely inline this
454    ! small routine. So it should not pose any problem.
455    pure function idx_ijl(i,j) result(ij)
456      integer, intent(in) :: i,j
457      integer :: ij
458      if ( i > j ) then
459        ij = i * (i + 1)/2 + j + 1
460      else
461        ij = j * (j + 1)/2 + i + 1
462      end if
463    end function idx_ijl
464
465  end subroutine vmatsp
466
467end module m_vmatsp
468