1!
2! Copyright (C) 1996-2016	The SIESTA group
3!  This file is distributed under the terms of the
4!  GNU General Public License: see COPYING in the top directory
5!  or http://www.gnu.org/copyleft/gpl.txt.
6! See Docs/Contributors.txt for a list of contributors.
7!
8      module m_nlefsm
9
10      implicit none
11
12      public :: nlefsm
13
14      private
15
16      CONTAINS
17
18      subroutine nlefsm( scell, nua, na, isa, xa, indxua,
19     .                   maxnh, maxnd, lasto, lastkb, iphorb,
20     .                   iphKB, numd, listdptr, listd, numh,
21     .                   listhptr, listh, nspin, Dscf, Enl,
22     .                   fa, stress, H , matrix_elements_only)
23C *********************************************************************
24C Calculates non-local (NL) pseudopotential contribution to total
25C energy, atomic forces, stress and hamiltonian matrix elements.
26C Energies in Ry. Lengths in Bohr.
27C Writen by J.Soler and P.Ordejon, June 1997.
28C **************************** INPUT **********************************
29C real*8  scell(3,3)       : Supercell vectors SCELL(IXYZ,IVECT)
30C integer nua              : Number of atoms in unit cell
31C integer na               : Number of atoms in supercell
32C integer isa(na)          : Species index of each atom
33C real*8  xa(3,na)         : Atomic positions in cartesian coordinates
34C integer indxua(na)       : Index of equivalent atom in unit cell
35C integer maxnh            : First dimension of H and listh
36C integer maxnd            : Maximum number of elements of the
37C                            density matrix
38C integer lasto(0:na)      : Position of last orbital of each atom
39C integer lastkb(0:na)     : Position of last KB projector of each atom
40C integer iphorb(no)       : Orbital index of each orbital in its atom,
41C                            where no=lasto(na)
42C integer iphKB(nokb)      : Index of each KB projector in its atom,
43C                            where nokb=lastkb(na)
44C integer numd(nuo)        : Number of nonzero elements of each row of the
45C                            density matrix
46C integer listdptr(nuo)    : Pointer to the start of each row (-1) of the
47C                            density matrix
48C integer listd(maxnd)     : Nonzero hamiltonian-matrix element column
49C                            indexes for each matrix row
50C integer numh(nuo)        : Number of nonzero elements of each row of the
51C                            hamiltonian matrix
52C integer listhptr(nuo)    : Pointer to the start of each row (-1) of the
53C                            hamiltonian matrix
54C integer listh(maxnh)     : Nonzero hamiltonian-matrix element column
55C                            indexes for each matrix row
56C integer nspin            : Number of spin components of Dscf and H
57C                            If computing only matrix elements, it
58C                            can be set to 1.
59C logical matrix_elements_only:
60C integer Dscf(maxnd,nspin): Density matrix. Not touched if computing
61C                            only matrix elements.
62C ******************* INPUT and OUTPUT *********************************
63C real*8 fa(3,na)          : NL forces (added to input fa)
64C real*8 stress(3,3)       : NL stress (added to input stress)
65C real*8 H(maxnh,nspin)    : NL Hamiltonian (added to input H)
66C **************************** OUTPUT *********************************
67C real*8 Enl               : NL energy
68C *********************************************************************
69C
70C  Modules
71C
72      use precision,     only : dp
73      use parallel,      only : Node, Nodes
74      use parallelsubs,  only : GetNodeOrbs, LocalToGlobalOrb
75      use parallelsubs,  only : GlobalToLocalOrb
76      use atm_types,     only : nspecies
77      use atomlist,      only : in_kb_orb_u_range
78      use atmfuncs,      only : rcut, epskb, orb_gindex, kbproj_gindex
79      use atmfuncs,      only : nofis, nkbfis
80      use chemical,      only : is_floating
81      use neighbour,     only : iana=>jan, r2ki=>r2ij, xki=>xij
82      use neighbour,     only : mneighb, reset_neighbour_arrays
83      use alloc,         only : re_alloc, de_alloc
84      use m_new_matel,   only : new_matel
85
86      integer, intent(in) ::
87     .   maxnh, na, maxnd, nspin, nua
88
89      integer, intent(in)  ::
90     .  indxua(na), iphKB(*), iphorb(*), isa(na),
91     .  lasto(0:na), lastkb(0:na), listd(maxnd), listh(maxnh),
92     .  numd(*), numh(*), listdptr(*), listhptr(*)
93
94      real(dp), intent(in) :: scell(3,3), Dscf(maxnd,nspin),
95     .                        xa(3,na)
96      real(dp), intent(inout) :: fa(3,nua), stress(3,3)
97      real(dp), intent(inout) :: H(maxnh,nspin)
98      real(dp), intent(out)   :: Enl
99      logical, intent(in)     :: matrix_elements_only
100
101      real(dp) ::   volcel
102      external ::   timer, volcel
103
104C Internal variables ................................................
105C maxno  = maximum number of basis orbitals overlapping a KB projector
106! This should be estimated beforehand, to avoid checks,
107! or a "guard" region implemented for a single check at the end
108
109      integer, save ::  maxno = 500
110
111      integer
112     .  ia, ikb, ina, ind, ino,
113     .  io, iio, ioa, is, ispin, ix, ig, kg,
114     .  j, jno, jo, jx, ka, ko, koa, ks, kua,
115     .  nkb, nna, nno, no, nuo, nuotot, maxkba
116      integer :: natoms_k_over, max_nno_used
117
118      integer, dimension(:), pointer :: iano, iono
119
120      real(dp)
121     .  Cijk, epsk, fik, rki, rmax, rmaxkb, rmaxo,
122     .  Sik, Sjk, volume
123
124      real(dp), dimension(:), pointer :: Di, Vi
125      real(dp), dimension(:,:), pointer :: Ski, xno
126      real(dp), dimension(:,:,:), pointer :: grSki
127
128      logical ::   within
129      logical, dimension(:), pointer ::  listed, listedall
130      !
131      real(dp), allocatable :: rorbmax(:), rkbmax(:)
132C ......................
133
134C Start time counter
135      call timer( 'nlefsm', 1 )
136
137C Find unit cell volume
138      volume = volcel( scell ) * nua / na
139
140C Find maximum range and maximum number of KB projectors
141      maxkba = 0
142
143      allocate(rorbmax(nspecies),rkbmax(nspecies))
144      do is = 1, nspecies
145
146         ! Species orbital range
147         rorbmax(is) = 0.0_dp
148         do io = 1, nofis(is)
149            rorbmax(is) = max(rorbmax(is), rcut(is,io))
150         enddo
151
152         ! Species KB range
153         io = nkbfis(is)
154         rkbmax(is) = 0.0_dp
155         do ikb = 1, io
156            rkbmax(is) = max(rkbmax(is), rcut(is,-ikb))
157         enddo
158         maxkba = max(maxkba,io)
159
160      enddo
161      rmaxo = maxval(rorbmax(1:nspecies))
162      rmaxkb = maxval(rkbmax(1:nspecies))
163      ! Calculate max extend
164      rmax = rmaxo + rmaxkb
165
166C Initialize arrays Di and Vi only once
167
168      no = lasto(na)
169      nuotot = lasto(nua)
170      call GetNodeOrbs(nuotot,Node,Nodes,nuo)
171
172C Allocate local memory
173
174      nullify( Vi )
175      call re_alloc( Vi, 1, no, 'Vi', 'nlefsm' )
176      Vi(1:no) = 0.0_dp         ! OK. Later on, any non-zero elements
177                                ! will be zero-ed out explicitly
178      nullify( listed )
179      call re_alloc( listed, 1, no, 'listed', 'nlefsm' )
180      listed(1:no) = .false.
181      nullify( listedall )
182      call re_alloc( listedall, 1, no, 'listedall', 'nlefsm' )
183      listedall(1:no) = .false.
184
185      if (.not. matrix_elements_only) then
186         nullify( Di )
187         call re_alloc( Di, 1, no, 'Di', 'nlefsm' )
188         Di(1:no) = 0.0_dp
189      endif
190
191      Enl = 0.0d0
192
193!     Make list of all orbitals needed for this node
194
195      do io = 1,nuo
196        ! we need this process's orbitals...
197        call LocalToGlobalOrb(io,Node,Nodes,iio)
198        listedall(iio) = .true.
199        ! ... and those with which they interact
200        do j = 1,numh(io)
201          jo = listh(listhptr(io)+j)
202          listedall(jo) = .true.
203        enddo
204      enddo
205
206C Allocate local arrays that depend on saved parameters
207      nullify( iano )
208      call re_alloc( iano, 1, maxno, 'iano', 'nlefsm' )
209      nullify( iono )
210      call re_alloc( iono, 1, maxno, 'iono', 'nlefsm' )
211      nullify( xno )
212      call re_alloc( xno, 1, 3, 1, maxno, 'xno',  'nlefsm' )
213      nullify( Ski )
214      call re_alloc( Ski, 1, maxkba, 1, maxno, 'Ski', 'nlefsm' )
215      nullify( grSki )
216      call re_alloc( grSki, 1, 3, 1, maxkba, 1, maxno, 'grSki',
217     &               'nlefsm' )
218
219C     Initialize neighb subroutine
220      call mneighb( scell, rmax, na, xa, 0, 0, nna )
221
222! Loop on atoms with KB projectors
223! All processes will be doing this loop over atoms.
224! This is one reason for non-scalability
225!
226!        And what happens if there is no supercell?
227!        How do we count out-of-unit-cell interactions?
228!        ... they are automatically accounted for, in
229!        the same way as the Hmu_nu terms themselves.
230!
231      natoms_k_over = 0
232      max_nno_used = 0
233      do ka = 1,na
234!        Only the atoms within the proper
235!        distance of a unit cell orbital (in our process) should
236!        be considered, not the whole supercell.
237!        This array was initialized in hsparse
238
239        if (.not. in_kb_orb_u_range(ka)) CYCLE
240
241        ks = isa(ka)
242        ! Cycle also if ghost-orbital species...
243        if (is_floating(ks)) CYCLE
244
245        kua = indxua(ka)  ! Used only if forces and energies are comp.
246
247C       Find neighbour atoms
248        call mneighb( scell, rmax, na, xa, ka, 0, nna )
249
250        nno = 0
251        do ina = 1,nna
252          rki = sqrt(r2ki(ina))
253          ia = iana(ina)
254          is = isa(ia)
255          !     Early exit if too far
256          !     This duplicates the test in hsparse...
257          if (rki - rkbmax(ks) - rorbmax(is) > 0.d0) CYCLE
258
259          ! Loop over orbitals close enough to overlap
260          do io = lasto(ia-1)+1,lasto(ia)
261
262C           Only calculate if needed locally in our MPI process
263             if (.not. listedall(io)) CYCLE
264
265             ioa = iphorb(io)
266             ! rki_minus_rc_orb= rki - rcut(is,ioa)
267
268             ! Find if orbital is within range
269             ! This can be done with rkbmax(ks):
270             within = (rki-rkbmax(ks)) < rcut(is,ioa)
271             if (.not. within) CYCLE
272
273!             Find overlap between neighbour orbitals and KB projectors
274
275             if (nno.eq.maxno) call increase_maxno()
276
277              nno = nno + 1  ! Update number of overlaps to keep
278              iono(nno) = io
279              iano(nno) = ia
280              do ix = 1,3
281                 xno(ix,nno) = xki(ix,ina)
282              enddo
283
284! For each overlap family we keep the individual
285! KB-orb matrix elements
286! This will store some zeros sometimes, as some
287! of the KBs might not actually overlap
288! We could re-check the distances...
289! Not worth it, as then we would have different
290! numbers of matrix elements for different orbitals, and
291! the bookeeping would get messy
292
293
294              ikb = 0
295              ig = orb_gindex(is,ioa)
296              do ko = lastkb(ka-1)+1,lastkb(ka)
297                 koa = iphKB(ko)
298                 ! if ( rki_minus_rc_orb > rcut(ks,koa) CYCLE
299                 ikb = ikb + 1
300                 ! epsk_sqrt = sqrt(epskb(ks,koa))
301                 kg = kbproj_gindex(ks,koa)
302                 call new_MATEL( 'S', kg, ig, xki(1:3,ina),
303     &                Ski(ikb,nno), grSki(1:3,ikb,nno) )
304                 !  Maybe: Ski = epskb_sqrt * Ski
305                 !         grSki = epskb_sqrt * grSki
306              enddo
307
308           enddo ! loop over orbitals
309
310        enddo ! loop over neighbor atoms
311
312!     Now we check which of the overlaps of our atom's KB's involve
313!     two orbitals: one in the unit cell, and handled by our process,
314!     and the other unrestricted
315
316        max_nno_used = max(max_nno_used, nno)
317        do ino = 1,nno    ! loop over overlaps
318          ia = iano(ino)
319          if (ia > nua) CYCLE  ! We want the 1st orb to be in the unit cell
320
321          io = iono(ino)
322          ! Note that if ia is in the unit cell, io is <= nuo,
323          ! so that this call makes sense
324          call GlobalToLocalOrb(io,Node,Nodes,iio)
325          if (iio == 0) CYCLE
326
327          !  Scatter filter of desired matrix elements
328          do j = 1,numh(iio)
329             ind = listhptr(iio)+j
330             jo = listh(ind)
331             listed(jo) = .true.
332             if (.not. matrix_elements_only) then
333                do ispin = 1,nspin ! Both spins add up...
334                   Di(jo) = Di(jo) + Dscf(ind,ispin)
335                enddo
336             endif
337          enddo
338
339! Find matrix elements with other neighbour orbitals
340! Note that several overlaps might contribute to the
341! same matrix element, hence the additions above (Dscf) and below (H)
342
343          do jno = 1,nno
344             jo = iono(jno)
345             ! Check whether there is H_io_jo...
346             if (.not. listed(jo)) CYCLE
347
348! Loop on KB projectors again. Note that ikb and ko run
349! in step. ko is only needed for the Epskb factor.
350! maybe we can store it with the value of the projector.
351
352             ikb = 0
353             do ko = lastkb(ka-1)+1,lastkb(ka)
354                ikb = ikb + 1
355                koa = iphKB(ko)
356                epsk = epskb(ks,koa)
357                Sik = Ski(ikb,ino)
358                Sjk = Ski(ikb,jno)
359                Vi(jo) = Vi(jo) + epsk * Sik * Sjk
360                ! We should distinguish "energy-only" and
361                ! "forces-and-stress"
362                if (.not. matrix_elements_only) then
363                   Cijk = Di(jo) * epsk
364                   Enl = Enl + Cijk * Sik * Sjk
365                   do ix = 1,3
366                      fik = 2.d0 * Cijk * Sjk * grSki(ix,ikb,ino)
367                      fa(ix,ia)  = fa(ix,ia)  - fik
368                      fa(ix,kua) = fa(ix,kua) + fik
369                      do jx = 1,3
370                         stress(jx,ix) = stress(jx,ix) +
371     &                        xno(jx,ino) * fik / volume
372                      enddo
373                   enddo
374                endif
375
376             enddo
377
378          enddo ! loop over second orbitals
379
380C         Pick up contributions to H and restore Di and Vi
381          do j = 1,numh(iio)
382             ind = listhptr(iio)+j
383             jo = listh(ind)
384             do ispin = 1,nspin
385                H(ind,ispin) = H(ind,ispin) + Vi(jo)
386             enddo
387             Vi(jo) = 0.0d0     ! See initial zero-out at top
388             listed(jo) = .false.
389             if (.not. matrix_elements_only) Di(jo) = 0.0d0
390          enddo
391
392       enddo  ! loop over 1st orbitals
393       natoms_k_over = natoms_k_over + 1
394      enddo   ! loop over atoms holding KB projectors
395
396      if (Node == 0) then
397         ! For future diagnostics
398         ! Currently only the root process outputs info
399         write(6,"(a,2i8)")
400     $     "No. of atoms with KB's overlaping orbs in proc 0." //
401     $     " Max # of overlaps:", natoms_k_over, max_nno_used
402      endif
403
404C     Deallocate local memory
405!      call new_MATEL( 'S', 0, 0, 0, 0, xki, Ski, grSki )
406      call reset_neighbour_arrays( )
407      call de_alloc( grSki, 'grSki', 'nlefsm' )
408      call de_alloc( Ski, 'Ski', 'nlefsm' )
409      call de_alloc( xno, 'xno', 'nlefsm' )
410      call de_alloc( iono, 'iono', 'nlefsm' )
411      call de_alloc( iano, 'iano', 'nlefsm' )
412      call de_alloc( listedall, 'listedall', 'nlefsm' )
413      call de_alloc( listed, 'listed', 'nlefsm' )
414      call de_alloc( Vi, 'Vi', 'nlefsm' )
415      if (.not. matrix_elements_only) then
416         call de_alloc( Di, 'Di', 'nlefsm' )
417      endif
418
419      deallocate(rkbmax,rorbmax)
420
421      call timer( 'nlefsm', 2 )
422
423      CONTAINS
424
425      subroutine increase_maxno()
426
427! if too small then increase array sizes
428      maxno = maxno + 10
429      call re_alloc( iano, 1, maxno, 'iano', 'nlefsm',
430     &        .true. )
431      call re_alloc( iono, 1, maxno, 'iono', 'nlefsm',
432     &        .true. )
433      call re_alloc( xno, 1, 3, 1, maxno, 'xno',  'nlefsm',
434     &        .true. )
435      call re_alloc( Ski, 1, maxkba, 1, maxno, 'Ski',
436     &        'nlefsm', .true. )
437      call re_alloc( grSki, 1, 3, 1, maxkba, 1, maxno,
438     &        'grSki', 'nlefsm', .true. )
439
440      end subroutine increase_maxno
441
442      end subroutine nlefsm
443
444      end module m_nlefsm
445