1!
2! Copyright (C) 1996-2016	The SIESTA group
3!  This file is distributed under the terms of the
4!  GNU General Public License: see COPYING in the top directory
5!  or http://www.gnu.org/copyleft/gpl.txt.
6! See Docs/Contributors.txt for a list of contributors.
7!
8      module m_gradient
9      public :: gradient
10      CONTAINS
11      subroutine gradient(c,eta,qs,h,s,no_u,no_l,nbands,ncmax,
12     .                    nctmax,nfmax,nftmax,nhmax,nhijmax,
13     .                    numc,listc,numct,listct,cttoc,numf,listf,
14     .                    numft,listft,fttof,numh,listh,
15     .                    listhptr,numhij,listhij,f,fs,grad,ener,
16     .                    no_cl,nspin,Node)
17
18C ************************************************************************
19C Finds the energy and gradient at point C.
20C Uses the functional of Kim et al (PRB 52, 1640 (95))
21C Works only with spin-unpolarized systems
22C Written by P.Ordejon. October'96
23C Last modified: J.M.Soler. 30/04/97
24C ****************************** INPUT ***********************************
25C real*8 c(ncmax,no_u)       : Current point (wave function coeff.
26C                                  in sparse form)
27C real*8 eta(nspin)            : Fermi level parameter of Kim et al.
28C real*8 qs(nspin)             : Total number of electrons
29C real*8 h(nhmax)              : Hamiltonian matrix (sparse)
30C real*8 s(nhmax)              : Overlap matrix (sparse)
31C integer no_u               : Global number of basis orbitals
32C integer no_l            : Local number of basis orbitals
33C integer nbands               : Number of LWF's
34C integer ncmax                : Max num of <>0 elements of each row of C
35C integer nctmax               : Max num of <>0 elements of each col of C
36C integer nfmax                : Max num of <>0 elements of each row of
37C                                   F = Ct x H
38C integer nftmax               : Max num of <>0 elements of each col of F
39C integer nhmax                : Max num of <>0 elements of each row of H
40C integer nhijmax              : Max num of <>0 elements of each row of
41C                                   Hij=Ct x H x C
42C integer numc(no_u)         : Control vector of C matrix
43C                                (number of <>0  elements of each row of C)
44C integer listc(ncmax,no_u)  : Control vector of C matrix
45C                               (list of <>0  elements of each row of C)
46C integer numct(nbands)        : Control vector of C transpose matrix
47C                               (number of <>0  elements of each col of C)
48C integer listct(ncmax,nbands) : Control vector of C transpose matrix
49C                               (list of <>0  elements of each col of C)
50C integer cttoc(ncmax,nbands)  : Map from Ct to C indexing
51C integer numf(nbands)         : Control vector of F matrix
52C                                (number of <>0  elements of each row of F)
53C integer listf(nfmax,nbands)  : Control vector of F matrix
54C                                (list of <>0  elements of each row of F)
55C integer numft(no_u)        : Control vector of F transpose matrix
56C                               (number of <>0  elements of each col of F)
57C integer listft(nfmax,no_u) : Control vector of F transpose matrix
58C                               (list of <>0  elements of each col of F)
59C integer fttof(nfmax,no_u)  : Map from Ft to F indexing
60C integer numh(no_u)         : Control vector of H matrix
61C                                (number of <>0  elements of each row of H)
62C integer listh(nhmax)         : Control vector of H matrix
63C                               (list of <>0  elements of each row of H)
64C integer listhptr(no_u)     : Control vector of H matrix
65C                               (pointer to start of rows in listh/h/s)
66C integer numhij(nbands)       : Control vector of Hij matrix
67C                                (number of <>0  elements of each row of Hij)
68C integer listhij(nhijmax,nbands): Control vector of Hij matrix
69C                                (list of <>0  elements of each row of Hij)
70C real*8 f(nfmax,nbands)       : Auxiliary space
71C real*8 fs(nfmax,nbands)      : Auxiliary space
72C ***************************** OUTPUT ***********************************
73C real*8 ener                  : Energy at point C
74C real*8 grad(ncmax,no_u)    : Gradient of functional (sparse)
75C ************************************************************************
76
77      use precision
78      use on_main,     only : ncG2L, ncT2P, nbL2G, nbG2L, nbandsloc
79      use alloc,       only : re_alloc, de_alloc
80#ifdef MPI
81      use globalise,   only : globalinitB, globalisef, globalisec
82      use globalise,   only : globalloadb2, globalcommb
83      use globalise,   only : globalreloadb2, globalrezerob2
84      use globalise,   only : maxft2, numft2, listft2
85#endif
86      use m_mpi_utils, only : globalize_sum
87
88      implicit none
89
90      integer, intent(in) ::
91     .  no_u,no_l,nbands,ncmax,nctmax,nfmax,nftmax,nhmax,
92     .  nhijmax,no_cl,nspin,Node
93
94      integer, intent(in) ::
95     .  cttoc(nctmax,nbandsloc),fttof(nftmax,no_cl),
96     .  listc(ncmax,no_cl),listct(nctmax,nbandsloc),
97     .  listf(nfmax,nbandsloc),listft(nftmax,no_cl),
98     .  listh(nhmax),listhptr(no_l),listhij(nhijmax,nbandsloc),
99     .  numc(no_cl),numct(nbandsloc),numf(nbandsloc),
100     .  numft(no_cl),numh(no_l),numhij(nbandsloc)
101
102      real(dp), intent(in) ::
103     .  c(ncmax,no_cl,nspin),eta(nspin),qs(nspin),
104     .  h(nhmax,nspin),s(nhmax)
105
106      real(dp), intent(inout) ::
107     .     f(nfmax,nbandsloc),fs(nfmax,nbandsloc)
108
109      real(dp), intent(out) :: grad(ncmax,no_cl,nspin), ener
110
111C Internal variables ......................................................
112
113      integer
114     .  i,ik,il,in,indk,is,j,jk,jl,jn,k,kk,kn,mu,muk
115
116#ifdef MPI
117      real(dp) ::
118     .  ftmp(2), ftmp2(2)
119      real(dp), pointer, save ::
120     .  buxg(:,:),bux1save(:,:),bux2save(:,:), ftG(:,:), fstG(:,:)
121#endif
122      real(dp), pointer, save ::
123     .  aux1(:), aux2(:), bux1(:), bux2(:), ft(:,:), fst(:,:)
124
125      real(dp) ::
126     .  a0,b0,p0,func1,func2,spinfct
127
128C..................
129
130      call timer('gradient',1)
131
132C Allocate local arrays
133      call re_alloc( aux1, 1, no_u, 'aux1', 'gradient' )
134      call re_alloc( aux2, 1, no_u, 'aux2', 'gradient' )
135      call re_alloc( bux1, 1, nbands, 'bux1', 'gradient' )
136      call re_alloc( bux2, 1, nbands, 'bux2', 'gradient' )
137      call re_alloc( ft, 1, nftmax, 1, no_cl, 'ft', 'gradient' )
138      call re_alloc( fst, 1, nftmax, 1, no_cl, 'fst', 'gradient' )
139#ifdef MPI
140      call re_alloc( ftG, 1, MaxFt2, 1, no_cl, 'ftG', 'gradient' )
141      call re_alloc( fstG, 1, MaxFt2, 1, no_cl, 'fstG', 'gradient')
142      call re_alloc( bux1save, 1, nhijmax, 1, nbandsloc, 'bux1save',
143     &               'gradient' )
144      call re_alloc( bux2save, 1, nhijmax, 1, nbandsloc, 'bux2save',
145     &               'gradient' )
146      call re_alloc( buxg, 1, nbands, 1, 2, 'buxg', 'gradient')
147#endif
148
149C Initialize output and auxiliary varialbles ...............................
150
151      if (nspin.eq.1) then
152        spinfct = 2.0d0
153      else
154        spinfct = 1.0d0
155      endif
156
157      ener = 0.0d0
158
159      do i = 1,no_u
160        aux1(i) = 0.0d0
161        aux2(i) = 0.0d0
162      enddo
163
164      do i = 1,nbands
165        bux1(i) = 0.0d0
166        bux2(i) = 0.0d0
167#ifdef MPI
168        buxg(i,1) = 0.0d0
169        buxg(i,2) = 0.0d0
170#endif
171      enddo
172
173#ifdef MPI
174      do i = 1,nbandsloc
175        do j = 1,nhijmax
176          bux1save(j,i) = 0.0d0
177          bux2save(j,i) = 0.0d0
178        enddo
179      enddo
180#endif
181
182C Initialise gradient vector
183      grad = 0.0d0
184
185C Calculate Functional .....................................................
186
187C Loop over spin
188      do is = 1,nspin
189
190#ifdef MPI
191C Initialise communication arrays
192        call globalinitB(2)
193#endif
194
195C Initialise variables
196        func1 = 0.0d0
197        func2 = 0.0d0
198
199C F=CtH  ---> JMS: F=Ct*(H-eta*S)
200C Fs=CtS
201        do il = 1,nbandsloc
202          do in = 1,numct(il)
203            k = listct(in,il)
204            kk = ncT2P(k)
205            if (kk.gt.0) then
206              ik = cttoc(in,il)
207              p0 = c(ik,k,is)
208              indk = listhptr(kk)
209              do kn = 1,numh(kk)
210                aux1(listh(indk+kn)) = aux1(listh(indk+kn)) +
211     .            p0*( h(indk+kn,is) - eta(is)*s(indk+kn) )
212                aux2(listh(indk+kn)) = aux2(listh(indk+kn)) +
213     .            p0*s(indk+kn)
214              enddo
215            endif
216          enddo
217          do in = 1,numf(il)
218            k = listf(in,il)
219            f(in,il) = aux1(k)
220            fs(in,il) = aux2(k)
221            aux1(k) = 0.0d0
222            aux2(k) = 0.0d0
223          enddo
224        enddo
225
226C-JMS Find transpose of F and Fs
227        do mu = 1,no_cl
228          do muk = 1,numft(mu)
229            j = listft(muk,mu)
230            jl = nbG2L(j)
231            if (jl.gt.0) then
232              jk = fttof(muk,mu)
233              ft(muk,mu) = f(jk,jl)
234              fst(muk,mu) = fs(jk,jl)
235            endif
236          enddo
237        enddo
238
239#ifdef MPI
240C Globalise ft/fst
241        call globaliseF(no_cl,nbands,nftmax,numft,listft,
242     .                  ft,ftG,Node)
243        call globaliseF(no_cl,nbands,nftmax,numft,listft,
244     .                  fst,fstG,Node)
245#endif
246
247C Sij=CtSC
248C multiply FxC and FsxC row by row
249        do il = 1,nbandsloc
250          do in = 1,numf(il)
251            k = listf(in,il)
252            kk = ncG2L(k)
253            a0 = f(in,il)
254            b0 = fs(in,il)
255            do kn = 1,numc(kk)
256              bux1(listc(kn,kk)) = bux1(listc(kn,kk)) + a0 * c(kn,kk,is)
257              bux2(listc(kn,kk)) = bux2(listc(kn,kk)) + b0 * c(kn,kk,is)
258            enddo
259          enddo
260
261#ifdef MPI
262C Load data into globalisation arrays
263          call globalloadB2(il,nbands,bux1,bux2)
264#endif
265
266#ifdef MPI
267C Reinitialise bux1/2 and save for later in sparse form
268          do jn = 1,numhij(il)
269            j = listhij(jn,il)
270            bux1save(jn,il) = bux1(j)
271            bux2save(jn,il) = bux2(j)
272            bux1(j) = 0.0d0
273            bux2(j) = 0.0d0
274          enddo
275
276        enddo
277
278C Globalise Hij/Sij
279        call globalcommB(Node)
280
281C Restore local bux1/2 terms
282        do il = 1,nbandsloc
283          do jn = 1,numhij(il)
284            j = listhij(jn,il)
285            bux1(j) = bux1save(jn,il)
286            bux2(j) = bux2save(jn,il)
287          enddo
288
289C Reload data after globalisation
290          call globalreloadB2(il,nbands,bux1,bux2,buxg)
291#endif
292
293C Calculate energy terms & reinitialise bux1/2
294          i = nbL2G(il)
295          func1 = func1 + bux1(i)
296
297          do jn = 1,numhij(il)
298            j = listhij(jn,il)
299#ifdef MPI
300            func2 = func2 + bux1(j)*buxg(j,2)
301#else
302            func2 = func2 + bux1(j)*bux2(j)
303#endif
304          enddo
305
306C Multiply Hij x Fs and Sij x F row by row
307C (only products of neccesary elements)
308          do ik = 1,numct(il)
309            mu = listct(ik,il)
310            kk = ncT2P(mu)
311            if (kk.gt.0) then
312              a0 = 0.0d0
313#ifdef MPI
314              do muk = 1,numft2(mu)
315                j = listft2(muk,mu)
316                a0 = a0 + buxg(j,1)*fstG(muk,mu)
317     .                  + buxg(j,2)*ftG(muk,mu)
318#else
319              do muk = 1,numft(mu)
320                j = listft(muk,mu)
321                a0 = a0 + bux1(j)*fst(muk,mu)
322     .                  + bux2(j)*ft(muk,mu)
323#endif
324              enddo
325              grad(cttoc(ik,il),mu,is) = - spinfct*a0
326            endif
327          enddo
328
329#ifdef MPI
330C Reinitialise buxg
331          call globalrezeroB2(il,nbands,buxg)
332#endif
333C Reinitialise bux1 / bux2
334          do jn = 1,numhij(il)
335            j = listhij(jn,il)
336            bux1(j) = 0.0d0
337            bux2(j) = 0.0d0
338          enddo
339
340        enddo
341
342#ifdef MPI
343C Global reduction of func1/2
344        ftmp(1) = func1
345        ftmp(2) = func2
346        call Globalize_sum(ftmp(1:2),ftmp2(1:2))
347        func1 = ftmp2(1)
348        func2 = ftmp2(2)
349#endif
350
351        bux1(1:nbands) = 0.0d0
352        do k = 1,no_cl
353          do ik = 1,numft(k)
354            j = listft(ik,k)
355            bux1(j) = 2.0d0*spinfct*ft(ik,k) + bux1(j)
356          enddo
357          do ik = 1,numc(k)
358            j = listc(ik,k)
359            grad(ik,k,is) = bux1(j) + grad(ik,k,is)
360          enddo
361          do ik = 1,numft(k)
362            j = listft(ik,k)
363            bux1(j) = 0.0d0
364          enddo
365        enddo
366
367        ener = ener + spinfct*(func1 - 0.5d0*func2)
368     .              + 0.5d0*eta(is)*qs(is)
369
370C End loop over spins
371      enddo
372
373#ifdef MPI
374      call globaliseC(no_cl,ncmax,numc,grad,nspin,Node)
375#endif
376
377C Deallocate local arrays
378#ifdef MPI
379      call de_alloc( buxg, 'buxg', 'gradient' )
380      call de_alloc( bux2save, 'bux2save', 'gradient' )
381      call de_alloc( bux1save, 'bux1save', 'gradient' )
382      call de_alloc( fstG, 'fstG', 'gradient' )
383      call de_alloc( ftG, 'ftG', 'gradient' )
384#endif
385      call de_alloc( fst, 'fst', 'gradient' )
386      call de_alloc( ft, 'ft', 'gradient' )
387      call de_alloc( bux2, 'bux2', 'gradient' )
388      call de_alloc( bux1, 'bux1', 'gradient' )
389      call de_alloc( aux2, 'aux2', 'gradient' )
390      call de_alloc( aux1, 'aux1', 'gradient' )
391
392      call timer('gradient',2)
393
394      end subroutine gradient
395      end module m_gradient
396