1!
2! Copyright (C) 1996-2016	The SIESTA group
3!  This file is distributed under the terms of the
4!  GNU General Public License: see COPYING in the top directory
5!  or http://www.gnu.org/copyleft/gpl.txt.
6! See Docs/Contributors.txt for a list of contributors.
7!
8      subroutine ener3lomem(c,grad,lam,eta,qs,h,s,nbasis,nbasisloc,
9     .                      nbands,ncmax,nctmax,nfmax,nhmax,nhijmax,
10     .                      numc,listc,numct,listct,cttoc,numf,listf,
11     .                      numh,listh,listhptr,numhij,listhij,ener,
12     .                      nbasisCloc,nspin,Node)
13
14C ************************************************************************
15C Finds the energy at three points of the line passing thru C in the
16C direction of GRAD. LAM is the distance (in units of GRAD) between
17C points.
18C Uses the functional of Kim et al (PRB 52, 1640 (95))
19C Works only with spin-unpolarized systems
20C Written by P.Ordejon. October'96
21C ****************************** INPUT ***********************************
22C real*8 c(ncmax,nbasis)       : Current point (wave function coeff.
23C                                  in sparse form)
24C real*8 grad(ncmax,nbasis)    : Direction of search (sparse)
25C real*8 lam                   : Length of step
26C real*8 eta(nspin)            : Fermi level parameter of Kim et al.
27C real*8 qs(nspin)             : Total number of electrons
28C real*8 h(nhmax)              : Hamiltonian matrix (sparse)
29C real*8 s(nhmax)              : Overlap matrix (sparse)
30C integer nbasis               : Global number of basis orbitals
31C integer nbasisloc            : Local number of basis orbitals
32C integer nbands               : Number of LWF's
33C integer ncmax                : Max num of <>0 elements of each row of C
34C integer nctmax               : Max num of <>0 elements of each col of C
35C integer nfmax                : Max num of <>0 elements of each row of
36C                                   F = Ct x H
37C integer nhmax                : Max num of <>0 elements of each row of H
38C integer nhijmax              : Max num of <>0 elements of each row of
39C                                   Hij=Ct x H x C
40C integer numc(nbasis)         : Control vector of C matrix
41C                                (number of <>0  elements of each row of C)
42C integer listc(ncmax,nbasis)  : Control vector of C matrix
43C                               (list of <>0  elements of each row of C)
44C integer numct(nbands)        : Control vector of C transpose matrix
45C                               (number of <>0  elements of each col of C)
46C integer listct(ncmax,nbands) : Control vector of C transpose matrix
47C                               (list of <>0  elements of each col of C)
48C integer cttoc(ncmax,nbands)  : Map from Ct to C indexing
49C integer numf(nbands)         : Control vector of F matrix
50C                                (number of <>0  elements of each row of F)
51C integer listf(nfmax,nbands)  : Control vector of F matrix
52C                                (list of <>0  elements of each row of F)
53C integer numh(nbasis)         : Control vector of H matrix
54C                                (number of <>0  elements of each row of H)
55C integer listh(nhmax)         : Control vector of H matrix
56C                               (list of <>0  elements of each row of H)
57C integer listhptr(nbasis)     : Control vector of H matrix
58C                               (pointer to start of row in listh/h/s)
59C integer numhij(nbands)       : Control vector of Hij matrix
60C                                (number of <>0  elements of each row of Hij)
61C integer listhij(nhijmax,nbands): Control vector of Hij matrix
62C                                (list of <>0  elements of each row of Hij)
63C ***************************** OUTPUT ***********************************
64C real*8 ener(3)               : Energy at the three points:
65C                                     C +     lam * GRAD
66C                                     C + 2 * lam * GRAD
67C                                     C + 3 * lam * GRAD
68C ************************************************************************
69
70      use precision
71      use globalise
72      use on_main,     only : ncG2L,ncT2P,nbG2L,nbL2G,nbandsloc
73      use m_mpi_utils, only : globalize_sum
74      use alloc,       only : re_alloc, de_alloc
75
76      implicit none
77
78      integer
79     .  nbasis,nbasisloc,nbands,ncmax,nctmax,nfmax,nhmax,nhijmax,
80     .  nbasisCloc,nspin,Node
81
82      integer
83     .  cttoc(nctmax,nbandsloc),listc(ncmax,nbasisCloc),
84     .  listct(nctmax,nbandsloc),listf(nfmax,nbandsloc),
85     .  listh(nhmax),listhptr(nbasisloc),listhij(nhijmax,nbandsloc),
86     .  numc(nbasisCloc),numct(nbandsloc),numf(nbandsloc),
87     .  numh(nbasisloc),numhij(nbandsloc)
88
89      real(dp) ::
90     .  c(ncmax,nbasisCloc,nspin),ener(3),eta(nspin),qs(nspin),
91     .  grad(ncmax,nbasisCloc,nspin),h(nhmax,nspin),lam,s(nhmax)
92
93C Internal variables ......................................................
94
95      integer
96     .  i,il,ilam,in,is,j,jn,k,kk,kn,lc,indk,mu
97
98      real(dp), pointer, save :: aux1(:), aux2(:), bux1(:), bux2(:)
99
100      real(dp) ::
101     .  a,b,c1,func1(3),func2(3),
102     .  lam123(3),pp,hs,ss, spinfct
103
104#ifdef MPI
105      real(dp) ::
106     .  ftmp(3,2),ftmp2(3,2)
107      real(dp), pointer, save :: bux2g(:), bux1s(:,:), bux2s(:,:)
108#endif
109C
110      call timer('ener3',1)
111
112C Allocate workspace arrays
113      call re_alloc( aux1, 1, nbasis, 'aux1', 'ener3' )
114      call re_alloc( aux2, 1, nbasis, 'aux2', 'ener3' )
115      call re_alloc( bux1, 1, nbasis, 'bux1', 'ener3' )
116      call re_alloc( bux2, 1, nbasis, 'bux2', 'ener3' )
117#ifdef MPI
118      call re_alloc( bux1s, 1, nhijmax, 1, nbandsloc, 'bux1s', 'ener3' )
119      call re_alloc( bux2s, 1, nhijmax, 1, nbandsloc, 'bux2s', 'ener3' )
120      call re_alloc( bux2g, 1, nbasis, 'bux2g', 'ener3' )
121#endif
122
123C..................
124
125C Initialize output and auxiliary varialbles ...............................
126
127      if (nspin.eq.1) then
128        spinfct = 2.0d0
129      else
130        spinfct = 1.0d0
131      endif
132
133      do i = 1,3
134        ener(i) = 0.0d0
135      enddo
136
137      do i = 1,nbasis
138        aux1(i) = 0.0d0
139        aux2(i) = 0.0d0
140      enddo
141
142      do i = 1,nbands
143        bux1(i) = 0.0d0
144        bux2(i) = 0.0d0
145#ifdef MPI
146        bux2g(i) = 0.0d0
147#endif
148      enddo
149
150#ifdef MPI
151      do i = 1,nbandsloc
152        do j = 1,nhijmax
153          bux1s(j,i) = 0.0d0
154          bux2s(j,i) = 0.0d0
155        enddo
156      enddo
157#endif
158
159C Define points to compute energy ..........................................
160      lam123(1) = lam
161      lam123(2) = lam*2.0d0
162      lam123(3) = lam*3.0d0
163
164C Loop over spin states
165      do is = 1,nspin
166
167C Loop over lambda values
168        do ilam = 1,3
169
170#ifdef MPI
171C Initialise communication arrays
172          call globalinitB(1)
173#endif
174
175C Initialise variables
176          func1(ilam) = 0.0d0
177          func2(ilam) = 0.0d0
178
179C Calculate Functional
180C F=CtH
181C Fs=CtS
182
183          do il = 1,nbandsloc
184            i = nbL2G(il)
185
186            do in = 1,numct(il)
187              k = listct(in,il)
188              kk = ncT2P(k)
189              if (kk.gt.0) then
190                pp = c(cttoc(in,il),k,is) +
191     .               lam123(ilam)*grad(cttoc(in,il),k,is)
192
193                indk = listhptr(kk)
194
195                do kn = 1,numh(kk)
196                  mu = listh(indk+kn)
197                  hs = h(indk+kn,is) - eta(is)*s(indk+kn)
198                  ss = s(indk+kn)
199                  aux1(mu) = aux1(mu) + pp*hs
200                  aux2(mu) = aux2(mu) + pp*ss
201                enddo
202              endif
203            enddo
204
205            do in = 1,numf(il)
206              k = listf(in,il)
207              a = aux1(k)
208              b = aux2(k)
209              aux1(k) = 0.0d0
210              aux2(k) = 0.0d0
211
212C Hij=CtHC
213C Sij=CtSC
214C multiply FxC and FsxC
215              kk = ncG2L(k)
216              do kn = 1,numc(kk)
217                lc = listc(kn,kk)
218                c1 = c(kn,kk,is) + lam123(ilam)*grad(kn,kk,is)
219                bux1(lc) = bux1(lc) + a * c1
220                bux2(lc) = bux2(lc) + b * c1
221              enddo
222            enddo
223
224#ifdef MPI
225C Load data into globalisation arrays
226            call globalloadB1(il,nbands,bux2)
227#endif
228
229C First energy contribution
230            func1(ilam) = func1(ilam) + bux1(i)
231
232#ifdef MPI
233C Reinitialise buxs and save for later in sparse form
234            do jn = 1,numhij(il)
235              j = listhij(jn,il)
236              bux1s(jn,il) = bux1(j)
237              bux2s(jn,il) = bux2(j)
238              bux1(j) = 0.0d0
239              bux2(j) = 0.0d0
240            enddo
241
242          enddo
243
244C Global sum of relevant bux values
245          call globalcommB(Node)
246
247C Restore local buxs terms
248          do il = 1,nbandsloc
249            do jn = 1,numhij(il)
250              j = listhij(jn,il)
251              bux2(j) = bux2s(jn,il)
252            enddo
253
254C Reload data after globalisation
255            call globalreloadB1(il,nbands,bux2,bux2g)
256#endif
257
258C Second energy contribution & reinitialise bux2/4/6
259            do jn = 1,numhij(il)
260              j = listhij(jn,il)
261#ifdef MPI
262              func2(ilam) = func2(ilam) + bux1s(jn,il)*bux2g(j)
263              bux2(j) = 0.0d0
264#else
265              func2(ilam) = func2(ilam) + bux1(j)*bux2(j)
266              bux1(j) = 0.0d0
267              bux2(j) = 0.0d0
268#endif
269            enddo
270
271#ifdef MPI
272C Reinitialise bux246
273            call globalrezeroB1(il,nbands,bux2g)
274#endif
275
276          enddo
277
278        enddo
279
280#ifdef MPI
281C Global reduction of func1/2
282        ftmp(1:3,1) = func1(1:3)
283        call Globalize_sum(ftmp(1:3,1), ftmp2(1:3,1))
284        func1(1:3) = ftmp2(1:3,1)
285        ftmp(1:3,2) = func2(1:3)
286        call Globalize_sum(ftmp(1:3,2), ftmp2(1:3,2))
287        func2(1:3) = ftmp2(1:3,2)
288#endif
289
290C This is valid for an spin-unpolarized sytem
291        do i = 1,3
292          ener(i) = ener(i) + spinfct*(func1(i) - 0.5d0*func2(i))
293     .                      + 0.5d0*eta(is)*qs(is)
294        enddo
295
296C End loop over spin states
297      enddo
298
299C Dellocate workspace arrays
300#ifdef MPI
301      call de_alloc( bux2g, 'bux2g', 'ener3' )
302      call de_alloc( bux2s, 'bux2s', 'ener3' )
303      call de_alloc( bux1s, 'bux1s', 'ener3' )
304#endif
305      call de_alloc( bux2, 'bux2', 'ener3' )
306      call de_alloc( bux1, 'bux1', 'ener3' )
307      call de_alloc( aux2, 'aux2', 'ener3' )
308      call de_alloc( aux1, 'aux1', 'ener3' )
309
310      call timer('ener3',2)
311
312      return
313      end
314