1!
2! Copyright (C) 1996-2016	The SIESTA group
3!  This file is distributed under the terms of the
4!  GNU General Public License: see COPYING in the top directory
5!  or http://www.gnu.org/copyleft/gpl.txt.
6! See Docs/Contributors.txt for a list of contributors.
7!
8      module m_denmat
9      public :: denmat
10      CONTAINS
11      subroutine denmat(c,eta,h,s,qs,no_u,no_l,nbands,ncmax,
12     .                  nctmax,nfmax,nftmax,nhmax,nhijmax,numc,listc,
13     .                  numct,listct,cttoc,numf,listf,numft,listft,
14     .                  numh,listh,listhptr,numhij,listhij,dm,edm,
15     .                  no_cl,nspin,Node)
16C *******************************************************************
17C Subroutine to compute the Density and Energy Density matrices
18C for the Order-N functional of Kim et al. (PRB 52, 1640 (95))
19C (generalization of that proposed by Mauri et al, and Ordejon et al)
20C
21C Density Matrix:
22C  D_mu,nu = 2 * C_i,mu * ( 2 * delta_i,j - S_i,j) * C_j,nu
23C
24C Energy Density Matrix:
25C  E_mu,nu = 2 * C_i,mu * ( H_i,j + 2 * eta * (delta_i,j - S_i,j) ) * C_j,nu
26C
27C (The factor 2 is for spin)
28C
29C (See Ordejon et al, PRB 51, 1456 (95))
30C
31C The DM is normalized to the exact number of electrons!!!
32C
33C Written by P.Ordejon, Noviembre'96
34C Modified by J.M.Soler, May'97
35C Multiplication for cHc x c / cSc x c changed to suit parallel
36C version and spatial decomposition added by J.D. Gale December '04
37C ************************** INPUT **********************************
38C real*8 c(ncmax,no_u)      : Localized Wave Functions (sparse)
39C real*8 eta                  : Fermi level parameter of Kim et al.
40C real*8 h(nhmax)             : Hamiltonian matrix (sparse)
41C real*8 s(nhmax)             : Overlap matrix (sparse)
42C real*8 qs(nspin)            : Total number of electrons
43C integer no_u              : Global number of atomic orbitals
44C integer no_l           : Local number of atomic orbitals
45C integer nbands              : Number of Localized Wave Functions
46C integer ncmax               : First dimension of listc and C, and maximum
47C                               number of nonzero elements of each row of C
48C integer nctmax              : Max num of <>0 elements of each col of C
49C integer nfmax               : Max num of <>0 elements of each row of
50C                               F = Ct x H
51C integer nftmax              : Max num of <>0 elements of each col of F
52C integer nhmax               : First dimension of listh and H, and maximum
53C                               number of nonzero elements of each row of H
54C integer nhijmax             : Maximum number of non-zero elements of each
55C                               row of Hij
56C integer numc(no_u)        : Control vector of C matrix
57C                               (number of nonzero elements of each row of C)
58C integer listc(ncmax,no_u) : Control vector of C matrix
59C                              (list of nonzero elements of each row of C)
60C integer numct(nbands)       : Control vector of C transpose matrix
61C                              (number of <>0  elements of each col of C)
62C integer listct(ncmax,nbands): Control vector of C transpose matrix
63C                              (list of <>0  elements of each col of C)
64C integer cttoc(ncmax,nbands) : Map from Ct to C indexing
65C integer numf(nbands)        : Control vector of F matrix
66C                               (number of <>0  elements of each row of F)
67C integer listf(nfmax,nbands) : Control vector of F matrix
68C                               (list of <>0  elements of each row of F)
69C integer numft(no_u)       : Control vector of F transpose matrix
70C                               (number of <>0  elements of each col of F)
71C integer listft(nfmax,no_u): Control vector of F transpose matrix
72C                               (list of <>0  elements of each col of F)
73C integer numh(no_u)        : Control vector of H matrix
74C                               (number of nonzero elements of each row of H)
75C integer listh(nhmax)        : Control vector of H matrix
76C                               (list of nonzero elements of each row of H)
77C integer listhptr(no_u)    : Control vector of H matrix
78C                               (pointer to start of row in listh/h/s)
79C integer numhij(nbands)      : Control vector of Hij matrix
80C                                (number of <>0  elements of each row of Hij)
81C integer listhij(nhijmax,nbands): Control vector of Hij matrix
82C                                (list of <>0  elements of each row of Hij)
83C ************************* OUTPUT **********************************
84C real*8 dm(nhmax,no_u)     : Density Matrix
85C real*8 edm(nhmax,no_u)    : Energy density matrix
86C *******************************************************************
87
88      use precision, only : dp
89      use on_main,   only : ncG2L, ncP2T, ncT2P, nbL2G, nbandsloc
90      use m_mpi_utils, only : globalize_sum
91      use alloc,     only : re_alloc, de_alloc
92
93#ifdef MPI
94      use globalise, only: setglobalise, maxft2, numft2, listft2,
95     .                     globalinitb, globalloadb2, globalcommb,
96     .                     globalreloadb2, globalrezerob2,
97     .                     globalisef
98#endif
99
100      implicit none
101
102      integer, intent(in) ::
103     .  no_u,no_l,nbands,ncmax,nctmax,nfmax,nhmax,nhijmax,
104     .  no_cl,Node,nspin
105
106      integer, intent(in) ::
107     .  cttoc(nctmax,nbandsloc),listc(ncmax,no_cl),
108     .  listct(nctmax,nbandsloc),listf(nfmax,nbandsloc),
109     .  listh(nhmax),listhptr(no_l),listhij(nhijmax,nbandsloc),
110     .  numc(no_cl),numct(nbandsloc),numf(nbandsloc),
111     .  numh(no_l),numhij(nbandsloc)
112
113      integer, intent(in) ::
114     .  nftmax,
115     .  listft(nftmax,no_cl), numft(no_cl)
116
117      real(dp), intent(in) ::
118     .  c(ncmax,no_cl,nspin),
119     .  qs(nspin),eta(nspin),h(nhmax,nspin),s(nhmax)
120
121      real(dp), intent(out) :: dm(nhmax,nspin),edm(nhmax,nspin)
122
123      external
124     .  timer
125
126C Internal variales ..................................................
127C   Notation hints:
128C     m,n : basis orbital inexes (mu,nu)
129C     i,j : band (and LWF) indexes
130C     im  : index for LWF's of basis orbital m
131C     mi  : index for basis orbitals of LWF i
132C     nm  : index for basis orbitals connected to basis orbital m
133
134      integer
135     .  i, il, in, indm, indn, im, is, j, jn, m, mi, mm, mn,
136     .  n, nh, ni, nm, nn
137
138      real(dp), pointer ::
139     .  cHrow(:), cSrow(:), chcrow(:), cscrow(:), chccCol(:),
140     .  csccCol(:)
141
142      real(dp) ::
143     .  cim, cnj, chin, csin, cchccmn, ccsccmn,
144     .  Hmn, Smn, qout(2), fact, spinfct
145
146#ifdef MPI
147      real(dp) ::  qtmp(nspin)
148      real(dp), pointer ::
149     .  ctmp(:,:), chcrowsave(:,:), cscrowsave(:,:),
150     .  ftG(:,:), fstG(:,:)
151#else
152       real(dp), pointer ::  ft(:,:), fst(:,:)
153#endif
154
155C Start time counter
156      call timer('denmat',1)
157
158      if (nspin.eq.1) then
159        spinfct = 2.0_dp
160      else
161        spinfct = 1.0_dp
162      endif
163
164C Allocate local arrays
165
166      nullify( cHrow )
167      call re_alloc( cHrow, 1, no_u, name='cHrow', routine='denmat' )
168      nullify( cSrow )
169      call re_alloc( cSrow, 1, no_u, name='cSrow', routine='denmat' )
170      nullify( chcrow )
171      call re_alloc( chcrow, 1, nbands, name='chcrow',
172     &               routine='denmat' )
173      nullify( cscrow )
174      call re_alloc( cscrow, 1, nbands, name='cscrow',
175     &               routine='denmat' )
176      nullify( chccCol )
177      call re_alloc( chccCol, 1, nbands, name='chccCol',
178     &               routine='denmat' )
179      nullify( csccCol )
180      call re_alloc( csccCol, 1, nbands, name='csccCol',
181     &               routine='denmat' )
182
183#ifdef MPI
184      nullify( chcrowsave )
185      call re_alloc( chcrowsave, 1, nhijmax, 1, nbandsloc,
186     &               name='chcrowsave', routine='denmat' )
187      nullify( cscrowsave )
188      call re_alloc( cscrowsave, 1, nhijmax, 1, nbandsloc,
189     &               name='cscrowsave', routine='denmat' )
190      nullify( ctmp )
191      call re_alloc( ctmp, 1, nbands, 1, 2,
192     &               name='ctmp', routine='denmat' )
193      nullify( ftG )
194      call re_alloc( ftG, 1, MaxFt2, 1, no_cl,
195     &               name='ftG', routine='denmat' )
196      nullify( fstG )
197      call re_alloc( fstG, 1, MaxFt2, 1, no_cl,
198     &               name='fstG', routine='denmat' )
199#else
200      nullify( ft )
201      call re_alloc( ft, 1, nftmax, 1, no_cl,
202     &               name='ft', routine='denmat' )
203      nullify( fst )
204      call re_alloc( fst, 1, nftmax, 1, no_cl,
205     &               name='fst', routine='denmat' )
206#endif
207
208C Initialize temporary arrays
209      do m = 1,no_u
210        cHrow(m) = 0.0_dp
211        cSrow(m) = 0.0_dp
212      enddo
213      do i = 1,nbands
214        csccCol(i) = 0.0_dp
215        chccCol(i) = 0.0_dp
216        cscrow(i) = 0.0_dp
217        chcrow(i) = 0.0_dp
218#ifdef MPI
219        ctmp(i,1) = 0.0_dp
220        ctmp(i,2) = 0.0_dp
221#endif
222      enddo
223#ifdef MPI
224      do i = 1,nbandsloc
225        do j = 1,nhijmax
226          cscrowsave(j,i) = 0.0_dp
227          chcrowsave(j,i) = 0.0_dp
228        enddo
229      enddo
230      do i = 1,no_cl
231        do j = 1,MaxFT2
232          ftG(j,i) = 0.0_dp
233          fstG(j,i) = 0.0_dp
234        enddo
235      enddo
236#else
237      ft=0.0_dp
238      fst=0.0_dp
239#endif
240
241C Loop over spins
242      do is = 1,nspin
243
244#ifdef MPI
245C Initialise communication arrays
246        call globalinitB(2)
247#endif
248
249C Find cscc=(2-ct*S*c)*ct and chcc=(ct*H*c+2eta(1-ct*S*c))*ct.
250        do il = 1,nbandsloc
251          i = nbL2G(il)
252
253C Find row i of cS=ct*S and cH=ct*H
254          do mi = 1,numct(il)
255            m = listct(mi,il)
256            mm = ncT2P(m)
257            if (mm.gt.0) then
258              im = cttoc(mi,il)
259              cim = c(im,m,is)
260              indm = listhptr(mm)
261              do nm = 1,numh(mm)
262                n = listh(indm+nm)
263                Hmn = H(indm+nm,is)
264                Smn = S(indm+nm)
265                cHrow(n) = cHrow(n) + cim * Hmn
266                cSrow(n) = cSrow(n) + cim * Smn
267              enddo
268            endif
269          enddo
270
271C Find row i of csc=2-ct*S*c and chc=ct*H*c+2eta(1-ct*S*c)
272
273C Now use the list of nonzero elements of f=ct*H
274          do ni = 1,numf(il)
275            n = listf(ni,il)
276            nn = ncG2L(n)
277            csin = - cSrow(n)
278            chin = cHrow(n) - 2.0_dp*eta(is)*cSrow(n)
279            do jn = 1,numc(nn)
280              j = listc(jn,nn)
281              cnj = c(jn,nn,is)
282              chcrow(j) = chcrow(j) + chin * cnj
283              cscrow(j) = cscrow(j) + csin * cnj
284            enddo
285
286C Restore cSrow and cHrow for next row
287            cSrow(n) = 0.0_dp
288            cHrow(n) = 0.0_dp
289          enddo
290
291#ifdef MPI
292C Load data into globalisation arrays
293          call globalloadB2(il,nbands,chcrow,cscrow)
294
295C Reinitialise bux1/2 and save for later in sparse form
296          do jn = 1,numhij(il)
297            j = listhij(jn,il)
298            chcrowsave(jn,il) = chcrow(j)
299            cscrowsave(jn,il) = cscrow(j)
300            chcrow(j) = 0.0_dp
301            cscrow(j) = 0.0_dp
302          enddo
303
304        enddo
305
306C Globalise chc/csc
307        call globalcommB(Node)
308
309C Restore local chc/csc terms
310        do il = 1,nbandsloc
311          i = nbL2G(il)
312          do jn = 1,numhij(il)
313            j = listhij(jn,il)
314            chcrow(j) = chcrowsave(jn,il)
315            cscrow(j) = cscrowsave(jn,il)
316          enddo
317
318C Reload data after globalisation
319          call globalreloadB2(il,nbands,chcrow,cscrow,ctmp)
320
321C Add diagonal terms - 2 and 2eta
322          ctmp(i,1) = ctmp(i,1) + 2.0_dp * eta(is)
323          ctmp(i,2) = ctmp(i,2) + 2.0_dp
324#else
325          chcrow(i) = chcrow(i) + 2.0_dp * eta(is)
326          cscrow(i) = cscrow(i) + 2.0_dp
327#endif
328
329C Find row i of cscc=csc*ct and chcc=chc*ct.
330C Only the nonzero elements of f=cH will be required.
331          do mi = 1,numct(il)
332            m = listct(mi,il)
333#ifdef MPI
334            if (ncT2P(m).gt.0) then
335              im = cttoc(mi,il)
336              do in = 1,numft2(m)
337                j = listft2(in,m)
338                ftG(in,m)  = ftG(in,m)  + ctmp(j,1)*c(im,m,is)
339                fstG(in,m) = fstG(in,m) + ctmp(j,2)*c(im,m,is)
340              enddo
341            endif
342#else
343            im = cttoc(mi,il)
344            do in = 1,numft(m)
345              j = listft(in,m)
346              ft(in,m)  = ft(in,m)  + chcrow(j)*c(im,m,is)
347              fst(in,m) = fst(in,m) + cscrow(j)*c(im,m,is)
348            enddo
349#endif
350          enddo
351
352C Reinitialise chcrow/cscrow
353          do jn = 1,numhij(il)
354            j = listhij(jn,il)
355            chcrow(j) = 0.0_dp
356            cscrow(j) = 0.0_dp
357          enddo
358#ifdef MPI
359C Reinitialise ctmp
360          call globalrezeroB2(il,nbands,ctmp)
361#endif
362
363        enddo
364
365C Find dm=c*cscc and edm=c*chcc. Only the nonzero elements of H.
366        do n = 1,no_l
367          nn = ncP2T(n)
368
369C Use listft to expand a column of cscc
370#ifdef MPI
371          do in = 1,numft2(nn)
372            i = listft2(in,nn)
373            chccCol(i) = ftG(in,nn)
374            csccCol(i) = fstG(in,nn)
375          enddo
376#else
377          do in = 1,numft(nn)
378            i = listft(in,nn)
379            chccCol(i) = ft(in,nn)
380            csccCol(i) = fst(in,nn)
381          enddo
382#endif
383
384C Find column n of c*cscc and c*chcc
385C Use that H is symmetric to determine required elements
386          indn = listhptr(n)
387          do mn = 1,numh(n)
388            m = listh(indn+mn)
389            mm = ncG2L(m)
390            if (mm.gt.0) then
391C Find element (m,n) of c*cscc and c*chcc
392              ccsccmn = 0.0_dp
393              cchccmn = 0.0_dp
394              do im = 1,numc(mm)
395                i = listc(im,mm)
396                ccsccmn = ccsccmn + c(im,mm,is)*csccCol(i)
397                cchccmn = cchccmn + c(im,mm,is)*chccCol(i)
398              enddo
399C Use that dm and edm are symmetric
400              dm(indn+mn,is)  = spinfct*ccsccmn
401              edm(indn+mn,is) = spinfct*cchccmn
402            endif
403          enddo
404
405C Restore csccCol and chccCol for next column
406          do in = 1,numft(nn)
407            i = listft(in,nn)
408            csccCol(i) = 0.0_dp
409            chccCol(i) = 0.0_dp
410          enddo
411        enddo
412
413C End of loop over spins
414      enddo
415
416C Normalize DM to exact charge .........................
417C Calculate total output charge ...
418      qout(1:2) = 0.0_dp
419      if (no_l.gt.0) then
420        nh = listhptr(no_l) + numh(no_l)
421      else
422        nh = 0
423      endif
424      do is = 1,nspin
425        do in = 1,nh
426          qout(is) = qout(is) + dm(in,is) * s(in)
427        enddo
428      enddo
429
430C Globalize total charge
431#ifdef MPI
432      qtmp(1:nspin) = qout(1:nspin)
433      call Globalize_sum(qtmp(1:nspin),qout(1:nspin))
434#endif
435
436!! AG  #ifdef MPI
437!! AG      qtmp(1:2) = qout(1:2)
438!! AG      call MPI_AllReduce(qtmp,qout,nspin,MPI_double_precision,
439!! AG     .  MPI_sum,MPI_Comm_World,MPIerror)
440!! AG #endif
441
442      if (Node.eq.0) then
443        write(6,"(/a,2f12.4)")
444     .    'denmat: qtot (before DM normalization) = ',qout(1:nspin)
445      endif
446
447C Normalize ...
448      do is = 1,nspin
449C        if (dabs(qs(is)-qout(is)) .gt. 0.05_dp) then
450          fact = qs(is) / qout(is)
451          call dscal(nh,fact,dm(1,is),1)
452          call dscal(nh,fact,edm(1,is),1)
453C        endif
454      enddo
455
456C Deallocate local arrays
457#ifdef MPI
458      call de_alloc( fstG, name='fstG' )
459      call de_alloc( ftG, name='ftG' )
460      call de_alloc( ctmp, name='ctmp' )
461      call de_alloc( cscrowsave, name='cscrowsave' )
462      call de_alloc( chcrowsave, name='chcrowsave' )
463#else
464      call de_alloc( fst, name='fst' )
465      call de_alloc( ft, name='ft' )
466#endif
467      call de_alloc( csccCol, name='csccCol' )
468      call de_alloc( chccCol, name='chccCol' )
469      call de_alloc( cscrow, name='cscrow' )
470      call de_alloc( chcrow, name='chcrow' )
471      call de_alloc( cSrow, name='cSrow' )
472      call de_alloc( cHrow, name='cHrow' )
473
474C Stop time counter and return ..................
475      call timer('denmat',2)
476      end subroutine denmat
477      end module m_denmat
478