1! 2! Copyright (C) 1996-2016 The SIESTA group 3! This file is distributed under the terms of the 4! GNU General Public License: see COPYING in the top directory 5! or http://www.gnu.org/copyleft/gpl.txt. 6! See Docs/Contributors.txt for a list of contributors. 7! 8 module m_gradient 9 public :: gradient 10 CONTAINS 11 subroutine gradient(c,eta,qs,h,s,no_u,no_l,nbands,ncmax, 12 . nctmax,nfmax,nftmax,nhmax,nhijmax, 13 . numc,listc,numct,listct,cttoc,numf,listf, 14 . numft,listft,fttof,numh,listh, 15 . listhptr,numhij,listhij,f,fs,grad,ener, 16 . no_cl,nspin,Node) 17 18C ************************************************************************ 19C Finds the energy and gradient at point C. 20C Uses the functional of Kim et al (PRB 52, 1640 (95)) 21C Works only with spin-unpolarized systems 22C Written by P.Ordejon. October'96 23C Last modified: J.M.Soler. 30/04/97 24C ****************************** INPUT *********************************** 25C real*8 c(ncmax,no_u) : Current point (wave function coeff. 26C in sparse form) 27C real*8 eta(nspin) : Fermi level parameter of Kim et al. 28C real*8 qs(nspin) : Total number of electrons 29C real*8 h(nhmax) : Hamiltonian matrix (sparse) 30C real*8 s(nhmax) : Overlap matrix (sparse) 31C integer no_u : Global number of basis orbitals 32C integer no_l : Local number of basis orbitals 33C integer nbands : Number of LWF's 34C integer ncmax : Max num of <>0 elements of each row of C 35C integer nctmax : Max num of <>0 elements of each col of C 36C integer nfmax : Max num of <>0 elements of each row of 37C F = Ct x H 38C integer nftmax : Max num of <>0 elements of each col of F 39C integer nhmax : Max num of <>0 elements of each row of H 40C integer nhijmax : Max num of <>0 elements of each row of 41C Hij=Ct x H x C 42C integer numc(no_u) : Control vector of C matrix 43C (number of <>0 elements of each row of C) 44C integer listc(ncmax,no_u) : Control vector of C matrix 45C (list of <>0 elements of each row of C) 46C integer numct(nbands) : Control vector of C transpose matrix 47C (number of <>0 elements of each col of C) 48C integer listct(ncmax,nbands) : Control vector of C transpose matrix 49C (list of <>0 elements of each col of C) 50C integer cttoc(ncmax,nbands) : Map from Ct to C indexing 51C integer numf(nbands) : Control vector of F matrix 52C (number of <>0 elements of each row of F) 53C integer listf(nfmax,nbands) : Control vector of F matrix 54C (list of <>0 elements of each row of F) 55C integer numft(no_u) : Control vector of F transpose matrix 56C (number of <>0 elements of each col of F) 57C integer listft(nfmax,no_u) : Control vector of F transpose matrix 58C (list of <>0 elements of each col of F) 59C integer fttof(nfmax,no_u) : Map from Ft to F indexing 60C integer numh(no_u) : Control vector of H matrix 61C (number of <>0 elements of each row of H) 62C integer listh(nhmax) : Control vector of H matrix 63C (list of <>0 elements of each row of H) 64C integer listhptr(no_u) : Control vector of H matrix 65C (pointer to start of rows in listh/h/s) 66C integer numhij(nbands) : Control vector of Hij matrix 67C (number of <>0 elements of each row of Hij) 68C integer listhij(nhijmax,nbands): Control vector of Hij matrix 69C (list of <>0 elements of each row of Hij) 70C real*8 f(nfmax,nbands) : Auxiliary space 71C real*8 fs(nfmax,nbands) : Auxiliary space 72C ***************************** OUTPUT *********************************** 73C real*8 ener : Energy at point C 74C real*8 grad(ncmax,no_u) : Gradient of functional (sparse) 75C ************************************************************************ 76 77 use precision 78 use on_main, only : ncG2L, ncT2P, nbL2G, nbG2L, nbandsloc 79 use alloc, only : re_alloc, de_alloc 80#ifdef MPI 81 use globalise, only : globalinitB, globalisef, globalisec 82 use globalise, only : globalloadb2, globalcommb 83 use globalise, only : globalreloadb2, globalrezerob2 84 use globalise, only : maxft2, numft2, listft2 85#endif 86 use m_mpi_utils, only : globalize_sum 87 88 implicit none 89 90 integer, intent(in) :: 91 . no_u,no_l,nbands,ncmax,nctmax,nfmax,nftmax,nhmax, 92 . nhijmax,no_cl,nspin,Node 93 94 integer, intent(in) :: 95 . cttoc(nctmax,nbandsloc),fttof(nftmax,no_cl), 96 . listc(ncmax,no_cl),listct(nctmax,nbandsloc), 97 . listf(nfmax,nbandsloc),listft(nftmax,no_cl), 98 . listh(nhmax),listhptr(no_l),listhij(nhijmax,nbandsloc), 99 . numc(no_cl),numct(nbandsloc),numf(nbandsloc), 100 . numft(no_cl),numh(no_l),numhij(nbandsloc) 101 102 real(dp), intent(in) :: 103 . c(ncmax,no_cl,nspin),eta(nspin),qs(nspin), 104 . h(nhmax,nspin),s(nhmax) 105 106 real(dp), intent(inout) :: 107 . f(nfmax,nbandsloc),fs(nfmax,nbandsloc) 108 109 real(dp), intent(out) :: grad(ncmax,no_cl,nspin), ener 110 111C Internal variables ...................................................... 112 113 integer 114 . i,ik,il,in,indk,is,j,jk,jl,jn,k,kk,kn,mu,muk 115 116#ifdef MPI 117 real(dp) :: 118 . ftmp(2), ftmp2(2) 119 real(dp), pointer, save :: 120 . buxg(:,:),bux1save(:,:),bux2save(:,:), ftG(:,:), fstG(:,:) 121#endif 122 real(dp), pointer, save :: 123 . aux1(:), aux2(:), bux1(:), bux2(:), ft(:,:), fst(:,:) 124 125 real(dp) :: 126 . a0,b0,p0,func1,func2,spinfct 127 128C.................. 129 130 call timer('gradient',1) 131 132C Allocate local arrays 133 call re_alloc( aux1, 1, no_u, 'aux1', 'gradient' ) 134 call re_alloc( aux2, 1, no_u, 'aux2', 'gradient' ) 135 call re_alloc( bux1, 1, nbands, 'bux1', 'gradient' ) 136 call re_alloc( bux2, 1, nbands, 'bux2', 'gradient' ) 137 call re_alloc( ft, 1, nftmax, 1, no_cl, 'ft', 'gradient' ) 138 call re_alloc( fst, 1, nftmax, 1, no_cl, 'fst', 'gradient' ) 139#ifdef MPI 140 call re_alloc( ftG, 1, MaxFt2, 1, no_cl, 'ftG', 'gradient' ) 141 call re_alloc( fstG, 1, MaxFt2, 1, no_cl, 'fstG', 'gradient') 142 call re_alloc( bux1save, 1, nhijmax, 1, nbandsloc, 'bux1save', 143 & 'gradient' ) 144 call re_alloc( bux2save, 1, nhijmax, 1, nbandsloc, 'bux2save', 145 & 'gradient' ) 146 call re_alloc( buxg, 1, nbands, 1, 2, 'buxg', 'gradient') 147#endif 148 149C Initialize output and auxiliary varialbles ............................... 150 151 if (nspin.eq.1) then 152 spinfct = 2.0d0 153 else 154 spinfct = 1.0d0 155 endif 156 157 ener = 0.0d0 158 159 do i = 1,no_u 160 aux1(i) = 0.0d0 161 aux2(i) = 0.0d0 162 enddo 163 164 do i = 1,nbands 165 bux1(i) = 0.0d0 166 bux2(i) = 0.0d0 167#ifdef MPI 168 buxg(i,1) = 0.0d0 169 buxg(i,2) = 0.0d0 170#endif 171 enddo 172 173#ifdef MPI 174 do i = 1,nbandsloc 175 do j = 1,nhijmax 176 bux1save(j,i) = 0.0d0 177 bux2save(j,i) = 0.0d0 178 enddo 179 enddo 180#endif 181 182C Initialise gradient vector 183 grad = 0.0d0 184 185C Calculate Functional ..................................................... 186 187C Loop over spin 188 do is = 1,nspin 189 190#ifdef MPI 191C Initialise communication arrays 192 call globalinitB(2) 193#endif 194 195C Initialise variables 196 func1 = 0.0d0 197 func2 = 0.0d0 198 199C F=CtH ---> JMS: F=Ct*(H-eta*S) 200C Fs=CtS 201 do il = 1,nbandsloc 202 do in = 1,numct(il) 203 k = listct(in,il) 204 kk = ncT2P(k) 205 if (kk.gt.0) then 206 ik = cttoc(in,il) 207 p0 = c(ik,k,is) 208 indk = listhptr(kk) 209 do kn = 1,numh(kk) 210 aux1(listh(indk+kn)) = aux1(listh(indk+kn)) + 211 . p0*( h(indk+kn,is) - eta(is)*s(indk+kn) ) 212 aux2(listh(indk+kn)) = aux2(listh(indk+kn)) + 213 . p0*s(indk+kn) 214 enddo 215 endif 216 enddo 217 do in = 1,numf(il) 218 k = listf(in,il) 219 f(in,il) = aux1(k) 220 fs(in,il) = aux2(k) 221 aux1(k) = 0.0d0 222 aux2(k) = 0.0d0 223 enddo 224 enddo 225 226C-JMS Find transpose of F and Fs 227 do mu = 1,no_cl 228 do muk = 1,numft(mu) 229 j = listft(muk,mu) 230 jl = nbG2L(j) 231 if (jl.gt.0) then 232 jk = fttof(muk,mu) 233 ft(muk,mu) = f(jk,jl) 234 fst(muk,mu) = fs(jk,jl) 235 endif 236 enddo 237 enddo 238 239#ifdef MPI 240C Globalise ft/fst 241 call globaliseF(no_cl,nbands,nftmax,numft,listft, 242 . ft,ftG,Node) 243 call globaliseF(no_cl,nbands,nftmax,numft,listft, 244 . fst,fstG,Node) 245#endif 246 247C Sij=CtSC 248C multiply FxC and FsxC row by row 249 do il = 1,nbandsloc 250 do in = 1,numf(il) 251 k = listf(in,il) 252 kk = ncG2L(k) 253 a0 = f(in,il) 254 b0 = fs(in,il) 255 do kn = 1,numc(kk) 256 bux1(listc(kn,kk)) = bux1(listc(kn,kk)) + a0 * c(kn,kk,is) 257 bux2(listc(kn,kk)) = bux2(listc(kn,kk)) + b0 * c(kn,kk,is) 258 enddo 259 enddo 260 261#ifdef MPI 262C Load data into globalisation arrays 263 call globalloadB2(il,nbands,bux1,bux2) 264#endif 265 266#ifdef MPI 267C Reinitialise bux1/2 and save for later in sparse form 268 do jn = 1,numhij(il) 269 j = listhij(jn,il) 270 bux1save(jn,il) = bux1(j) 271 bux2save(jn,il) = bux2(j) 272 bux1(j) = 0.0d0 273 bux2(j) = 0.0d0 274 enddo 275 276 enddo 277 278C Globalise Hij/Sij 279 call globalcommB(Node) 280 281C Restore local bux1/2 terms 282 do il = 1,nbandsloc 283 do jn = 1,numhij(il) 284 j = listhij(jn,il) 285 bux1(j) = bux1save(jn,il) 286 bux2(j) = bux2save(jn,il) 287 enddo 288 289C Reload data after globalisation 290 call globalreloadB2(il,nbands,bux1,bux2,buxg) 291#endif 292 293C Calculate energy terms & reinitialise bux1/2 294 i = nbL2G(il) 295 func1 = func1 + bux1(i) 296 297 do jn = 1,numhij(il) 298 j = listhij(jn,il) 299#ifdef MPI 300 func2 = func2 + bux1(j)*buxg(j,2) 301#else 302 func2 = func2 + bux1(j)*bux2(j) 303#endif 304 enddo 305 306C Multiply Hij x Fs and Sij x F row by row 307C (only products of neccesary elements) 308 do ik = 1,numct(il) 309 mu = listct(ik,il) 310 kk = ncT2P(mu) 311 if (kk.gt.0) then 312 a0 = 0.0d0 313#ifdef MPI 314 do muk = 1,numft2(mu) 315 j = listft2(muk,mu) 316 a0 = a0 + buxg(j,1)*fstG(muk,mu) 317 . + buxg(j,2)*ftG(muk,mu) 318#else 319 do muk = 1,numft(mu) 320 j = listft(muk,mu) 321 a0 = a0 + bux1(j)*fst(muk,mu) 322 . + bux2(j)*ft(muk,mu) 323#endif 324 enddo 325 grad(cttoc(ik,il),mu,is) = - spinfct*a0 326 endif 327 enddo 328 329#ifdef MPI 330C Reinitialise buxg 331 call globalrezeroB2(il,nbands,buxg) 332#endif 333C Reinitialise bux1 / bux2 334 do jn = 1,numhij(il) 335 j = listhij(jn,il) 336 bux1(j) = 0.0d0 337 bux2(j) = 0.0d0 338 enddo 339 340 enddo 341 342#ifdef MPI 343C Global reduction of func1/2 344 ftmp(1) = func1 345 ftmp(2) = func2 346 call Globalize_sum(ftmp(1:2),ftmp2(1:2)) 347 func1 = ftmp2(1) 348 func2 = ftmp2(2) 349#endif 350 351 bux1(1:nbands) = 0.0d0 352 do k = 1,no_cl 353 do ik = 1,numft(k) 354 j = listft(ik,k) 355 bux1(j) = 2.0d0*spinfct*ft(ik,k) + bux1(j) 356 enddo 357 do ik = 1,numc(k) 358 j = listc(ik,k) 359 grad(ik,k,is) = bux1(j) + grad(ik,k,is) 360 enddo 361 do ik = 1,numft(k) 362 j = listft(ik,k) 363 bux1(j) = 0.0d0 364 enddo 365 enddo 366 367 ener = ener + spinfct*(func1 - 0.5d0*func2) 368 . + 0.5d0*eta(is)*qs(is) 369 370C End loop over spins 371 enddo 372 373#ifdef MPI 374 call globaliseC(no_cl,ncmax,numc,grad,nspin,Node) 375#endif 376 377C Deallocate local arrays 378#ifdef MPI 379 call de_alloc( buxg, 'buxg', 'gradient' ) 380 call de_alloc( bux2save, 'bux2save', 'gradient' ) 381 call de_alloc( bux1save, 'bux1save', 'gradient' ) 382 call de_alloc( fstG, 'fstG', 'gradient' ) 383 call de_alloc( ftG, 'ftG', 'gradient' ) 384#endif 385 call de_alloc( fst, 'fst', 'gradient' ) 386 call de_alloc( ft, 'ft', 'gradient' ) 387 call de_alloc( bux2, 'bux2', 'gradient' ) 388 call de_alloc( bux1, 'bux1', 'gradient' ) 389 call de_alloc( aux2, 'aux2', 'gradient' ) 390 call de_alloc( aux1, 'aux1', 'gradient' ) 391 392 call timer('gradient',2) 393 394 end subroutine gradient 395 end module m_gradient 396