1!!@LICENSE
2!
3! *******************************************************************
4! subroutine cellXC( irel, cell, nMesh, lb1, ub1, lb2, ub2, lb3, ub3,
5!    .               nSpin, dens, Ex, Ec, Dx, Dc, stress, Vxc, dVxcdD )
6! *******************************************************************
7! Finds total exchange-correlation energy and potential in a
8!   periodic cell.
9! This version implements the Local (spin) Density Approximation and
10!   the Generalized-Gradient-Aproximation with the 'explicit mesh
11!   functional' approach of White & Bird, PRB 50, 4954 (1994).
12! Gradients are 'defined' by numerical derivatives, using 2*nn+1 mesh
13!   points, where nn is a parameter defined below
14! Ref: L.C.Balbas et al, PRB 64, 165110 (2001)
15! Wrtten by J.M.Soler using algorithms developed by
16!   L.C.Balbas, J.L.Martins and J.M.Soler, Dec.1996 - Aug.1997
17! Parallel version written by J.Gale. June 1999.
18! Argument dVxcdD added by J.Junquera. November 2000.
19! Adapted for multiple functionals in the same run by J.Gale 2005
20! Van der Waals functional added by J.M.Soler, Jan.2008, as explained in
21!   G.Roman-Perez and J.M.Soler, PRL 103, 096102 (2009)
22! ************************* INPUT ***********************************
23! integer  irel        : Relativistic exchange? (0=>no, 1=>yes)
24! real(dp) cell(3,3)   : Unit cell vectors cell(ixyz,ivector)
25! integer  nMesh(3)    : Total mesh divisions of each cell vector
26! integer  lb1,lb2,lb3 : Lower bounds of arrays dens, Vxc, dVxcdD
27! integer  ub1,ub2,ub3 : Upper bounds of arrays dens, Vxc, dVxcdD
28! integer  nSpin       : nSpin=1 => unpolarized; nSpin=2 => polarized;
29!                        nSpin>2 => non-collinear polarization
30! real(grid_p) dens(lb1:ub1,lb2:ub2,lb3:ub3,nSpin) : Total (nSpin=1) or
31!                        spin (nSpin=2) electron density at mesh points
32!                        For non-collinear polarization, the density
33!                        matrix is given by: dens(1)=D11, dens(2)=D22,
34!                        dens(3)=Real(D12), dens(4)=Im(D12)
35! ************************* OUTPUT **********************************
36! real(dp) Ex             : Total exchange energy per unit cell
37! real(dp) Ec             : Total correlation energy per unit cell
38! real(dp) Dx             : IntegralOf( rho * (eps_x - v_x) ) in unit cell
39! real(dp) Dc             : IntegralOf( rho * (eps_c - v_c) ) in unit cell
40! real(dp) stress(3,3)    : xc contribution to the stress tensor, in unit
41!                           cell, assuming constant density (not charge),
42!                           i.e. r->r' => rho'(r') = rho(r)
43!                           For plane-wave and grid (finite diff) basis
44!                           sets, density rescaling gives an extra term
45!                           (not included) (Dx+Dc-Ex-Ec)/cell_volume for
46!                           the diagonal elements of stress. For other
47!                           basis sets, the extra term is, in general:
48!                           IntegralOf(v_xc * d_rho/d_strain) / cell_vol
49! real(grid_p) Vxc(lb1:ub1,lb2:ub2,lb3:ub3,nSpin) : (Spin) xc potential
50! ************************* OPTIONAL OUTPUT *************************
51! real(grid_p) dVxcdD(lb1:ub1,lb2:ub2,lb3:ub3,nSpin*nSpin) : Derivatives
52!                           of xc potential respect to charge density
53!                           Available only for LDA
54! ************************ UNITS ************************************
55! Distances in atomic units (Bohr).
56! Densities in atomic units (electrons/Bohr**3)
57! Energy unit depending of parameter EUnit below
58! Stress in EUnit/Bohr**3
59! ************************ USAGE ************************************
60! With the prototype module xcmod below, you must make a previous call
61!     CALL setXC( nFunc, XCfunc, XCauth, XCweightX, XCweightC )
62! before calling cellXC for the first time
63!
64! A typical serial program call is:
65!
66!   use precision, only: dp, grid_p
67!   use xcmod,     only: setXC
68!   integer  :: nMesh(3), nSpin
69!   real(dp) :: cell(3,3), Dc, Dx, Ec, Ex, stress(3,3),
70!   real(grid_p),allocatable :: dens(:,:,:,:), Vxc(:,:,:,:)
71!     Find nSpin, cell(:,:), and nMesh(:)
72!   allocate( dens(nMesh(1),nMesh(2),nMesh(3),nSpin), &
73!              Vxc(nMesh(1),nMesh(2),nMesh(3),nSpin)) )
74!     Find dens(:,:,:,:) at all mesh points
75!   call setXC( 1, (/'GGA'/), (/'PBE'/), (/1._dp/), (/1._dp/) )
76!   call cellXC( 0, cell, nMesh, 1,nMesh(1), 1,nMesh(2), 1,nMesh(3), &
77!                nSpin, dens, Ex, Ex, Dx, Dc, stress, Vxc )
78!
79! A typical parallel program call is:
80!
81!   use precision, only: dp, grid_p
82!   use xcmod,     only: setXC
83!   integer  :: iSpin, myBox(2,3), nMesh(3), nSpin
84!   real(dp) :: cell(3,3), Dc, Dx, Ec, Ex, stress(3,3),
85!   real(grid_p),allocatable :: dens(:,:,:,:), Vxc(:,:,:,:)
86!     Find nSpin, cell(:,:), nMesh(:), and myBox(:,:)
87!   allocate( dens(myBox(1,1):myBox(2,1),        &
88!                  myBox(1,2):myBox(2,2),        &
89!                  myBox(1,3):myBox(2,3),nSpin), &
90!              Vxc(myBox(1,1):myBox(2,1),        &
91!                  myBox(1,2):myBox(2,2),        &
92!                  myBox(1,3):myBox(2,3),nSpin) )
93!   do i3 = myBox(1,3),myBox(2,3)
94!   do i2 = myBox(1,2),myBox(2,2)
95!   do i1 = myBox(1,1),myBox(2,1)
96!     do iSpin = 1,nSpin
97!       dens(i1,i2,i3,iSpin) = (spin)density at point (i1,i2,i3)
98!     end do
99!   end do
100!   end do
101!   end do
102!   call setXC( 1, (/'GGA'/), (/'PBE'/), (/1._dp/), (/1._dp/) )
103!   call cellXC( 0, cell, nMesh, myBox(1,1), myBox(2,1), &
104!                                myBox(1,2), myBox(2,2), &
105!                                myBox(1,3), myBox(2,3), &
106!                nSpin, dens, Ex, Ex, Dx, Dc, stress, Vxc )
107!
108! IMPORTANT: arrays dens, Vxc, and dVxcdD may be alternatively
109! allocated and initialized with indexes beginning in 0 or 1,
110! or even use a single-index array, e.g.:
111!   real(grid_p),allocatable :: dens(:,:), Vxc(:,:)
112!   myMesh(1:3) = myBox(2,:) - myBox(1,:) + 1
113!   myPoints = myMesh(1)*myMesh(2)*myMesh(3)
114!   allocate( dens(myPoints,nSpin), Vxc(myPoints,nSpin) )
115!   iPoint = 0
116!   do i3 = myBox(1,3),myBox(2,3)
117!   do i2 = myBox(1,2),myBox(2,2)
118!   do i1 = myBox(1,1),myBox(2,1)
119!     iPoint = iPoint + 1
120!     do iSpin = 1,nSpin
121!       dens(iPoint,iSpin) = (spin)density at point (i1,i2,i3)
122!     end do
123!   end do
124!   end do
125!   end do
126! but the call to cellXC must still be as given above, i.e. the
127! arguments lb1,ub1,... must be the lower and upper bounds of the
128! mesh box stored by each processor (not the allocated array bounds).
129! However, the arrays size MUST be (ub1-lb1+1)*(ub2-lb2+1)*(ub3-lb3+1)
130! The processor mesh boxes must not overlap, and they must cover the
131! entire unit cell mesh. This is NOT checked inside cellXC.
132!
133! ********* BEHAVIOUR ***********************************************
134! - Stops and prints a warning if functl is not one of LDA, GGA, or VDW
135! - The output values of Ex, Ec, Dx, Dc, and stress, are integrals over
136!   the whole unit cell, not over the mesh box of the local processor
137! - Since the exchange and correlation part is usually a small fraction
138!   of a typical electronic structure calculation, this routine has
139!   been coded with emphasis on simplicity and functionality, not in
140!   efficiency.
141! ********* DEPENDENCIES ********************************************
142! Routines called:
143!   GGAXC, LDAXC, meshKcut, RECLAT, TIMER, VOLCEL
144! Modules used in serial:
145!   precision : defines parameters 'dp' and 'grid_p' for real kinds
146!   xcmod     : sets and gets the xc functional(s) used
147!   vdW       : povides routines for the Van der Waals functional
148!   mesh3D    : provides routines to handle mesh arrays distributed
149!               among processors
150!   sys       : provides the stopping subroutine 'die'
151! Additional modules used in parallel:
152!   mpi_siesta
153! ********* PRECISION MODULE PROTOTYPE ******************************
154! The following module will set the required real types
155!
156! module precision
157!   integer,parameter:: dp     = kind(1.d0)
158!   integer,parameter:: grid_p = kind(1.d0)
159! end module precision
160!
161! ********* XCMOD MODULE PROTOTYPE **********************************
162! The following module will set the xc functional(s) directly, when
163! calling routine setxc (rather than reading them from the datafile,
164! as done in siesta):
165!
166! module xcmod
167!   use precision, only: dp
168!   implicit none
169!   integer, parameter :: maxFunc = 10
170!   integer,           save :: nXCfunc
171!   character(len=10), save :: XCauth(MaxFunc), XCfunc(MaxFunc)
172!   real(dp),          save :: XCweightX(MaxFunc), XCweightC(MaxFunc)
173! contains
174!   subroutine setXC( n, func, auth, wx, wc )
175!     implicit none
176!     integer,         intent(in):: n       ! Number of functionals
177!     character(len=*),intent(in):: func(n) ! Functional name labels
178!     character(len=*),intent(in):: auth(n) ! Functional author labels
179!     real(dp),        intent(in):: wx(n)   ! Functl weights for exchng
180!     real(dp),        intent(in):: wc(n)   ! Functl weights for correl
181!     nXCfunc = n
182!     XCfunc(1:n) = func(1:n)
183!     XCauth(1:n) = auth(1:n)
184!     XCweightX(1:n) = wx(1:n)
185!     XCweightC(1:n) = wc(1:n)
186!   end subroutine setXC
187! end module xcmod
188!
189! Sample usage:
190!   use precision, only: dp
191!   use xcmod, only: setxc
192!   call setXC( 1, (/'GGA'/), (/'PBE'/), (/1._dp/), (/1._dp/) )
193!   call cellXC( ... )
194!
195! Allowed functional/author values:
196! XCfunc:
197!   'LDA' or 'LSD' => Local density approximation
198!            'GGA' => Generalized gradients approx.
199!            'VDW' => Van der Waals functional
200! XCauth:
201!     'CA' or 'PZ' => LSD Perdew & Zunger, PRB 23, 5075 (1981)
202!           'PW91' => GGA Perdew & Wang, JCP, 100, 1290 (1994)
203!           'PW92' => LSD Perdew & Wang, PRB, 45, 13244 (1992). This is
204!                     the local density limit of the next:
205!            'PBE' => GGA Perdew, Burke & Ernzerhof, PRL 77, 3865 (1996)
206!           'RPBE' => GGA Hammer, Hansen & Norskov, PRB 59, 7413 (1999)
207!         'revPBE' => GGA Zhang & Yang, PRL 80,890(1998)
208!            'LYP' => GGA Becke-Lee-Yang-Parr (see subroutine blypxc)
209!             'WC' => GGA Wu-Cohen (see subroutine wcxc)
210!         'PBESOL' => GGA Perdew et al, PRL, 100, 136406 (2008)
211!           'AM05' => GGA Mattsson & Armiento, PRB, 79, 155101 (2009)
212!    'PBE(JsJrLO)' => GGA Reparametrizations of the PBE functional by
213!   'PBE(JsJrHEG)' => GGA   L.S.Pedroza et al, PRB 79, 201106 (2009) and
214!    'PBE(GcGxLO)' => GGA   M.M.Odashima et al, JCTC 5, 798 (2009)
215!   'PBE(GcGxHEG)' => GGA using 4 different combinations of criteria
216!          'DRSLL' => VDW Dion et al, PRL 92, 246401 (2004)
217!          'LMKLL' => VDW K.Lee et al, PRB 82, 081101 (2010)
218!            'KBM' => VDW optB88-vdW of J.Klimes et al, JPCM 22, 022201 (2010)
219!            'C09' => VDW V.R. Cooper, PRB 81, 161104 (2010)
220!             'BH' => VDW K. Berland and Per Hyldgaard, PRB 89, 035412 (2014)
221!             'VV' => VDW Vydrov-VanVoorhis, JCP 133, 244103 (2010)
222! *******************************************************************
223
224MODULE m_cellXC
225
226  implicit none
227
228  PUBLIC:: cellXC  ! Exchange and correlation in a periodic unit cell
229
230  PRIVATE ! Nothing is declared public beyond this point
231
232CONTAINS ! nothing else but public routine cellXC
233
234SUBROUTINE cellXC( irel, cell, nMesh, lb1, ub1, lb2, ub2, lb3, ub3, &
235                   nSpin, dens, Ex, Ec, Dx, Dc, stress, Vxc, dVxcdD, &
236                   keep_input_distribution )
237
238  ! Module routines
239  use mesh3D,  only: addMeshData   ! Accumulates a mesh array
240  use alloc,   only: alloc_default ! Sets (re)allocation defaults
241  use mesh3D,  only: associateMeshTask ! Associates mesh tasks & distr.
242  use mesh3D,  only: copyMeshData  ! Copies a box of a mesh array
243  use alloc,   only: de_alloc      ! Deallocates arrays
244  use sys,     only: die           ! Termination routine
245  use fftr,    only: fftk2r        ! Inverse real Fourier transform
246  use fftr,    only: fftr2k        ! Direct real Fourier transform
247  use mesh3D,  only: fftMeshDistr  ! Sets/gets distribution for FFTs
248  use mesh3D,  only: freeMeshDistr ! Frees a mesh distribution ID
249  use moreParallelSubs, only: miscAllReduce ! Adds variables from all processes
250  use xcmod,   only: getXC         ! Returns the XC functional to be used
251  use m_ggaxc, only: ggaxc         ! General GGA XC routine
252  use m_ldaxc, only: ldaxc         ! General LDA XC routine
253  use m_chkgmx,only: meshKcut      ! Returns the planewave cutoff of a mesh
254  use mesh3D,  only: myMeshBox     ! Returns the mesh box of my processor
255  use parallel,only: parallel_init ! Initializes nodes variables
256#ifdef DEBUG_XC
257  use moreParallelSubs, only: nodeString ! Returns a string with my node number
258  use m_vdwxc, only: qofrho        ! For debugging only
259#endif /* DEBUG_XC */
260  use alloc,   only: re_alloc      ! Reallocates arrays
261  use cellsubs,only: reclat        ! Finds reciprocal unit cell vectors
262  use mesh3D,  only: sameMeshDistr ! Finds if two mesh distr. are equal
263  use mesh3D,  only: setMeshDistr  ! Defines a new mesh distribution
264#ifdef DEBUG_XC
265  use debugXC, only: setDebugOutputUnit ! Sets udebug variable
266#endif /* DEBUG_XC */
267  use m_timer, only: timer_get     ! Returns counted times
268  use m_timer, only: timer_start   ! Starts counting time
269  use m_timer, only: timer_stop    ! Stops counting time
270  use m_vdwxc, only: vdw_localxc   ! Local LDA/GGA xc apropriate for vdW flavour
271  use m_vdwxc, only: vdw_decusp    ! Cusp correction to VDW energy
272  use m_vdwxc, only: vdw_get_qmesh ! Returns q-mesh for VDW integrals
273  use m_vdwxc, only: vdw_phi       ! Returns VDW functional kernel
274  use m_vdwxc, only: vdw_set_kcut  ! Fixes k-cutoff in VDW integrals
275  use m_vdwxc, only: vdw_theta     ! Returns VDW theta function
276  use cellsubs,only: volcel        ! Finds volume of unit cell
277
278  ! Module types and variables
279  use alloc,     only: allocDefaults ! Derived type for allocation defaults
280  use precision, only: dp            ! Double precision real type
281  use precision, only: gp=>grid_p    ! Real precision type of mesh arrays
282  use parallel,  only: nodes         ! Number of processor nodes
283#ifdef DEBUG_XC
284  use debugXC,   only: udebug        ! Output file unit for debug info
285#endif /* DEBUG_XC */
286
287  implicit none
288
289  ! Argument types and dimensions
290  integer, intent(in) :: irel        ! Relativistic exchange? (0=>no, 1=>yes)
291  real(dp),intent(in) :: cell(3,3)   ! Unit cell vectors cell(ixyz,ivector)
292  integer, intent(in) :: nMesh(3)    ! Total mesh divisions of each cell vector
293  integer, intent(in) :: lb1,lb2,lb3 ! Lower bounds of dens, Vxc, and dVxcdD
294  integer, intent(in) :: ub1,ub2,ub3 ! Upper bounds of dens, Vxc, and dVxcdD
295  integer, intent(in) :: nSpin       ! Number of spin components
296  real(gp),intent(in) :: dens(0:ub1-lb1,0:ub2-lb2,0:ub3-lb3,1:nSpin)
297                                     ! (spin) Density at mesh points
298  real(dp),intent(out):: Ex          ! Total exchange energy in unit cell
299  real(dp),intent(out):: Ec          ! Total correlation energy in unit cell
300  real(dp),intent(out):: Dx          ! IntegralOf(rho*(eps_x-v_x)) in unit cell
301  real(dp),intent(out):: Dc          ! IntegralOf(rho*(eps_c-v_c)) in unit cell
302  real(dp),intent(out):: stress(3,3) ! xc contribution to stress in unit cell
303  real(gp),intent(out):: Vxc(0:ub1-lb1,0:ub2-lb2,0:ub3-lb3,1:nSpin)
304                                     ! (spin) xc potential
305  real(gp),intent(out),optional:: &  ! dVxc/dDens (LDA only)
306                         dVxcdD(0:ub1-lb1,0:ub2-lb2,0:ub3-lb3,1:nSpin**2)
307  logical, intent(in),optional::  keep_input_distribution
308
309  ! Fix the order of the numerical derivatives
310  ! nn is the number of points used in each coordinate and direction,
311  ! i.e. a total of 6*nn neighbour points is used to find the gradients
312  integer,parameter  :: nn = 3
313
314  ! Fix energy unit:  Eunit=1.0 => Hartrees,
315  !                   Eunit=0.5 => Rydbergs,
316  !                   Eunit=0.03674903 => eV
317  real(dp),parameter :: Eunit = 0.5_dp
318
319  ! Fix the maximum allowed dispersion in CPU-time among different processors
320  real(dp),parameter :: maxUnbalance = 0.10_dp
321
322  ! Fix density threshold below which it will be taken as zero
323  real(dp),parameter :: Dmin = 1.0e-15_dp
324
325  ! Fix density threshold below which we make Vxc=0
326  real(dp),parameter :: Dcut = 1.0e-9_dp
327
328  ! Fix a minimum value of k vectors to avoid division by zero
329  real(dp),parameter :: kmin = 1.0e-15_dp
330
331  ! Fix the maximum number of functionals to be combined
332  integer, parameter :: maxFunc = 10
333
334  ! Subroutine name
335  character(len=*),parameter :: myName = 'cellXC '
336  character(len=*),parameter :: errHead = myName//'ERROR: '
337
338  ! Static internal variables
339  integer,save::   &
340     io2my=-1,     &! ID of communications task between ioDistr and myDistr
341     ioDistr=-1,   &! ID of input mesh distribution
342     kDistr=-1,    &! ID of mesh distribution used for FFTs
343     left2my(3)=-1,&! ID of commun. task from left-border boxes and myBox
344     my2io=-1,     &! ID of commun. task between myDistr and ioDistr
345     my2left(3)=-1,&! ID of commun. task from myBox to left-border boxes
346     my2rght(3)=-1,&! ID of commun. task from myBox to right-border boxes
347     myDistr=-1,   &! ID of mesh distrib. used internally in this routine
348     oldMesh(3)=-1,&! Total number of mesh points in previous call
349     rght2my(3)=-1  ! ID of commun. task from right-border boxes and myBox
350  real(dp),save::  &
351     myTime=1,     &! CPU time in this routine and processor in last iteration
352     timeAvge=1,   &! Average over processors of CPU time
353     timeDisp=huge(1.0_dp)  ! Dispersion over processors of CPU time
354
355  ! Internal pointers for dynamical allocation
356  real(dp), pointer:: &
357     dphidk(:,:)=>null(), dtdgd(:,:,:)=>null(), dtdd(:,:)=>null(), &
358     dudk(:)=>null(), phi(:,:)=>null(), tk(:)=>null(), tr(:)=>null(), &
359     ur(:)=>null(), uk(:)=>null()
360  real(gp), pointer:: &
361     tvac(:)=>null(), tvdw(:,:)=>null(), uvdw(:,:)=>null(), &
362     Vj(:)=>null(), workload(:,:,:)=>null()
363  real(gp), dimension(:,:,:,:), pointer:: &
364     Dleft=>null(), Dleft1=>null(), Dleft2=>null(), Dleft3=>null(), &
365     Drght=>null(), Drght1=>null(), Drght2=>null(), Drght3=>null(), &
366     myDens=>null(), mydVxcdD=>null(), myVxc=>null(), &
367     tq=>null(), uq=>null(), &
368     Vleft=>null(), Vleft1=>null(), Vleft2=>null(), Vleft3=>null(), &
369     Vrght=>null(), Vrght1=>null(), Vrght2=>null(), Vrght3=>null()
370  logical, pointer:: &
371     nonempty(:,:,:)=>null()
372
373  ! Other internal variables and arrays
374  integer :: &
375     boxLeft(2,3), boxRght(2,3), &
376     i1, i2, i3, ic, ii1, ii2, ii3, ik, in, &
377     ioBox(2,3), ip, iq, is, ix, iuvdw,  &
378     jj(3), jn, jp, js, jx, kBox(2,3), kMesh(3), kPoints, ks,  &
379     l11, l12, l13, l21, l22, l23, &
380     m11, m12, m13, m21, m22, m23, maxPoints, mesh(3), &
381     myBox(2,3), myMesh(3), myOldDistr, myPoints, &
382     ndSpin, nf, nonemptyPoints, nPoints, nq, ns, nXCfunc, &
383     r11, r12, r13, r21, r22, r23
384  real(dp):: &
385     comTime, D(nSpin), dedk, dEcdD(nSpin), dEcdGD(3,nSpin), &
386     dEcidDj, dEcuspdD(nSpin), dEcuspdGD(3,nSpin),  &
387     dExdD(nSpin), dExdGD(3,nSpin), dExidDj, &
388     dGdM(-nn:nn), dGidFj(3,3,-nn:nn), Dj(nSpin), &
389     dMdX(3,3), DV, dVol, Dtot, dXdM(3,3), &
390     dVcdD(nSpin*nSpin), dVxdD(nSpin*nSpin), &
391     EcuspVDW, Enl, epsC, epsCusp, epsNL, epsX, f1, f2, &
392     GD(3,nSpin), k, kcell(3,3), kcut, kvec(3),  &
393     stressVDW(3,3), sumTime, sumTime2, totTime, VDWweightC, volume, &
394     XCweightC(maxFunc), XCweightVDW, XCweightX(maxFunc)
395#ifdef DEBUG_XC
396  integer :: iip, jjp, jq
397  real(dp):: rmod, rvec(3)
398  integer,save:: myIter=0
399#endif /* DEBUG_XC */
400  logical :: &
401     GGA, GGAfunctl, VDW, VDWfunctl
402  character(len=20):: &
403     XCauth(maxFunc), XCfunc(maxFunc)
404  character(len=80):: &
405     errMsg
406  type(allocDefaults):: &
407     prevAllocDefaults
408
409  logical :: keep_input_distr
410
411#ifdef DEBUG_XC
412  ! Variables for debugging
413  real(dp):: GDtot(3), q, dqdD, dqdGD(3)
414#endif /* DEBUG_XC */
415
416  ! Make sure that variables in parallel module are set
417  call parallel_init()
418
419  ! Start time counter
420  call timer_start( myName )
421
422  keep_input_distr = .false.
423  if (present(keep_input_distribution)) then
424     keep_input_distr =   keep_input_distribution
425  endif
426
427#ifdef DEBUG_XC
428  ! Initialize udebug variable
429  call setDebugOutputUnit()
430#endif /* DEBUG_XC */
431
432  ! Get the functional(s) to be used
433  call getXC( nXCfunc, XCfunc, XCauth, XCweightX, XCweightC )
434
435  ! Set routine name for allocations
436  call alloc_default( old=prevAllocDefaults, copy=.true., shrink=.true. )
437
438  ! Find number of diagonal spin components
439  ndSpin = min( nSpin, 2 )
440
441  ! Find total number of mesh points in unit cell
442  nPoints = nMesh(1) * nMesh(2) * nMesh(3)
443
444  ! Find the cell volume and the volume per mesh point
445  volume = volcel( cell )
446  dVol = volume / nPoints
447
448  ! Set GGA and VDW switches
449  GGA = .false.
450  VDW = .false.
451  XCweightVDW = 0
452  do nf = 1,nXCfunc
453    if ( XCfunc(nf).eq.'LDA' .or. XCfunc(nf).eq.'lda' .or. &
454         XCfunc(nf).eq.'LSD' .or. XCfunc(nf).eq.'lsd' ) then
455      cycle ! nf loop
456    else if ( XCfunc(nf).eq.'GGA' .or. XCfunc(nf).eq.'gga') then
457      GGA = .true.
458    else if ( XCfunc(nf).eq.'VDW' .or. XCfunc(nf).eq.'vdw') then
459      GGA = .true.
460      VDW = .true.
461      XCweightVDW = XCweightVDW + XCweightC(nf)
462    else
463      write(errMsg,*) errHead//'Unknown functional ', XCfunc(nf)
464      call die( trim(errMsg) )
465    endif
466  enddo
467
468  ! Check argument dVxcdD
469  if (present(dVxcdD) .and. GGA) &
470    call die(errHead//'dVxcdD available only for LDA')
471
472  ! Find my mesh box in I/O distribution of mesh points
473  ioBox(1,1)=lb1;  ioBox(1,2)=lb2;  ioBox(1,3)=lb3
474  ioBox(2,1)=ub1;  ioBox(2,2)=ub2;  ioBox(2,3)=ub3
475  myMesh(:) = ioBox(2,:) - ioBox(1,:) + 1
476  myPoints = product(myMesh)
477
478  ! Get ID of the I/O distribution of mesh points
479  call setMeshDistr( ioDistr, nMesh, ioBox )
480
481  if (keep_input_distr) then
482
483     myDistr = ioDistr
484
485  else
486
487  ! If nMesh has changed, use input distribution also initially as myDistr
488  if (any(nMesh/=oldMesh)) call setMeshDistr( myDistr, nMesh, ioBox )
489  oldMesh = nMesh
490
491  ! Find new mesh distribution, if previous iteration was too unbalanced
492  if (nodes>1 .and. timeDisp/timeAvge>maxUnbalance) then
493    ! Find my node's mesh box using myDistr
494    call myMeshBox( nMesh, myDistr, myBox )
495    ! Allocate arrays for the density and workload per point in my box
496    call re_alloc( myDens, myBox(1,1),myBox(2,1), &
497                           myBox(1,2),myBox(2,2), &
498                           myBox(1,3),myBox(2,3), 1,ndSpin, myName//'myDens' )
499    call re_alloc(workload,myBox(1,1),myBox(2,1), &
500                           myBox(1,2),myBox(2,2), &
501                           myBox(1,3),myBox(2,3), myName//'workload' )
502    ! Copy density to my box
503    call associateMeshTask( io2my, ioDistr, myDistr )
504    call copyMeshData( nMesh, ioDistr, dens(:,:,:,1:ndSpin), &
505                              myBox, myDens(:,:,:,1:ndSpin), io2my )
506!    call copyMeshData( nMesh, ioDistr, dens(:,:,:,1:ndSpin), &
507!                              myBox, myDens(:,:,:,1:ndSpin) )
508    ! Find initial expected workload (zero if dens=0, one otherwise)
509    do i3 = myBox(1,3),myBox(2,3)
510    do i2 = myBox(1,2),myBox(2,2)
511    do i1 = myBox(1,1),myBox(2,1)
512      Dtot = sum( myDens(i1,i2,i3,1:ndSpin) )
513      if (Dtot>Dmin) then
514         workload(i1,i2,i3) = 1
515      else
516         workload(i1,i2,i3) = 0
517      end if
518    end do
519    end do
520    end do
521    ! Modify workload according to previous CPU time
522    workload = workload * myTime/timeAvge
523!    workload = workload * (myTime/myPoints) / (nodes*timeAvge/nPoints)
524    ! Find new distribution
525    call setMeshDistr( myDistr, nMesh, wlDistr=myDistr, workload=workload )
526    ! Deallocate temporary arrays
527    call de_alloc( workload, myName//'workload' )
528    call de_alloc( myDens,   myName//'myDens' )
529#ifdef DEBUG_XC
530    myIter = myIter+1
531    call myMeshBox( nMesh, myDistr, myBox )
532    write(udebug,'(a,i6,1x,3i5,3(1x,2i5))') &
533      myName//'Iter,nMesh,myBox=', myIter, nMesh, myBox
534#endif /* DEBUG_XC */
535  end if ! (nodes>1 .and. timeDisp/timeAvge>maxUnbalance)
536
537  endif
538#ifdef DEBUG_XC
539  ! Keep input distribution for the time being
540!  myDistr = ioDistr
541#endif /* DEBUG_XC */
542
543  ! Find the box of mesh points that belong to this node
544  call myMeshBox( nMesh, myDistr, myBox )
545
546  ! Find local number of mesh points along each axis
547  myMesh(:) = myBox(2,:) - myBox(1,:) + 1
548  myPoints = product(myMesh)
549
550  ! Allocate myDens and myVxc arrays if either:
551  ! - The parallel mesh distribution is changed internally (even with LDA)
552  ! - We need finite differences (GGA) and have a distributed mesh
553  if (.not.sameMeshDistr(myDistr,ioDistr) .or. (GGA .and. myDistr/=0)) then
554
555    m11=myBox(1,1); m12=myBox(1,2); m13=myBox(1,3)
556    m21=myBox(2,1); m22=myBox(2,2); m23=myBox(2,3)
557    ns = nSpin  ! Just a shorter name
558    call re_alloc( myDens, m11,m21, m12,m22, m13,m23, 1,ns, myName//'myDens' )
559    call re_alloc( myVxc,  m11,m21, m12,m22, m13,m23, 1,ns, myName//'myVxc'  )
560    if (present(dVxcdD)) &
561      call re_alloc( mydVxcdD, m11,m21, m12,m22, m13,m23, 1,ns**2, &
562                     myName//'mydVxcdD'  )
563    ! Allocate arrays for density and potential in neighbor regions
564    if (GGA) then
565      l11=m11-nn;     l12=m12-nn;     l13=m13-nn
566      l21=m11-1;      l22=m12-1;      l23=m13-1
567      r11=m21+1;      r12=m22+1;      r13=m23+1
568      r21=m21+nn;     r22=m22+nn;     r23=m23+nn
569      call re_alloc( Dleft1, l11,l21, m12,m22, m13,m23, 1,ns, myName//'Dleft1' )
570      call re_alloc( Drght1, r11,r21, m12,m22, m13,m23, 1,ns, myName//'Drght1' )
571      call re_alloc( Dleft2, m11,m21, l12,l22, m13,m23, 1,ns, myName//'Dleft2' )
572      call re_alloc( Drght2, m11,m21, r12,r22, m13,m23, 1,ns, myName//'Drght2' )
573      call re_alloc( Dleft3, m11,m21, m12,m22, l13,l23, 1,ns, myName//'Dleft3' )
574      call re_alloc( Drght3, m11,m21, m12,m22, r13,r23, 1,ns, myName//'Drght3' )
575      call re_alloc( Vleft1, l11,l21, m12,m22, m13,m23, 1,ns, myName//'Vleft1' )
576      call re_alloc( Vrght1, r11,r21, m12,m22, m13,m23, 1,ns, myName//'Vrght1' )
577      call re_alloc( Vleft2, m11,m21, l12,l22, m13,m23, 1,ns, myName//'Vleft2' )
578      call re_alloc( Vrght2, m11,m21, r12,r22, m13,m23, 1,ns, myName//'Vrght2' )
579      call re_alloc( Vleft3, m11,m21, m12,m22, l13,l23, 1,ns, myName//'Vleft3' )
580      call re_alloc( Vrght3, m11,m21, m12,m22, r13,r23, 1,ns, myName//'Vrght3' )
581    end if ! (GGA)
582  end if ! (myDistr/=ioDistr .or. GGA)
583
584  ! Redistribute dens data
585  if (associated(myDens)) then
586    call associateMeshTask( io2my, ioDistr, myDistr )
587    call copyMeshData( nMesh, ioDistr, dens, myBox, myDens, io2my )
588!    call copyMeshData( nMesh, ioDistr, dens, myBox, myDens )
589  end if
590
591  ! If mesh arrays are distributed, find density of neighbor regions
592  if (GGA .and. myDistr/=0) then ! Distributed Dens data
593    ! Find density of neighbor regions
594    do ic = 1,3  ! Loop on cell axes
595      if (ic==1) then
596        Dleft => Dleft1
597        Drght => Drght1
598      else if (ic==2) then
599        Dleft => Dleft2
600        Drght => Drght2
601      else ! (ic==3)
602        Dleft => Dleft3
603        Drght => Drght3
604      end if ! (ic==1)
605      boxLeft = myBox
606      boxLeft(1,ic) = myBox(1,ic)-nn
607      boxLeft(2,ic) = myBox(1,ic)-1
608      boxRght = myBox
609      boxRght(1,ic) = myBox(2,ic)+1
610      boxRght(2,ic) = myBox(2,ic)+nn
611      call associateMeshTask( my2left(ic), myDistr )
612      call associateMeshTask( my2rght(ic), myDistr )
613      call copyMeshData( nMesh, myDistr, myDens, boxLeft, Dleft, my2left(ic) )
614      call copyMeshData( nMesh, myDistr, myDens, boxRght, Drght, my2rght(ic) )
615!      call copyMeshData( nMesh, myDistr, myDens, boxLeft, Dleft )
616!      call copyMeshData( nMesh, myDistr, myDens, boxRght, Drght )
617    end do ! ic
618  end if ! (GGA .and. myDistr/=0)
619
620  ! Find Jacobian matrix dx/dmesh and gradient coeficients dGrad_i/dDens_j
621  if (GGA) then
622
623    ! Find mesh unit vectors dx/dmesh
624    do ic = 1,3
625      dXdM(:,ic) = cell(:,ic) / nMesh(ic)
626    enddo
627
628    ! Find reciprocal mesh vectors dmesh/dx
629    call reclat( dXdM, dMdX, 0 )
630
631    ! Find weights of numerical derivation from Lagrange interp. formula
632    do in = -nn,nn
633      f1 = 1.0_dp
634      f2 = 1.0_dp
635      do jn = -nn,nn
636        if (jn/=in .and. jn/=0) f1 = f1 * (0  - jn)
637        if (jn/=in)             f2 = f2 * (in - jn)
638      enddo
639      dGdM(in) = f1 / f2
640    enddo
641    dGdM(0) = 0.0_dp
642
643    ! Find the weights for the derivative d(gradF(i))/d(F(j)) of
644    ! the gradient at point i with respect to the value at point j
645    do in = -nn,nn
646      do ic = 1,3
647        dGidFj(:,ic,in) = dMdX(:,ic) * dGdM(in)
648      enddo
649    enddo
650
651  endif ! (GGA)
652
653  ! Initialize output
654  Ex = 0.0_dp
655  Ec = 0.0_dp
656  Dx = 0.0_dp
657  Dc = 0.0_dp
658  stress(:,:) = 0.0_dp
659  Vxc(:,:,:,:) = 0.0_gp
660  if (present(dVxcdD)) dVxcdD(:,:,:,:) = 0.0_gp
661
662  ! VdW initializations -------------------------------------------------------
663  if (VDW) then
664
665    ! Find mask of nonempty points
666    call re_alloc( nonempty, 0,myMesh(1)-1, 0,myMesh(2)-1, 0,myMesh(3)-1, &
667                   myName//'nonempty' )
668    if (associated(myDens)) then
669      do i3 = myBox(1,3),myBox(2,3)
670      do i2 = myBox(1,2),myBox(2,2)
671      do i1 = myBox(1,1),myBox(2,1)
672        Dtot = sum( myDens(i1,i2,i3,1:ndSpin) )
673        if ( Dtot >= Dmin ) then
674          nonempty(i1-myBox(1,1),i2-myBox(1,2),i3-myBox(1,3)) = .true.
675        else
676          nonempty(i1-myBox(1,1),i2-myBox(1,2),i3-myBox(1,3)) = .false.
677        end if
678      end do
679      end do
680      end do
681    else
682      do i3 = 0, myMesh(3)-1
683      do i2 = 0, myMesh(2)-1
684      do i1 = 0, myMesh(1)-1
685        Dtot = sum( dens(i1,i2,i3,1:ndSpin) )
686        if ( Dtot >= Dmin ) then
687          nonempty(i1,i2,i3) = .true.
688        else
689          nonempty(i1,i2,i3) = .false.
690        end if
691      end do
692      end do
693      end do
694    end if
695    nonemptyPoints = count( nonempty )
696
697    ! Find distribution of k-mesh points
698    call fftMeshDistr( nMesh, kDistr )
699
700    ! Find my node's box of k-mesh points
701    call myMeshBox( nMesh, kDistr, kBox )
702    kMesh(:) = kBox(2,:) - kBox(1,:) + 1
703    kPoints = product( kMesh )
704
705    ! Find a size large enough for both real and reciprocal points
706    maxPoints = max( nonemptyPoints, kPoints )
707    mesh = max( myMesh, kMesh )
708
709    ! Allocate VdW arrays
710    call vdw_get_qmesh( nq )
711    call re_alloc( dudk,  1,nq,                   myName//'dudk' )
712    call re_alloc( dtdd,  1,nq, 1,nSpin,          myName//'dtdd' )
713    call re_alloc( dtdgd,  1,3,    1,nq, 1,nSpin, myName//'dtdgd')
714    call re_alloc( dphidk,1,nq,    1,nq,          myName//'dphidk')
715    call re_alloc( phi,   1,nq,    1,nq,          myName//'phi'  )
716    call re_alloc( tk,    1,nq,                   myName//'tk'   )
717    call re_alloc( tr,    1,nq,                   myName//'tr'   )
718    call re_alloc( tvac,  1,nq,                   myName//'tvac' )
719    call re_alloc( ur,    1,nq,                   myName//'ur'   )
720    call re_alloc( uk,    1,nq,                   myName//'uk'   )
721    call re_alloc( tvdw, 1,maxPoints, 1,nq,       myName//'tvdw' )
722    call re_alloc( tq, 0,mesh(1)-1, 0,mesh(2)-1, 0,mesh(3)-1, 1,1, &
723                   myName//'tq' )
724
725    ! Assign another name to these arrays (but be careful!!!)
726    uvdw => tvdw
727    uq   => tq
728
729    ! Set mesh cutoff to filter VdW kernel
730    kcut = meshKcut( cell, nMesh )
731    call vdw_set_kcut( kcut )
732
733    ! Find vacuum value of theta
734    D = 0._dp
735    GD = 0._dp
736    call vdw_theta( nSpin, D, GD, tr, dtdd, dtdgd )
737    tvac = tr
738
739#ifdef DEBUG_XC
740!    call timer_start( 'cellXC1' )
741#endif /* DEBUG_XC */
742
743    ! Loop on mesh points to find theta_q(r)
744    ip = 0
745    do i3 = 0,myMesh(3)-1
746    do i2 = 0,myMesh(2)-1
747    do i1 = 0,myMesh(1)-1
748
749      ! Skip point if density=0
750      if (.not.nonempty(i1,i2,i3)) cycle
751      ip = ip + 1
752
753      ! Find mesh indexes relative to cell origin
754      ii1 = i1 + myBox(1,1)
755      ii2 = i2 + myBox(1,2)
756      ii3 = i3 + myBox(1,3)
757
758      ! Find density at this point. Notice that mesh indexes of dens and myDens
759      ! are relative to box and cell origins, respectively
760      if (associated(myDens)) then
761        D(:) = myDens(ii1,ii2,ii3,:)
762      else
763        D(:) = dens(i1,i2,i3,:)
764      end if
765
766      ! Avoid negative densities
767      D(1:ndSpin) = max( D(1:ndSpin), 0._dp )
768
769      !  Find gradient of density at this point
770      call getGradDens( ii1, ii2, ii3, GD )   ! This subr. is contained below
771
772      ! Find expansion of theta(q(r)) for VdW
773      call vdw_theta( nSpin, D, GD, tr, dtdd, dtdgd )
774      tvdw(ip,1:nq) = tr(1:nq)
775
776#ifdef DEBUG_XC
777!      ! Write q(r) for debugging
778!      if (i3==myBox(1,3)) then
779!        if (i1==myBox(1,1) .and. i2==myBox(1,2)) then
780!          open(unit=47,file='qvdw.out')
781!        else if (i1==myBox(2,1) .and. i2==myBox(2,2)) then
782!          close(unit=47)
783!        end if
784!        Dtot = sum(D(1:ndSpin))
785!        do ix = 1,3
786!          GDtot(ix) = sum(GD(ix,1:ndSpin))
787!        end do
788!        call qofrho( Dtot, GDtot, q, dqdD, dqdGD )
789!        if (i1==myBox(1,1)) write(47,*) ' '
790!        write(47,*) q
791!      end if
792#endif /* DEBUG_XC */
793
794    enddo ! i1
795    enddo ! i2
796    enddo ! i3  (End of loop on mesh points to find theta_q(r))
797
798#ifdef DEBUG_XC
799!    call timer_stop( 'cellXC1' )
800!    call timer_start( 'cellXC2' )
801!    call timer_start( 'cellXC2.1' )
802#endif /* DEBUG_XC */
803
804    ! Fourier-tranform theta_iq(r)
805    do iq = 1,nq
806      ! Unpack nonempty points into a full-mesh array
807      ! Last index (=1) is just because fftr2k expects a rank 4 array
808!      tq(0:myMesh(1)-1,0:myMesh(2)-1,0:myMesh(3)-1,1) = &  ! Slower!!!
809!        unpack( tvdw(1:nonemptyPoints,iq), mask=nonempty, field=tvac(iq) )
810      ip = 0
811      do i3 = 0,myMesh(3)-1
812      do i2 = 0,myMesh(2)-1
813      do i1 = 0,myMesh(1)-1
814        if (nonempty(i1,i2,i3)) then
815          ip = ip+1
816          tq(i1,i2,i3,1) = tvdw(ip,iq)
817        else
818          tq(i1,i2,i3,1) = tvac(iq)
819        end if
820      end do
821      end do
822      end do
823      ! Peform Fourier transform
824      call fftr2k( nMesh, myDistr, tq(:,:,:,1:1) )
825      ! Store all reciprocal space points
826!      tvdw(1:kPoints,iq) = &   ! Slower!!!
827!        reshape( tq(0:kMesh(1)-1,0:kMesh(2)-1,0:kMesh(3)-1,1), (/kPoints/) )
828      ik = 0
829      do i3 = 0,kMesh(3)-1
830      do i2 = 0,kMesh(2)-1
831      do i1 = 0,kMesh(1)-1
832        ik = ik+1
833        tvdw(ik,iq) = tq(i1,i2,i3,1)
834      end do
835      end do
836      end do
837    end do ! iq
838
839#ifdef DEBUG_XC
840!    call timer_stop( 'cellXC2.1' )
841!    call timer_start( 'cellXC2.2' )
842#endif /* DEBUG_XC */
843
844    ! Find reciprocal unit vectors
845    call reclat( cell, kcell, 1 )
846
847    ! Initialize nonlocal parts of VdW correlation energy and stress
848    Enl = 0.0_dp
849    EcuspVDW = 0.0_dp
850    stressVDW = 0.0_dp
851
852    ! Loop on k-mesh points
853    ik = 0
854    do i3 = 0,kMesh(3)-1   ! Mesh indexes relative to my k-box origin
855    do i2 = 0,kMesh(2)-1
856    do i1 = 0,kMesh(1)-1
857      ik = ik + 1
858
859      ! Find k vector
860      ii1 = i1 + kBox(1,1)  ! Mesh indexes relative to reciprocal cell origin
861      ii2 = i2 + kBox(1,2)
862      ii3 = i3 + kBox(1,3)
863      if (ii1 > nMesh(1)/2) ii1 = ii1 - nMesh(1)
864      if (ii2 > nMesh(2)/2) ii2 = ii2 - nMesh(2)
865      if (ii3 > nMesh(3)/2) ii3 = ii3 - nMesh(3)
866      kvec(:) = kcell(:,1)*ii1 + kcell(:,2)*ii2 + kcell(:,3)*ii3
867      k = sqrt( sum(kvec**2) )
868
869      if (k<kcut) then
870
871        ! Find Fourier transform of VdW kernel phi(r,r')
872        call vdw_phi( k, phi, dphidk )
873
874        ! Find Fourier transform of Int_dr'*phi(r,r')*rho(r')
875        ! Warning: tvdw and uvdw are the same array
876        tk(1:nq) = tvdw(ik,1:nq)
877        uk(1:nq) = matmul( tk(1:nq), phi(1:nq,1:nq) )
878        uvdw(ik,1:nq) = uk(1:nq)
879
880#ifdef DEBUG_XC
881!        ! Find contribution to 0.5*Int_dr*Int_dr'*rho(r)*phi(r,r')*rho(r')
882!        ! Factor 0.5 in the integral cancels with a factor 2 required
883!        ! because tk and uk contain only the real or imaginary parts
884!        ! of the Fourier components (see fftr2k)
885!        Enl = Enl + volume * sum(uk*tk)
886#endif /* DEBUG_XC */
887
888        ! Find contribution to stress from change of k vectors with strain
889        if (k > kmin) then  ! Avoid k=0 (whose contribution is zero)
890          dudk(1:nq) = matmul( tk(1:nq), dphidk(1:nq,1:nq) )
891          ! See note above on cancelation of factors 0.5 and 2 in Enl
892          dedk = sum(dudk*tk) * (volume / k)
893          do jx = 1,3
894            do ix = 1,3
895              stressVDW(ix,jx) = stressVDW(ix,jx) - dedk * kvec(ix) * kvec(jx)
896            end do
897          end do
898        end if ! (k>kmin)
899
900      else
901        uvdw(ik,1:nq) = 0.0_gp
902      end if ! (k<kcut)
903
904    end do ! i1
905    end do ! i2
906    end do ! i3  End of loop on k-mesh points
907
908#ifdef DEBUG_XC
909!    call timer_stop( 'cellXC2.2' )
910!    call timer_start( 'cellXC2.3' )
911#endif /* DEBUG_XC */
912
913#ifdef DEBUG_XC
914!    print'(a,3f12.6)','cellXC: Ex,Ec,Enl (eV) =', &
915!      Ex/0.03674903_dp, Ec/0.03674903_dp, Enl/0.03674903_dp
916#endif /* DEBUG_XC */
917
918    ! Fourier-tranform u_q(k) back to real space
919    do iq = 1,nq
920!      uq(0:kMesh(1)-1,0:kMesh(2)-1,0:kMesh(3)-1,1) = &    ! Slower!!!
921!        reshape( uvdw(1:kPoints,iq), kMesh )
922      ik = 0
923      do i3 = 0,kMesh(3)-1
924      do i2 = 0,kMesh(2)-1
925      do i1 = 0,kMesh(1)-1
926        ik = ik+1
927        uq(i1,i2,i3,1) = uvdw(ik,iq)
928      end do
929      end do
930      end do
931      call fftk2r( nMesh, myDistr, uq(:,:,:,1:1) )
932!      uvdw(1:nonemptyPoints,iq) = &    ! Slower!!!
933!        pack( uq(0:myMesh(1)-1,0:myMesh(2)-1,0:myMesh(3)-1,1), mask=nonempty )
934      ip = 0
935      do i3 = 0,myMesh(3)-1
936      do i2 = 0,myMesh(2)-1
937      do i1 = 0,myMesh(1)-1
938        if (nonempty(i1,i2,i3)) then
939          ip = ip+1
940          uvdw(ip,iq) = uq(i1,i2,i3,1)
941        end if
942      end do
943      end do
944      end do
945    end do
946
947#ifdef DEBUG_XC
948!    call timer_stop( 'cellXC2.3' )
949!    call timer_stop( 'cellXC2' )
950#endif /* DEBUG_XC */
951
952#ifdef DEBUG_XC
953!    ! Re-initialize Enl if it has been calculated in reciprocal space
954!    Enl = 0.0_dp
955#endif /* DEBUG_XC */
956
957  end if ! (VDW) End of VdW initializations------------------------------------
958
959#ifdef DEBUG_XC
960!  call timer_start( 'cellXC3' )
961#endif /* DEBUG_XC */
962
963  ! Loop on mesh points -------------------------------------------------------
964  ip = 0
965  do i3 = 0,myMesh(3)-1   ! Mesh indexes relative to my box origin
966  do i2 = 0,myMesh(2)-1
967  do i1 = 0,myMesh(1)-1
968    ii1 = i1 + myBox(1,1) ! Mesh indexes relative to cell origin
969    ii2 = i2 + myBox(1,2)
970    ii3 = i3 + myBox(1,3)
971
972    ! Find density at this point. Notice that mesh indexes of dens and myDens
973    ! are relative to box and cell origins, respectively
974    if (associated(myDens)) then
975      D(:) = myDens(ii1,ii2,ii3,:)
976    else
977      D(:) = dens(i1,i2,i3,:)
978    end if
979
980    ! Skip point if density=0
981    Dtot = sum(D(1:ndSpin))
982    if (Dtot < Dmin) cycle ! i1 loop on mesh points
983    ip = ip + 1
984
985    ! Avoid negative densities
986    D(1:ndSpin) = max( D(1:ndSpin), 0._dp )
987
988    ! Find gradient of density at this point
989    if (GGA) call getGradDens( ii1, ii2, ii3, GD )
990
991    ! Loop over all functionals
992    do nf = 1,nXCfunc
993
994      ! Is this a VDW or GGA?
995      if (XCfunc(nf).eq.'VDW' .or. XCfunc(nf).eq.'vdw') then
996        VDWfunctl = .true.
997        GGAfunctl = .true.
998      else if (XCfunc(nf).eq.'GGA' .or. XCfunc(nf).eq.'gga') then
999        VDWfunctl = .false.
1000        GGAfunctl = .true.
1001      else
1002        VDWfunctl = .false.
1003        GGAfunctl = .false.
1004      endif
1005
1006      ! Find exchange and correlation energy densities and their
1007      ! derivatives with respect to density and density gradient
1008      if (VDWfunctl) then
1009
1010        ! Local exchange-corr. part from the apropriate LDA/GGA functional
1011        call vdw_localxc( irel, nSpin, D, GD, epsX, epsC, &
1012                          dExdD, dEcdD, dExdGD, dEcdGD )
1013
1014#ifdef DEBUG_XC
1015!        ! Select only non local correlation energy and potential
1016!        epsX = 0
1017!        epsC = 0
1018!        dExdD = 0
1019!        dEcdD = 0
1020!        dExdGD = 0
1021!        dEcdGD = 0
1022#endif /* DEBUG_XC */
1023
1024        ! Local cusp correction to nonlocal VdW energy integral
1025        call vdw_decusp( nSpin, D, GD, epsCusp, dEcuspdD, dEcuspdGD )
1026
1027#ifdef DEBUG_XC
1028!        ! Select only non local correlation energy and potential
1029!        epsCusp = 0
1030!        dEcuspdD = 0
1031!        dEcuspdGD = 0
1032#endif /* DEBUG_XC */
1033
1034        ! Find expansion of theta(q(r)) for VdW
1035        call vdw_theta( nSpin, D, GD, tr, dtdd, dtdgd )
1036
1037        ! Add nonlocal VdW energy contribution and its derivatives
1038        Dtot = sum(D(1:ndSpin))
1039        ur(1:nq) = uvdw(ip,1:nq)
1040        epsNL = epsCusp + 0.5_dp*sum(ur*tr)/(Dtot+tiny(Dtot))
1041        epsC = epsC + epsNL
1042        do is = 1,nSpin
1043          dEcdD(is) = dEcdD(is) + dEcuspdD(is) + sum(ur(1:nq)*dtdd(1:nq,is))
1044          do ix = 1,3
1045            dEcdGD(ix,is) = dEcdGD(ix,is) + dEcuspdGD(ix,is) + &
1046                            sum(ur(1:nq)*dtdgd(ix,1:nq,is))
1047          end do
1048        end do
1049
1050        ! Sum nonlocal VdW contributions for debugging
1051        EcuspVDW = EcuspVDW + dVol * Dtot * epsCusp
1052        Enl = Enl + dVol * Dtot * epsNL
1053
1054#ifdef DEBUG_XC
1055!        if (i1==0 .and. i2==0 .and. i3==0) then
1056!          open( unit=33, file='epsNL'//trim(nodeString()), form='formatted' )
1057!          write(33,'(3f12.6,i6)') (cell(:,ix),nMesh(ix),ix=1,3)
1058!        end if
1059!        write(33,'(3i6,3e15.6)') ii1, ii2, ii3, Dtot, epsNL, epsX+epsC
1060!        ! Alternatively, write x,y,z instead of i1,i2,i3
1061!        write(33,'(3f12.6,2e15.6)') &
1062!          ii1*cell(:,1)/nMesh(1), &
1063!          ii2*cell(:,2)/nMesh(2), &
1064!          ii3*cell(:,3)/nMesh(3), Dtot, epsNL
1065#endif /* DEBUG_XC */
1066
1067      else if (GGAfunctl) then
1068        call ggaxc( XCauth(nf), irel, nSpin, D, GD, &
1069                    epsX, epsC, dExdD, dEcdD, dExdGD, dEcdGD )
1070      else ! (.not.VDWfunctl .and. .not.GGAfunctl)
1071        call ldaxc( XCauth(nf), irel, nSpin, D, &
1072                    epsX, epsC, dExdD, dEcdD, dVxdD, dVcdD )
1073      endif ! (VDWfunctl)
1074
1075      ! Scale return values by weight for this functional
1076      epsX = XCweightX(nf)*epsX
1077      epsC = XCweightC(nf)*epsC
1078      dExdD(:) = XCweightX(nf)*dExdD(:)
1079      dEcdD(:) = XCweightC(nf)*dEcdD(:)
1080      if (GGAfunctl) then
1081        dExdGD(:,:) = XCweightX(nf)*dExdGD(:,:)
1082        dEcdGD(:,:) = XCweightC(nf)*dEcdGD(:,:)
1083      endif
1084      if (present(dVxcdD)) then
1085        dVxdD(:) = XCweightX(nf)*dVxdD(:)
1086        dVcdD(:) = XCweightC(nf)*dVcdD(:)
1087      endif
1088
1089      ! Add contributions to exchange-correlation energy and its
1090      ! derivatives with respect to density at this point
1091      Ex = Ex + dVol * Dtot * epsX
1092      Ec = Ec + dVol * Dtot * epsC
1093      Dx = Dx + dVol * Dtot * epsX
1094      Dc = Dc + dVol * Dtot * epsC
1095      Dx = Dx - dVol * sum(D(:)*dExdD(:))
1096      Dc = Dc - dVol * sum(D(:)*dEcdD(:))
1097      if (associated(myVxc)) then
1098        myVxc(ii1,ii2,ii3,:) = myVxc(ii1,ii2,ii3,:) + dExdD(:) + dEcdD(:)
1099      else  ! Add directly to output array Vxc
1100        Vxc(i1,i2,i3,:) = Vxc(i1,i2,i3,:) + dExdD(:) + dEcdD(:)
1101      end if ! (associated(myVxc))
1102      if (present(dVxcdD)) then
1103        if (associated(mydVxcdD)) then
1104          mydVxcdD(ii1,ii2,ii3,:) = mydVxcdD(ii1,ii2,ii3,:) + dVxdD(:)+dVcdD(:)
1105        else
1106          dVxcdD(i1,i2,i3,:) = dVxcdD(i1,i2,i3,:) + dVxdD(:) + dVcdD(:)
1107        end if
1108      end if ! (present(dVxcdD))
1109
1110      ! Add contributions to exchange-correlation potential
1111      ! with respect to density at neighbor points
1112      if (GGAfunctl) then
1113        if (myDistr==0) then   ! dens data not distributed
1114          do ic = 1,3          ! Loop on cell axes
1115            do in = -nn,nn     ! Loop on finite difference index
1116              ! Find mesh indexes of neighbor point
1117              jj(1) = ii1
1118              jj(2) = ii2
1119              jj(3) = ii3
1120              jj(ic) = modulo( jj(ic)+in, nMesh(ic) )
1121              ! Add contributions from dE/dGradDi * dGradDi/dDj
1122              ! Notice: for myDistr==0, box and cell origins are equal
1123              do is = 1,nSpin  ! Loop on spin component
1124                dExidDj = sum( dExdGD(:,is) * dGidFj(:,ic,in) )
1125                dEcidDj = sum( dEcdGD(:,is) * dGidFj(:,ic,in) )
1126                Dx = Dx - dVol * dens(jj(1),jj(2),jj(3),is) * dExidDj
1127                Dc = Dc - dVol * dens(jj(1),jj(2),jj(3),is) * dEcidDj
1128                Vxc(jj(1),jj(2),jj(3),is) = Vxc(jj(1),jj(2),jj(3),is) + &
1129                                            dExidDj + dEcidDj
1130              end do ! is
1131            end do ! in
1132          end do ! ic
1133        else ! (myDistr/=0)      Distributed dens data
1134          do ic = 1,3          ! Loop on cell axes
1135            if (ic==1) then
1136              Dleft => Dleft1
1137              Drght => Drght1
1138              Vleft => Vleft1
1139              Vrght => Vrght1
1140            else if (ic==2) then
1141              Dleft => Dleft2
1142              Drght => Drght2
1143              Vleft => Vleft2
1144              Vrght => Vrght2
1145            else ! (ic==3)
1146              Dleft => Dleft3
1147              Drght => Drght3
1148              Vleft => Vleft3
1149              Vrght => Vrght3
1150            end if ! (ic==1)
1151            do in = -nn,nn     ! Loop on finite difference index
1152              ! Find indexes jj(:) of neighbor point
1153              jj(1) = ii1  ! Warning: jj(:)=(/ii1,ii2,ii3/) is VERY slow!!!
1154              jj(2) = ii2
1155              jj(3) = ii3
1156              jj(ic) = jj(ic) + in
1157              ! Point Dj and Vj to the apropriate array
1158              if (jj(ic)<myBox(1,ic)) then ! Left neighbor region
1159                Dj =  Dleft(jj(1),jj(2),jj(3),1:nSpin)
1160                Vj => Vleft(jj(1),jj(2),jj(3),1:nSpin)
1161              else if (jj(ic)>myBox(2,ic)) then ! Right region
1162                Dj =  Drght(jj(1),jj(2),jj(3),1:nSpin)
1163                Vj => Vrght(jj(1),jj(2),jj(3),1:nSpin)
1164              else ! j within myBox
1165                Dj = myDens(jj(1),jj(2),jj(3),1:nSpin)
1166                Vj => myVxc(jj(1),jj(2),jj(3),1:nSpin)
1167              end if
1168              ! Add contributions from dE/dGradDi * dGradDi/dDj
1169              do is = 1,nSpin  ! Loop on spin component
1170                dExidDj = sum( dExdGD(:,is) * dGidFj(:,ic,in) )
1171                dEcidDj = sum( dEcdGD(:,is) * dGidFj(:,ic,in) )
1172                Dx = Dx - dVol * Dj(is) * dExidDj
1173                Dc = Dc - dVol * Dj(is) * dEcidDj
1174                Vj(is) = Vj(is) + dExidDj + dEcidDj
1175              end do ! is
1176            end do ! in
1177          end do ! ic
1178        end if ! (myDistr==0)
1179      end if ! (GGAfunctl)
1180
1181      ! Add contribution to stress due to change in gradient of density
1182      ! originated by the deformation of the mesh with strain
1183      if (GGAfunctl) then
1184        do jx = 1,3
1185          do ix = 1,3
1186            do is = 1,nSpin
1187              stress(ix,jx) = stress(ix,jx) - dVol * GD(ix,is) * &
1188                               ( dExdGD(jx,is) + dEcdGD(jx,is) )
1189            enddo
1190          enddo
1191        enddo
1192      endif ! (GGAfunctl)
1193
1194    enddo ! nf (End of loop over functionals)
1195
1196  enddo ! i1
1197  enddo ! i2
1198  enddo ! i3  (End of loop over mesh points)-----------------------------------
1199
1200#ifdef DEBUG_XC
1201  close( unit=33 )
1202#endif /* DEBUG_XC */
1203
1204  ! If mesh arrays are distributed, add Vxc contribution from neighbor regions
1205  if (GGA .and. myDistr/=0) then ! Distributed Vxc data
1206    ! Add neighbor regions contribution to myVxc array
1207    do ic = 1,3  ! Loop on cell axes
1208      if (ic==1) then
1209        Vleft => Vleft1
1210        Vrght => Vrght1
1211      else if (ic==2) then
1212        Vleft => Vleft2
1213        Vrght => Vrght2
1214      else ! (ic==3)
1215        Vleft => Vleft3
1216        Vrght => Vrght3
1217      end if ! (ic==1)
1218      boxLeft = myBox
1219      boxLeft(1,ic) = myBox(1,ic)-nn
1220      boxLeft(2,ic) = myBox(1,ic)-1
1221      boxRght = myBox
1222      boxRght(1,ic) = myBox(2,ic)+1
1223      boxRght(2,ic) = myBox(2,ic)+nn
1224      call associateMeshTask( left2my(ic), myDistr )
1225      call associateMeshTask( rght2my(ic), myDistr )
1226      call addMeshData( nMesh, boxLeft, Vleft, myDistr, myVxc, left2my(ic) )
1227      call addMeshData( nMesh, boxRght, Vrght, myDistr, myVxc, rght2my(ic) )
1228!      call addMeshData( nMesh, boxLeft, Vleft, myDistr, myVxc )
1229!      call addMeshData( nMesh, boxRght, Vrght, myDistr, myVxc )
1230    end do ! ic
1231  end if ! (GGA .and. myDistr/=0)
1232
1233  ! Make Vxc=0 if VDWfunctl and Dens<Dcut, to avoid singularities
1234  if (VDWfunctl) then
1235    do i3 = 0,myMesh(3)-1   ! Mesh indexes relative to my box origin
1236    do i2 = 0,myMesh(2)-1
1237    do i1 = 0,myMesh(1)-1
1238      ii1 = i1 + myBox(1,1) ! Mesh indexes relative to cell origin
1239      ii2 = i2 + myBox(1,2)
1240      ii3 = i3 + myBox(1,3)
1241      if (associated(myDens)) then
1242        Dtot = sum( myDens(ii1,ii2,ii3,1:ndSpin) )
1243      else
1244        Dtot = sum( dens(i1,i2,i3,1:ndSpin) )
1245      end if
1246      if (Dtot<Dcut) then
1247        if (associated(myVxc)) then
1248          myVxc(ii1,ii2,ii3,:) = 0
1249        else
1250          Vxc(i1,i2,i3,:) = 0
1251        end if
1252        if (present(dVxcdD)) then
1253          if (associated(mydVxcdD)) then
1254            mydVxcdD(ii1,ii2,ii3,:) = 0
1255          else
1256            dVxcdD(i1,i2,i3,:) = 0
1257          end if
1258        end if ! (present(dVxcdD))
1259      end if ! (Dtot<Dcut)
1260    end do
1261    end do
1262    end do
1263  end if ! (VDWfunctl)
1264
1265  ! Copy Vxc data to output arrays
1266  if (associated(myVxc)) then  ! Distributed Vxc array
1267    if (sameMeshDistr(ioDistr,myDistr)) then ! Just copy myVxc to output array
1268      Vxc = myVxc
1269      if (present(dVxcdD)) dVxcdD = mydVxcdD
1270    else ! Redistribution required
1271      ! Copy myVxc and dVxcdD to output box
1272      call associateMeshTask( my2io, myDistr )
1273      call copyMeshData( nMesh, myDistr, myVxc, ioBox, Vxc, my2io )
1274!      call copyMeshData( nMesh, myDistr, myVxc, ioBox, Vxc )
1275      if (present(dVxcdD)) &
1276        call copyMeshData( nMesh, myDistr, mydVxcdD, ioBox, dVxcdD, my2io )
1277!        call copyMeshData( nMesh, myDistr, mydVxcdD, ioBox, dVxcdD )
1278    end if ! (sameMeshDistr(ioDistr,myDistr))
1279  end if ! (associated(myVxc))
1280
1281#ifdef DEBUG_XC
1282!  call timer_stop( 'cellXC3' )
1283#endif /* DEBUG_XC */
1284
1285#ifdef DEBUG_XC
1286!  ! Some printout for debugging
1287!  if (VDW) then
1288!    print'(a,f12.6)', &
1289!         'cellXC: EcuspVDW (eV) =', EcuspVDW/0.03674903_dp
1290!    print'(a,3f12.6)','cellXC: Ex,Ecl,Ecnl (eV) =', &
1291!         Ex/0.03674903_dp, (Ec-Enl)/0.03674903_dp, Enl/0.03674903_dp
1292!  end if
1293#endif /* DEBUG_XC */
1294
1295  ! Add contribution to stress from the change of volume with strain
1296  forall(ix=1:3) stress(ix,ix) = stress(ix,ix) + Ex + Ec
1297
1298  ! Add contribution to stress from change of k vectors with strain
1299  if (VDW) stress = stress + stressVDW * XCweightVDW
1300
1301  ! Divide by volume to get correct stress definition (dE/dStrain)/Vol
1302  stress = stress / volume
1303
1304  ! Divide by energy unit
1305  Ex = Ex / Eunit
1306  Ec = Ec / Eunit
1307  Dx = Dx / Eunit
1308  Dc = Dc / Eunit
1309  Vxc = Vxc / Eunit
1310  stress = stress / Eunit
1311  if (present(dVxcdD)) dVxcdD = dVxcdD / Eunit
1312
1313  ! Deallocate VDW-related arrays
1314  if (VDW) then
1315    call de_alloc( tq,       myName//'tq' )
1316    call de_alloc( tvdw,     myName//'tvdw' )
1317    call de_alloc( uk,       myName//'uk' )
1318    call de_alloc( ur,       myName//'ur' )
1319    call de_alloc( tvac,     myName//'tvac' )
1320    call de_alloc( tr,       myName//'tr' )
1321    call de_alloc( tk,       myName//'tk' )
1322    call de_alloc( phi,      myName//'phi' )
1323    call de_alloc( dphidk,   myName//'dphidk' )
1324    call de_alloc( dtdgd,    myName//'dtdgd' )
1325    call de_alloc( dtdd,     myName//'dtdd' )
1326    call de_alloc( dudk,     myName//'dudk' )
1327    call de_alloc( nonempty, myName//'nonempty' )
1328  end if
1329
1330  ! Deallocate GGA-related arrays
1331  if (associated(myDens)) then
1332    if (GGA) then
1333      call de_alloc( Vrght3, myName//'Vrght3' )
1334      call de_alloc( Vleft3, myName//'Vleft3' )
1335      call de_alloc( Vrght2, myName//'Vrght2' )
1336      call de_alloc( Vleft2, myName//'Vleft2' )
1337      call de_alloc( Vrght1, myName//'Vrght1' )
1338      call de_alloc( Vleft1, myName//'Vleft1' )
1339      call de_alloc( Drght3, myName//'Drght3' )
1340      call de_alloc( Dleft3, myName//'Dleft3' )
1341      call de_alloc( Drght2, myName//'Drght2' )
1342      call de_alloc( Dleft2, myName//'Dleft2' )
1343      call de_alloc( Drght1, myName//'Drght1' )
1344      call de_alloc( Dleft1, myName//'Dleft1' )
1345    end if
1346    if (present(dVxcdD)) call de_alloc( mydVxcdD, myName//'mydVxcdD'  )
1347    call de_alloc( myVxc,  myName//'myVxc'  )
1348    call de_alloc( myDens, myName//'myDens' )
1349  end if ! (associated(myDens))
1350
1351  ! Restore previous allocation defaults
1352  call alloc_default( restore=prevAllocDefaults )
1353
1354  ! Stop time counter
1355  call timer_stop( myName )
1356
1357  ! Get local calculation time (excluding communications)
1358  call timer_get( myName, lastTime=totTime, lastCommTime=comTime )
1359  myTime = totTime - comTime
1360  sumTime  = myTime
1361  sumTime2 = myTime**2
1362
1363  ! Add integrated magnitudes from all processors
1364  call miscAllReduce( 'sum', Ex, Ec, Dx, Dc, sumTime, sumTime2, a2=stress )
1365
1366  ! Find average and dispersion of CPU time
1367  timeAvge = sumTime / nodes
1368  timeDisp = sqrt( max( sumTime2/nodes - timeAvge**2, 0._dp ) )
1369
1370#ifdef DEBUG_XC
1371  write(udebug,'(a,3f12.6,/)') &
1372    myName//'My CPU time, avge, rel.disp =', &
1373    myTime, timeAvge, timeDisp/timeAvge
1374#endif /* DEBUG_XC */
1375
1376CONTAINS !---------------------------------------------------------------------
1377
1378  subroutine getGradDens( ii1, ii2, ii3, GD )
1379
1380  ! Finds the density gradient at one mesh point
1381
1382  ! Arguments
1383  integer, intent(in) :: ii1, ii2, ii3  ! Global mesh point indexes
1384  real(dp),intent(out):: GD(3,nSpin) ! Density gradient
1385
1386  ! Variables and arrays accessed from parent subroutine:
1387  !   dens, Dleft1, Dleft2, Dleft3,
1388  !   Drght1, Drght2, Drght3, DGiDFj,
1389  !   myBox, myDistr, nn, nSpin
1390
1391  ! Local variables and arrays
1392  integer :: ic, in, is, jj(3)
1393  real(dp):: Dj(nSpin)
1394  real(gp),pointer:: Dleft(:,:,:,:), Drght(:,:,:,:)
1395
1396  GD(:,:) = 0
1397  if (myDistr==0) then   ! dens data not distributed
1398    do ic = 1,3          ! Loop on cell axes
1399      do in = -nn,nn     ! Loop on finite difference index
1400        ! Find index jp of neighbor point
1401        jj(1) = ii1  ! Warning: jj(:)=(/ii1,ii2,ii3/) is VERY slow!!!
1402        jj(2) = ii2
1403        jj(3) = ii3
1404        jj(ic) = modulo( jj(ic)+in, nMesh(ic) )
1405        ! Find contribution of density at j to gradient at i
1406        do is = 1,nSpin
1407          do ix = 1,3  ! Warning: GD(:,is)=GD(:,is)+... is slower!!!
1408            GD(ix,is) = GD(ix,is) + DGiDFj(ix,ic,in) * &
1409                                    dens(jj(1),jj(2),jj(3),is)
1410          end do ! ix
1411        end do ! is
1412      end do ! in
1413    end do ! ic
1414  else                   ! Distributed dens data
1415    do ic = 1,3          ! Loop on cell axes
1416      if (ic==1) then
1417        Dleft => Dleft1
1418        Drght => Drght1
1419      else if (ic==2) then
1420        Dleft => Dleft2
1421        Drght => Drght2
1422      else ! (ic==3)
1423        Dleft => Dleft3
1424        Drght => Drght3
1425      end if ! (ic==1)
1426      do in = -nn,nn     ! Loop on finite difference index
1427        ! Find indexes jj(:) of neighbor point
1428        jj(1) = ii1
1429        jj(2) = ii2
1430        jj(3) = ii3
1431        jj(ic) = jj(ic) + in
1432        ! Find density Dj at neighbor point j
1433        if (jj(ic)<myBox(1,ic)) then ! Left neighbor region
1434          Dj(1:nSpin) = Dleft(jj(1),jj(2),jj(3),1:nSpin)
1435        else if (jj(ic)>myBox(2,ic)) then ! Right region
1436          Dj(1:nSpin) = Drght(jj(1),jj(2),jj(3),1:nSpin)
1437        else ! j within myBox
1438          Dj(1:nSpin) = myDens(jj(1),jj(2),jj(3),1:nSpin)
1439        end if
1440        ! Find contribution of density at j to gradient at i
1441        do is = 1,nSpin  ! Loop on spin component
1442          do ix = 1,3    ! Warning: GD(:,is)=GD(:,is)+... is slower!!!
1443            GD(ix,is) = GD(ix,is) + DGiDFj(ix,ic,in) * Dj(is)
1444          end do ! ix
1445        end do ! is
1446      end do ! in
1447    end do ! ic
1448  end if ! (myDistr==0)
1449
1450  end subroutine getGradDens
1451
1452END SUBROUTINE cellXC
1453
1454END MODULE m_cellXC
1455