1*     *****************************************
2*     *                                       *
3*     *             vdw_DF_init               *
4*     *                                       *
5*     *****************************************
6*
7*
8      subroutine vdw_DF_init()
9      implicit none
10
11#include "inp.fh"
12#include "rtdb.fh"
13#include "stdio.fh"
14#include "util.fh"
15#include "bafdecls.fh"
16#include "errquit.fh"
17
18#include "vdw-DF.fh"
19
20*     **** local variables ****
21      integer taskid,MASTER
22      parameter (MASTER=0)
23      logical mprint,hprint,debug,does_it_exist,oprint,value
24      logical from_environment,from_compile,from_nwchemrc
25      integer iop,lgth,unitf,print_level,i,j,k,l,nx
26      integer Gindx(3)
27      real*8  dk,gg,gx,gy,gz
28
29      character*255 datafile
30      logical found_datafile
31      integer ifound
32
33*     **** external functions ***
34      logical  control_has_vdw,control_is_vdw2
35      external control_has_vdw,control_is_vdw2
36      integer  nwpw_splint_nx,Pack_G_indx
37      external nwpw_splint_nx,Pack_G_indx
38
39
40*     **** return if not has_vdw ****
41      has_vdw = control_has_vdw()
42      is_vdw2 = control_is_vdw2()
43      if (.not.has_vdw) return
44
45      call D3dB_n2ft3d(1,n2ft3d)
46      call D3dB_nfft3d(1,nfft3d)
47      call Pack_npack(0,npack0)
48
49      call Parallel_taskid(taskid)
50      oprint = (taskid.eq.MASTER)
51
52      call util_file_name_noprefix('vdw_kernels.dat',
53     >                                .false.,
54     >                                .false.,
55     >                                datafile)
56      if (taskid.eq.MASTER) then
57         ifound = 0
58         inquire(file=datafile,exist=found_datafile)
59         if (found_datafile) ifound = 1
60      end if
61      call Parallel_Brdcst_ivalue(MASTER,ifound)
62      found_datafile = .false.
63      if (ifound.eq.1) found_datafile=.true.
64
65      !**** generate vdw_kernels.dat file ****
66      if (.not.found_datafile) then
67         if (oprint) then
68            l = index(datafile,' ') - 1
69            write(luout,*) "Generating VDW kernel filename:",
70     >                      datafile(1:l)
71         end if
72         call vdw_DF_kernel_gen_data(datafile)
73      end if
74
75
76      if (oprint) then
77         l = index(datafile,' ') - 1
78         write(luout,*) "Reading VDW kernel filename:",datafile(1:l)
79
80      end if
81*     **** create vdw data file ****
82      l = index(datafile,' ') -1
83
84      if (taskid.eq.MASTER) then
85         call openfile(5,datafile,l,'r',l)
86         call iread(5,Nqs,1)
87         call iread(5,nk,1)
88         call dread(5,kmax,1)
89      end if
90      call Parallel_Brdcst_ivalue(MASTER,Nqs)
91      call Parallel_Brdcst_value(MASTER,kmax)
92      call Parallel_Brdcst_ivalue(MASTER,nk)
93      nk1 = nk + 1
94
95      value= BA_alloc_get(mt_dbl,Nqs,'vdw_qmesh',qmesh(2),qmesh(1))
96      value= value.and.
97     >       BA_alloc_get(mt_dbl,Nqs*Nqs,'vdw_y',  ya(2), ya(1))
98      value= value.and.
99     >       BA_alloc_get(mt_dbl,Nqs*Nqs,'vdw_y2',y2a(2),y2a(1))
100      value= value.and.
101     >       BA_alloc_get(mt_dbl,nk1,'vdw_g',gphi(2),gphi(1))
102      value= value.and.
103     >       BA_alloc_get(mt_dbl,nk1*Nqs*(Nqs+1),
104     >                    'vdw_phi',phi(2),phi(1))
105      value= value.and.
106     >       BA_alloc_get(mt_dbl,Nqs*n2ft3d,
107     >                    'vdw_theta',theta(2),theta(1))
108      value= value.and.
109     >       BA_alloc_get(mt_dbl,Nqs*n2ft3d,
110     >                    'vdw_ufunc',ufunc(2),ufunc(1))
111      value= value.and.
112     >       BA_alloc_get(mt_dbl,10*n2ft3d,
113     >                    'vdw_xcp',xcp(2),xcp(1))
114      xce(1)  = xcp(1) + 2*n2ft3d
115      xxp(1)  = xcp(1) + 4*n2ft3d
116      xxe(1)  = xcp(1) + 6*n2ft3d
117      rho(1)  = xcp(1) + 8*n2ft3d
118      value= value.and.
119     >       BA_alloc_get(mt_dbl,npack0,
120     >                    'vdw_Gpack',Gpack(2),Gpack(1))
121      value= value.and.
122     >       BA_alloc_get(mt_int,npack0,
123     >                    'vdw_nxpack',nxpack(2),nxpack(1))
124      if (.not.value) call errquit('vdw_DF_init:out of heap',2,MA_ERR)
125
126      if (taskid.eq.MASTER) then
127         call dread(5,dbl_mb(qmesh(1)),Nqs)
128         call dread(5,dbl_mb(phi(1)),nk1*Nqs*(Nqs+1))
129         call closefile(5)
130
131         !*** set qmin = 0.0d0 ***
132         dbl_mb(qmesh(1)) = 0.0d0
133
134         !***  set phi(:,1,1) = 0 ***
135         call dcopy(2*nk1,0.0d0,0,dbl_mb(phi(1)),1)
136
137      end if
138      call Parallel_Brdcst_values(MASTER,Nqs,dbl_mb(qmesh(1)))
139      call Parallel_Brdcst_values(MASTER,nk1*Nqs*(Nqs+1),
140     >                            dbl_mb(phi(1)))
141
142      call vdw_DF_init_poly(Nqs,dbl_mb(qmesh(1)),
143     >                      dbl_mb(ya(1)),
144     >                      dbl_mb(y2a(1)),
145     >                      dbl_mb(gphi(1)))
146
147      dk = kmax/dble(nk)
148      do k=0,nk
149         dbl_mb(gphi(1)+k) = k*dk
150      end do
151
152      Gindx(1)=Pack_G_indx(0,1)
153      Gindx(2)=Pack_G_indx(0,2)
154      Gindx(3)=Pack_G_indx(0,3)
155      do k=1,npack0
156         gx = dbl_mb(Gindx(1)+k-1)
157         gy = dbl_mb(Gindx(2)+k-1)
158         gz = dbl_mb(Gindx(3)+k-1)
159         gg = dsqrt(gx*gx + gy*gy + gz*gz)
160         dbl_mb(Gpack(1)+k-1) = gg
161
162         nx = gg/dk
163         int_mb(nxpack(1)+k-1) = nwpw_splint_nx(dbl_mb(gphi(1)),nx,gg)
164      end do
165
166      if (is_vdw2) then
167         Zab = -1.887d0
168      else
169         Zab = -0.8491d0
170      end if
171      qmax = dbl_mb(qmesh(1)+Nqs-1)
172      qmin = 0.0d0
173
174      return
175
176 999  continue
177      call errquit('vdw_DF_init:error reading qmesh,Nqs=',Nqs,DISK_ERR)
178      return
179
180      end
181
182
183*     **********************************************
184*     *                                            *
185*     *                vdw_DF_end                  *
186*     *                                            *
187*     **********************************************
188
189      subroutine vdw_DF_end()
190      implicit none
191
192#include "bafdecls.fh"
193#include "errquit.fh"
194
195#include "vdw-DF.fh"
196
197*     **** local variables ****
198      logical value
199
200      if (has_vdw) then
201         value = BA_free_heap(nxpack(2))
202         value = value.and.BA_free_heap(Gpack(2))
203         value = value.and.BA_free_heap(xcp(2))
204         value = value.and.BA_free_heap(ufunc(2))
205         value = value.and.BA_free_heap(theta(2))
206         value = value.and.BA_free_heap(phi(2))
207         value = value.and.BA_free_heap(gphi(2))
208         value = value.and.BA_free_heap(qmesh(2))
209         value = value.and.BA_free_heap(ya(2))
210         value = value.and.BA_free_heap(y2a(2))
211         if (.not.value)
212     >      call errquit('vdw_DF_end:free heap failed',0,MA_ERR)
213      end if
214
215      return
216      end
217
218*     **********************************************
219*     *                                            *
220*     *                vdw_DF_init_poly            *
221*     *                                            *
222*     **********************************************
223      subroutine vdw_DF_init_poly(n,x,ya,y2a,utmp)
224      implicit none
225      integer n
226      real*8 x(*),ya(n,*),y2a(n,*),utmp(*)
227
228*     **** local variables ****
229      integer i,j
230      real*8 yp1,ypn
231
232      yp1 = 0.0d0
233      ypn = 0.0d0
234
235      do i=1,n
236         do j=1,n
237            if (i.eq.j) then
238               ya(i,j) = 1.0d0
239            else
240               ya(i,j) = 0.0d0
241            end if
242         end do
243      end do
244      do j=1,n
245         call nwpw_spline(x,ya(1,j),n,yp1,ypn,y2a(1,j),utmp)
246      end do
247
248      return
249      end
250
251*     **********************************************
252*     *                                            *
253*     *                vdw_DF_poly                 *
254*     *                                            *
255*     **********************************************
256      subroutine vdw_DF_poly(i,x,p,dp)
257      implicit none
258      integer i
259      real*8 x
260      real*8 p,dp
261
262#include "bafdecls.fh"
263#include "errquit.fh"
264#include "vdw-DF.fh"
265
266      !**** local variables ****
267      integer nx
268
269      !**** external functions ****
270      real*8   nwpw_splint,nwpw_dsplint
271      external nwpw_splint,nwpw_dsplint
272
273
274      if ((x.ge.qmin).and.(x.le.qmax)) then
275         nx = dsqrt(x/dbl_mb(qmesh(1)+Nqs-1)) * Nqs
276         if (nx.lt.1)       nx = 1
277         if (nx.gt.(Nqs-1)) nx = Nqs-1
278
279         p = nwpw_splint(dbl_mb(qmesh(1)),
280     >                   dbl_mb(ya(1)+Nqs*(i-1)),
281     >                   dbl_mb(y2a(1)+Nqs*(i-1)),
282     >                   Nqs,nx,x)
283         dp = nwpw_dsplint(dbl_mb(qmesh(1)),
284     >                     dbl_mb(ya(1)+Nqs*(i-1)),
285     >                     dbl_mb(y2a(1)+Nqs*(i-1)),
286     >                     Nqs,nx,x)
287      else
288         p = 0.0d0
289         dp = 0.0d0
290      end if
291      return
292      end
293
294
295*     **********************************************
296*     *                                            *
297*     *                vdw_DF_exist                *
298*     *                                            *
299*     **********************************************
300      logical function vdw_DF_exist()
301      implicit none
302
303#include "vdw-DF.fh"
304
305      vdw_DF_exist = has_vdw
306      return
307      end
308
309*     **********************************************
310*     *                                            *
311*     *                vdw_DF                      *
312*     *                                            *
313*     **********************************************
314*      input:  n2ft3d                  grid
315*              dn                     density
316*              agr                     absolute gradient of density,|grad n|
317*      output: exc                     exchange correlation energy density
318*              fn                      d(n*exc)/dnup,
319*              fdn                     d(n*exc)/d(|grad n|)
320
321      subroutine vdw_DF(n2ft3d0,ispin,dn,agr,exc,fn,fdn)
322      implicit none
323      integer n2ft3d0,ispin
324      real*8 dn(n2ft3d0,ispin)
325      real*8 agr(n2ft3d0,ispin)
326      real*8 exc(n2ft3d0)
327      real*8 fn(n2ft3d0,ispin)
328      real*8 fdn(n2ft3d0,ispin)
329
330#include "bafdecls.fh"
331#include "errquit.fh"
332#include "vdw-DF.fh"
333
334
335      !*** generate LDA results ***
336      call vxc(n2ft3d,ispin,dn,dbl_mb(xcp(1)),dbl_mb(xce(1)),
337     >         dbl_mb(rho(1)))
338      if (ispin.eq.1)
339     >   call v_dirac(n2ft3d,ispin,dn,dbl_mb(xxp(1)),dbl_mb(xxe(1)),
340     >                dbl_mb(rho(1)))
341
342      !**** generate rho ***
343      call vdw_DF_Generate_rho(ispin,n2ft3d,dn,dbl_mb(rho(1)))
344      call D3dB_r_Zero_Ends(1,dbl_mb(rho(1)))
345
346      !*** Generate theta(G), ptheta(r), dthetadrho(r,ms), dthetaddrho(r) ****
347      call vdw_DF_Generate_thetag(Nqs,nfft3d,ispin,n2ft3d,
348     >                            Zab,qmin,qmax,dbl_mb(rho(1)),agr,
349     >                            dbl_mb(xcp(1)),dbl_mb(xce(1)),
350     >                            dbl_mb(xxp(1)),dbl_mb(xxe(1)),
351     >                            dbl_mb(theta(1)))
352
353      !*** compute ufunc(G,i) = Sum(j) theta(G,j)*phi(G,i,j) ***
354      call vdw_DF_Generate_ufunc(nk1,Nqs,
355     >                           dbl_mb(gphi(1)),dbl_mb(phi(1)),
356     >                           npack0,nfft3d,
357     >                           dbl_mb(Gpack(1)),int_mb(nxpack(1)),
358     >                           dbl_mb(theta(1)),dbl_mb(ufunc(1)))
359
360
361      !*** compute contributions to xce(r), fn(r,ms), fdn ****
362      call vdw_DF_Generate_potentials(Nqs,nfft3d,ispin,n2ft3d,
363     >             dbl_mb(ufunc(1)),
364     >             dbl_mb(xce(1)),dbl_mb(xcp(1)),dbl_mb(xxe(1)),
365     >             dbl_mb(xce(1)+n2ft3d),dbl_mb(xxp(1)),dbl_mb(rho(1)),
366     >             exc,fn,fdn)
367
368
369      return
370      end
371
372*     ************************************************
373*     *                                              *
374*     *         vdw_DF_Generate_rho                  *
375*     *                                              *
376*     ************************************************
377      subroutine vdw_DF_Generate_rho(ispin,n2ft3d,dn,rho)
378      implicit none
379      integer ispin,n2ft3d
380      real*8 dn(n2ft3d,ispin)
381      real*8 rho(n2ft3d)
382
383*     **** local variables ****
384      integer i,tid,nthr
385      real*8 dncut
386      parameter (dncut=1.0d-30)
387
388*     **** external functions ****
389      integer  Parallel_threadid,Parallel_nthreads
390      external Parallel_threadid,Parallel_nthreads
391
392      tid  = Parallel_threadid()
393      nthr = Parallel_nthreads()
394
395      do i=tid+1,n2ft3d,nthr
396         rho(i) = dn(i,1)+dn(1,ispin)+dncut
397      end do
398!$OMP BARRIER
399
400      return
401      end
402
403*     ************************************************
404*     *                                              *
405*     *         vdw_DF_Generate_thetag               *
406*     *                                              *
407*     ************************************************
408*
409      subroutine vdw_DF_Generate_thetag(Nqs,nfft3d,ispin,n2ft3d,
410     >                                 Zab,qmin,qmax,
411     >                                 rho,agr,vxc,exc,vxx,exx,
412     >                                 theta)
413      implicit none
414      integer Nqs,nfft3d,ispin,n2ft3d
415      real*8  Zab,qmin,qmax
416      real*8  rho(n2ft3d),agr(n2ft3d)
417      real*8  vxc(n2ft3d,ispin), exc(n2ft3d)
418      real*8  vxx(n2ft3d,ispin), exx(n2ft3d)
419      real*8  theta(n2ft3d,Nqs)
420
421*     **** local variables ****
422      integer i,j,k,jstart,nj,taskid_j,np_j,tid,nthr
423      integer nx,ny,nz,r,ms
424      real*8 A,Cf,pi,pj,dpj,tsum,dtsum,q0,q0sat,xi,dxi,Cxi,scal1
425      real*8 dq0drho(2),dq0ddrho,dq0satdq0
426      real*8 vxgga,exgga
427      real*8 onethird,frthrd,elthrd,dncut,check
428      parameter (onethird=1.0d0/3.0d0)
429      parameter (frthrd=4.0d0/3.0d0)
430      parameter (elthrd=11.0d0/3.0d0)
431      parameter (dncut=1.0d-12)
432
433*     **** external functions ****
434      integer  Parallel_threadid,Parallel_nthreads
435      external Parallel_threadid,Parallel_nthreads
436
437      tid  = Parallel_threadid()
438      nthr = Parallel_nthreads()
439      call Parallel2d_taskid_j(taskid_j)
440      call Parallel2d_np_j(np_j)
441      call D3dB_nx(1,nx)
442      call D3dB_ny(1,ny)
443      call D3dB_nz(1,nz)
444      scal1 = 1.0d0/dble(nx*ny*nz)
445
446      nx = Nqs/np_j
447      r = mod(Nqs,np_j)
448      if (taskid_j.lt.r) then
449         nj = nx + 1
450         jstart = nx*taskid_j + taskid_j + 1
451      else
452         nj = nx
453         jstart = nx*taskid_j + r + 1
454      end if
455
456      call D3dB_r_nZero(1,Nqs,theta)
457
458      if (nj.gt.0) then
459         pi = 4.0d0*datan(1.0d0)
460         Cf = (3.0d0*pi*pi)**onethird
461         A = -4.0d0*pi/3.0d0
462         Cxi = Zab/(36.0d0*Cf)
463
464         !*** compute theta(r) ***
465         do i=tid+1,n2ft3d,nthr
466            if (rho(i).ge.dncut) then
467               xi  = Cxi*(agr(i)/rho(i)**frthrd)**2
468               dxi = 2.0d0*Cxi*agr(i)*(1.0d0/rho(i)**frthrd)**2
469               exgga = exx(i) - xi*exx(i)
470               vxgga = vxx(i,1) - xi*(vxx(i,1)-elthrd*exx(i))
471               if ((vxgga-exgga).lt.0.0d0) then
472                  xi = 0.0d0
473                  dxi = 0.0d0
474               end if
475
476               q0 = A*(exc(i) - exx(i)*xi)
477
478               if (q0.lt.qmin) then
479                  q0 = qmin
480                  do ms=1,ispin
481                     dq0drho(ms)  = 0.0d0
482                  end do
483                  dq0ddrho = 0.0d0
484               else
485                  do ms=1,ispin
486                     dq0drho(ms) = A*((vxc(i,ms)-exc(i))
487     >                           - xi*(vxx(i,ms)-elthrd*exx(i)))/rho(i)
488                  end do
489                  dq0ddrho = -A*exx(i)*dxi
490               end if
491
492               tsum = 0.0d0
493               dtsum = 0.0d0
494               do k=1,12
495                  tsum = tsum + (q0/qmax)**k / dble(k)
496                  dtsum = dtsum + (q0/qmax)**(k-1)
497               end do
498               q0sat     = qmax*(1.0d0-dexp(-tsum))
499               dq0satdq0 = dexp(-tsum)*dtsum
500
501               exc(i)  =  q0sat
502               do ms=1,ispin
503                  vxc(i,ms) = rho(i)*dq0satdq0*dq0drho(ms)
504               end do
505               exx(i)  =  rho(i)*dq0satdq0*dq0ddrho
506            else
507               xi = 0.0d0
508               q0sat    = qmax
509               exc(i) = qmax
510               do ms=1,ispin
511                  vxc(i,ms) = 0.0d0
512               end do
513               exx(i) = 0.0d0
514            end if
515
516            do j=jstart,jstart-1+nj
517               call vdw_DF_poly(j,q0sat,pj,dpj)
518               theta(i,j)  = rho(i)*pj
519            end do
520         end do
521!$OMP BARRIER
522
523         !*** compute theta(g) ***
524         do j=jstart,jstart-1+nj
525            call D3dB_r_Zero_Ends(1,theta(1,j))
526         end do
527         call Grsm_hg_fftf(nfft3d,nj,theta(1,jstart))
528         call Grsm_gg_dScale1(nfft3d,nj,scal1,theta(1,jstart))
529      end if
530
531      call D1dB_Vector_SumAll(Nqs*n2ft3d,theta)
532
533      return
534      end
535
536
537*     ************************************************
538*     *                                              *
539*     *         vdw_DF_Generate_ufunc                *
540*     *                                              *
541*     ************************************************
542*
543*   This routine computes the
544*
545*   ufunc(k,i) = Sum(j=1,Nqs) theta(k,j) * phi(k,i,j)
546*
547*   where phi is interpolated using a cubic spline.
548*
549      subroutine vdw_DF_Generate_ufunc(nk1,Nqs,gphi,phi,
550     >                                 npack0,nfft3d,
551     >                                 Gpack,nxpack,theta,ufunc)
552      implicit none
553      integer nk1,Nqs
554      real*8  gphi(nk1),phi(nk1,2,Nqs*(Nqs+1)/2)
555      integer    npack0,nfft3d
556      real*8     Gpack(npack0)
557      integer    nxpack(npack0)
558      complex*16 theta(nfft3d,*)
559      complex*16 ufunc(nfft3d,*)
560
561*     **** local variables ****
562      integer tid,nthr,taskid_j,np_j,pcount
563      integer i,j,k,indx,klo,khi
564      real*8 a,b,f,g,h
565
566*     **** external functions ****
567      integer  Parallel_threadid,Parallel_nthreads
568      external Parallel_threadid,Parallel_nthreads
569
570      tid  = Parallel_threadid()
571      nthr = Parallel_nthreads()
572      call Parallel2d_taskid_j(taskid_j)
573      call Parallel2d_np_j(np_j)
574
575c      write(*,*) "tid,nthr=",tid,nthr
576c      write(*,*) "taskid_j,np_j=",taskid_j,np_j
577      !*** compute ufunc(g) ***
578      call D3dB_c_nZero(1,Nqs,ufunc)
579      pcount = 0
580      indx = 0
581      do j=1,Nqs    !*** assuming phi(:,1,1) = 0 ***
582         do i=1,j
583            indx = indx + 1
584c            write(*,*) "i,j=",i,j, indx, Nqs,npack0,nfft3d
585            if (pcount.eq.taskid_j) then
586
587               do k=tid+1,npack0,nthr
588                  g  = Gpack(k)
589                  klo  = nxpack(k)
590                  khi  = klo + 1
591                  !write(*,*) "k,g,klo,khi=",k,g,klo,khi,nk1
592
593                  h = gphi(khi)-gphi(klo)
594                  a = (gphi(khi)-g)/h
595                  b = (g-gphi(klo))/h
596                  f = a*phi(klo,1,indx)
597     >              + b*phi(khi,1,indx)
598     >              + ((a**3-a)*phi(klo,2,indx)
599     >                +(b**3-b)*phi(khi,2,indx))*h**2/6.0d0
600
601                  ufunc(k,i) = ufunc(k,i) + theta(k,j) * f
602                  if (i.ne.j)  ufunc(k,j) = ufunc(k,j) + theta(k,i)*f
603               end do
604            end if
605            pcount = mod(pcount+1,np_j)
606         end do
607      end do
608!$OMP BARRIER
609      call D1dB_Vector_SumAll(2*Nqs*nfft3d,ufunc)
610
611      return
612      end
613
614
615
616*     ************************************************
617*     *                                              *
618*     *         vdw_DF_Generate_potentials           *
619*     *                                              *
620*     ************************************************
621*
622      subroutine vdw_DF_Generate_potentials(Nqs,nfft3d,ispin,n2ft3d,
623     >                            ufunc,
624     >                            q0,drho,ddrho,tmpexc,tmpfn,tmpfdn,
625     >                            exc,fn,fdn)
626      implicit none
627      integer Nqs,nfft3d,ispin,n2ft3d
628      real*8  ufunc(n2ft3d,Nqs)
629      real*8  q0(n2ft3d)
630      real*8  drho(n2ft3d,ispin)
631      real*8  ddrho(n2ft3d,ispin)
632      real*8  tmpexc(n2ft3d)
633      real*8  tmpfn(n2ft3d,ispin),tmpfdn(n2ft3d,ispin)
634      real*8  exc(n2ft3d),fn(n2ft3d,ispin),fdn(n2ft3d,ispin)
635
636*     **** local variables ****
637      integer i,j,jstart,nj,taskid_j,np_j,tid,nthr
638      integer nx,r,ms
639      real*8  pj,dpj
640
641*     **** external functions ****
642      integer  Parallel_threadid,Parallel_nthreads
643      external Parallel_threadid,Parallel_nthreads
644
645      tid  = Parallel_threadid()
646      nthr = Parallel_nthreads()
647      call Parallel2d_taskid_j(taskid_j)
648      call Parallel2d_np_j(np_j)
649
650      nx = Nqs/np_j
651      r = mod(Nqs,np_j)
652      if (taskid_j.lt.r) then
653         nj = nx + 1
654         jstart = nx*taskid_j + taskid_j + 1
655      else
656         nj = nx
657         jstart = nx*taskid_j + r + 1
658      end if
659      call D3dB_r_Zero(1,tmpexc)
660      call D3dB_r_nZero(1,ispin,tmpfn)
661      call D3dB_r_nZero(1,ispin,tmpfdn)
662
663      if (nj.gt.0) then
664
665         !*** compute ufunc(r) ***
666         call Grsm_gh_fftb(nfft3d,nj,ufunc(1,jstart))
667
668         !*** generate tmpexc,tmpfn,tmpfdn ***
669         do i=tid+1,n2ft3d,nthr
670            do j=jstart,jstart-1+nj
671               call vdw_DF_poly(j,q0(i),pj,dpj)
672               tmpexc(i) = tmpexc(i) + 0.5d0*pj*ufunc(i,j)
673               do ms=1,ispin
674                  tmpfn(i,ms) = tmpfn(i,ms)
675     >                        + (pj + dpj*drho(i,ms))*ufunc(i,j)
676                  tmpfdn(i,ms)=tmpfdn(i,ms) + dpj*ddrho(i,ms)*ufunc(i,j)
677               end do
678            end do
679         end do
680
681      end if
682!$OMP BARRIER
683      call D1dB_Vector_SumAll(n2ft3d,tmpexc)
684      call D1dB_Vector_SumAll(ispin*n2ft3d,tmpfn)
685      call D1dB_Vector_SumAll(ispin*n2ft3d,tmpfdn)
686
687      call D3dB_rr_Sum2(1,tmpexc,exc)
688      do ms=1,ispin
689         call D3dB_rr_Sum2(1,tmpfn(1,ms),fn(1,ms))
690         call D3dB_rr_Sum2(1,tmpfdn(1,ms),fdn(1,ms))
691      end do
692
693      return
694      end
695