1*
2* $Id$
3*
4
5*     ***********************************
6*     *                                 *
7*     *            psp_paw_init         *
8*     *                                 *
9*     ***********************************
10      subroutine psp_paw_init()
11      implicit none
12
13#include "bafdecls.fh"
14#include "errquit.fh"
15#include "psp.fh"
16
17*     *** local variables ***
18      logical value
19      integer ii,ia,iii,iia,i,j
20      integer nx,ny,nz,nrho,nray,npack0
21      real*8  unita(3,3),bmesh,log_bmesh,pi,rcut,rs,w,sigma_smooth
22      integer Gray(2),vlray(2),tmpray(2),f(2),rho(2)
23
24*     *** external functions ***
25      integer  ion_nion_qm,ion_katm_qm,ion_nkatm_qm,kbpp_calc_nray
26      external ion_nion_qm,ion_katm_qm,ion_nkatm_qm,kbpp_calc_nray
27      real*8   lattice_unita,control_rcut
28      external lattice_unita,control_rcut
29
30      if (pawexist) then
31         nion_paw = 0
32         do ii=1,ion_nion_qm()
33           ia = ion_katm_qm(ii)
34           if (int_mb(psp_type(1)+ia-1).eq.4) then
35              nion_paw = nion_paw + 1
36           end if
37         end do
38
39*        **** allocate dummy variables ****
40         if (.not.BA_alloc_get(mt_int,nion_paw,"ion_pawtoion",
41     >                         ion_pawtoion(2),ion_pawtoion(1)))
42     >      call errquit("psp_paw_init:allocate heap",0,MA_ERR)
43
44         iii = 0
45         do ii=1,ion_nion_qm()
46            ia = ion_katm_qm(ii)
47            if (int_mb(psp_type(1)+ia-1).eq.4) then
48               iii = iii + 1
49               int_mb(ion_pawtoion(1)+iii-1) = ii
50            end if
51         end do
52      end if
53
54      return
55      end
56
57
58
59
60
61*     ***********************************
62*     *                                 *
63*     *            psp_paw_end          *
64*     *                                 *
65*     ***********************************
66      subroutine psp_paw_end()
67      implicit none
68
69#include "bafdecls.fh"
70#include "errquit.fh"
71#include "psp.fh"
72
73      if (pawexist) then
74        if (.not.BA_free_heap(ion_pawtoion(2)))
75     >      call errquit("psp_paw_end:deallocate heap",0,MA_ERR)
76      end if
77
78      return
79      end
80
81
82
83
84*     ***********************************
85*     *				        *
86*     *	    v_lr_local_seperate_paw     *
87*     *					*
88*     ***********************************
89*
90*     This routine calculates the long-range part of the
91*     local pseudopotential (used by version4)
92*
93      subroutine v_lr_local_seperate_paw(r_grid,vlr_paw,vlr_notpaw)
94      implicit none
95      real*8     r_grid(3,*)
96      real*8     vlr_paw(*)
97      real*8     vlr_notpaw(*)
98
99#include "bafdecls.fh"
100#include "errquit.fh"
101#include "psp.fh"
102
103*     **** Error function parameters ****
104      real*8 xerf,yerf
105c     real*8 c1,c2,c3,c4,c5,c6,yerf,xerf
106      real*8 c1,c2,c3,c4,c5,c6
107      parameter (c1=0.07052307840d0,c2=0.04228201230d0)
108      parameter (c3=0.00927052720d0)
109      parameter (c4=0.00015201430d0,c5=0.00027656720d0)
110      parameter (c6=0.00004306380d0)
111
112*     **** local variables ****
113      integer taskid_j,np_j
114      integer i,j,ia,n2ft3d,n2ft3d_map,l,m,lm,nion_qm
115      real*8 x,y,z,q,c,r,sqrt_pi,lmbda0,lmbda
116
117*     **** external functions ****
118      logical  pspw_qmmm_lambda_flag
119      logical  control_fast_erf
120      integer  ion_nion,ion_nion_qm,ion_katm
121      real*8   ion_rion,psp_rlocal,psp_zv,util_erf,pspw_qmmm_lambda
122      external pspw_qmmm_lambda_flag
123      external control_fast_erf
124      external ion_nion,ion_nion_qm,ion_katm
125      external ion_rion,psp_rlocal,psp_zv,util_erf,pspw_qmmm_lambda
126      integer  ion_katm_ptr,ion_rion_ptr,psp_zv_ptr
127      external ion_katm_ptr,ion_rion_ptr,psp_zv_ptr
128
129      call nwpw_timing_start(5)
130      call Parallel2d_np_j(np_j)
131      call Parallel2d_taskid_j(taskid_j)
132      call D3dB_n2ft3d(1,n2ft3d)
133      call D3dB_n2ft3d_map(1,n2ft3d_map)
134      nion_qm = ion_nion_qm()
135
136      lmbda0 = 1.0d0
137      if (pspw_qmmm_lambda_flag()) lmbda0 = pspw_qmmm_lambda()
138
139      sqrt_pi = dsqrt(4.0d0*datan(1.0d0))
140      !call dcopy(n2ft3d,0.0d0,0,vlr_paw,1)
141      !call dcopy(n2ft3d,0.0d0,0,vlr_notpaw,1)
142      call Parallel_shared_vector_zero(.false.,n2ft3d,vlr_paw)
143      call Parallel_shared_vector_zero(.true.,n2ft3d,vlr_notpaw)
144
145      if (psp_fmm) then
146         call FMM_rion_Llm(psp_fmm_lmax,ion_nion(),nion_qm,
147     >                     int_mb(ion_katm_ptr()),
148     >                     dbl_mb(ion_rion_ptr()),
149     >                     dbl_mb(psp_zv_ptr()),
150     >                     lmbda0,
151     >                     psp_fmm_rmax2,
152     >                     dbl_mb(psp_fmm_Llm(1)))
153!$OMP DO
154         do i=1,n2ft3d_map
155            lm = 0
156            do l=0,psp_fmm_lmax
157            do m=-l,l
158               vlr_notpaw(i) = vlr_notpaw(i)
159     >                       - dbl_mb(psp_fmm_Llm(1)+lm)
160     >                        *dbl_mb(psp_fmm_rTlm(1)+lm*n2ft3d+i-1)
161               lm = lm + 1
162            end do
163            end do
164         end do
165!$OMP END DO
166      end if
167
168      if (control_fast_erf()) then
169
170      do j=1,ion_nion()
171
172         if (mod(j-1,np_j).eq.taskid_j) then
173            if (j.gt.nion_qm) then
174               lmbda = lmbda0
175            else
176               lmbda = 1.0d0
177            end if
178            ia= ion_katm(j)
179            x = ion_rion(1,j)
180            y = ion_rion(2,j)
181            z = ion_rion(3,j)
182            q = -psp_zv(ia)
183            c = 1.0d0/psp_rlocal(ia)
184
185            if ((int_mb(psp_type(1)+ia-1).eq.4)) then
186!$OMP DO
187               do i=1,n2ft3d_map
188                  r = dsqrt( (r_grid(1,i)-x)**2
189     >                     + (r_grid(2,i)-y)**2
190     >                     + (r_grid(3,i)-z)**2)
191                  if (r.gt.1.0d-15) then
192                    xerf=r*c
193                    yerf = (1.0d0
194     >                    + xerf*(c1 + xerf*(c2
195     >                    + xerf*(c3 + xerf*(c4
196     >                    + xerf*(c5 + xerf*c6))))))**4
197                    yerf = (1.0d0 - 1.0d0/yerf**4)
198                    vlr_paw(i) = vlr_paw(i) + (q/r)*yerf
199                  else
200                    vlr_paw(i) = vlr_paw(i) + 2.0d0*q*c/sqrt_pi
201                  end if
202               end do
203!$OMP END DO
204
205            else
206            if ((.not.psp_fmm).or.((x*x+y*y+z*z).le.psp_fmm_rmax2)) then
207!$OMP DO
208               do i=1,n2ft3d_map
209                  r = dsqrt( (r_grid(1,i)-x)**2
210     >                     + (r_grid(2,i)-y)**2
211     >                     + (r_grid(3,i)-z)**2)
212                  if (r.gt.1.0d-15) then
213                    xerf=r*c
214                    yerf = (1.0d0
215     >                    + xerf*(c1 + xerf*(c2
216     >                    + xerf*(c3 + xerf*(c4
217     >                    + xerf*(c5 + xerf*c6))))))**4
218                    yerf = (1.0d0 - 1.0d0/yerf**4)
219                    vlr_notpaw(i) = vlr_notpaw(i) + lmbda*(q/r)*yerf
220                  else
221                    vlr_notpaw(i) = vlr_notpaw(i)
222     >                            + lmbda*2.0d0*q*c/sqrt_pi
223                  end if
224               end do
225!$OMP END DO
226            end if
227            end if
228
229         end if
230      end do
231
232      else
233
234      do j=1,ion_nion()
235
236         if (mod(j-1,np_j).eq.taskid_j) then
237            ia= ion_katm(j)
238            x = ion_rion(1,j)
239            y = ion_rion(2,j)
240            z = ion_rion(3,j)
241            q = -psp_zv(ia)
242            c = 1.0d0/psp_rlocal(ia)
243            if (j.gt.nion_qm) then
244               lmbda = lmbda0
245            else
246               lmbda = 1.0d0
247            end if
248
249            if ((int_mb(psp_type(1)+ia-1).eq.4)) then
250!$OMP DO
251               do i=1,n2ft3d_map
252                  r = dsqrt( (r_grid(1,i)-x)**2
253     >                     + (r_grid(2,i)-y)**2
254     >                     + (r_grid(3,i)-z)**2)
255                  if (r.gt.1.0d-15) then
256                    xerf=r*c
257                    yerf = util_erf(xerf)
258                    vlr_paw(i) = vlr_paw(i) + (q/r)*yerf
259                  else
260                    vlr_paw(i) = vlr_paw(i) + 2.0d0*q*c/sqrt_pi
261                  end if
262               end do
263!$OMP END DO
264            else
265            if ((.not.psp_fmm).or.((x*x+y*y+z*z).le.psp_fmm_rmax2)) then
266!$OMP DO
267               do i=1,n2ft3d_map
268                  r = dsqrt( (r_grid(1,i)-x)**2
269     >                     + (r_grid(2,i)-y)**2
270     >                     + (r_grid(3,i)-z)**2)
271                  if (r.gt.1.0d-15) then
272                    xerf=r*c
273                    yerf = util_erf(xerf)
274                    vlr_notpaw(i) = vlr_notpaw(i) + lmbda*(q/r)*yerf
275                  else
276                    vlr_notpaw(i) = vlr_notpaw(i)
277     >                            + lmbda*2.0d0*q*c/sqrt_pi
278                  end if
279               end do
280!$OMP END DO
281            end if
282            end if
283         end if
284      end do
285
286      end if
287      if (np_j.gt.1) call D1dB_Vector_SumAll(n2ft3d_map,vlr_paw)
288      if (np_j.gt.1) call D1dB_Vector_SumAll(n2ft3d_map,vlr_notpaw)
289
290      call nwpw_timing_end(5)
291
292      return
293      end
294
295
296*     **********************************
297*     *                                *
298*     *     f_lr_local_seperate_paw    *
299*     *                                *
300*     **********************************
301*
302*     This routine calculates the gradient of the long-range part of the
303*     local pseudopotential (used by version 4)
304*
305*     Entry -
306*     Exit -
307*
308      subroutine f_lr_local_seperate_paw(r_grid,rho_paw,rho_notpaw,fion)
309      implicit none
310      real*8  r_grid(3,*)
311      real*8  rho_paw(*)
312      real*8  rho_notpaw(*)
313      real*8  fion(3,*)
314
315#include "bafdecls.fh"
316#include "errquit.fh"
317#include "psp.fh"
318
319*     **** Error function parameters ****
320      real*8  yerf,verf
321
322c     real*8 c1,c2,c3,c4,c5,c6,yerf,fterf,verf
323      real*8 c1,c2,c3,c4,c5,c6,fterf
324      parameter (c1=0.07052307840d0,c2=0.04228201230d0)
325      parameter (c3=0.00927052720d0)
326      parameter (c4=0.00015201430d0,c5=0.00027656720d0)
327      parameter (c6=0.00004306380d0)
328
329*     **** local variables ****
330      logical ispawv
331      integer ftmp(2)
332      integer taskid_j,np_j
333      integer i,j,ia,np1,np2,np3,n2ft3d_map,nion,l,m,lm,n2ft3d,nion_qm
334      real*8 x,y,z,q,c,r,sqrt_pi,dv,v,rx,ry,rz,fx,fy,fz,lmbda0,lmbda
335
336*     **** external functions ****
337      logical  pspw_qmmm_lambda_flag
338      logical  control_fast_erf
339      integer  ion_nion,ion_nion_qm,ion_katm
340      real*8   lattice_omega,ion_rion,psp_rlocal,psp_zv,util_erf
341      real*8   pspw_qmmm_lambda
342      external pspw_qmmm_lambda_flag
343      external control_fast_erf
344      external ion_nion,ion_nion_qm,ion_katm
345      external lattice_omega,ion_rion,psp_rlocal,psp_zv,util_erf
346      external pspw_qmmm_lambda
347      integer  ion_katm_ptr,ion_rion_ptr,psp_zv_ptr
348      external ion_katm_ptr,ion_rion_ptr,psp_zv_ptr
349
350      call nwpw_timing_start(5)
351      call Parallel2d_np_j(np_j)
352      call Parallel2d_taskid_j(taskid_j)
353      call D3dB_n2ft3d_map(1,n2ft3d_map)
354      call D3dB_n2ft3d(1,n2ft3d)
355      nion = ion_nion()
356      nion_qm = ion_nion_qm()
357
358      lmbda0 = 1.0d0
359      if (pspw_qmmm_lambda_flag()) lmbda0 = pspw_qmmm_lambda()
360
361*     **** constants ****
362      sqrt_pi = dsqrt(4.0d0*datan(1.0d0))
363
364      call D3dB_nx(1,np1)
365      call D3dB_ny(1,np2)
366      call D3dB_nz(1,np3)
367      dv = lattice_omega()/dble(np1*np2*np3)
368
369*     ***** allocate temporary space ****
370      if (.not.BA_push_get(mt_dbl,3*nion,'ftmp',ftmp(2),ftmp(1)))
371     > call errquit('f_lr_local_seperate_paw:out of stack',0,MA_ERR)
372
373      !call dcopy(3*nion,0.0d0,0,dbl_mb(ftmp(1)),1)
374      call Parallel_shared_vector_zero(.true.,3*nion,dbl_mb(ftmp(1)))
375      if (psp_fmm) then
376c         call dcopy((psp_fmm_lmax+1)**2,0.0d0,0,
377c     >              dbl_mb(psp_fmm_Mlm(1)),1)
378         call Parallel_shared_vector_zero(.true.,(psp_fmm_lmax+1)**2,
379     >                                    dbl_mb(psp_fmm_Mlm(1)),1)
380         lm = 0
381         do l=0,psp_fmm_lmax
382         do m=-l,l
383!$OMP DO
384            do i=1,n2ft3d_map
385               dbl_mb(psp_fmm_Mlm(1)+lm) = dbl_mb(psp_fmm_Mlm(1)+lm)
386     >            + dbl_mb(psp_fmm_rTlm(1)+lm*n2ft3d+i-1)*rho_notpaw(i)
387            end do
388!$OMP END DO
389            lm = lm + 1
390         end do
391         end do
392         call D3dB_Vector_SumAll((psp_fmm_lmax+1)**2,
393     >                           dbl_mb(psp_fmm_Mlm(1)))
394         call FMM_fion_Mlm(psp_fmm_lmax,ion_nion(),nion_qm,
395     >                     int_mb(ion_katm_ptr()),
396     >                     dbl_mb(ion_rion_ptr()),
397     >                     dbl_mb(psp_zv_ptr()),
398     >                     lmbda0,
399     >                     psp_fmm_rmax2,
400     >                     dbl_mb(psp_fmm_Mlm(1)),
401     >                     dbl_mb(ftmp(1)))
402
403      end if
404
405      if (control_fast_erf()) then
406
407!$OMP MASTER
408      do j=1,nion
409         if (mod(j-1,np_j).eq.taskid_j) then
410            ia=ion_katm(j)
411            x = ion_rion(1,j)
412            y = ion_rion(2,j)
413            z = ion_rion(3,j)
414            if (j.gt.nion_qm) then
415               lmbda = lmbda0
416            else
417               lmbda = 1.0d0
418            end if
419            if ((.not.psp_fmm).or.((x*x+y*y+z*z).le.psp_fmm_rmax2)) then
420            q = -psp_zv(ia)
421            c = 1.0d0/psp_rlocal(ia)
422            fx = 0.0d0
423            fy = 0.0d0
424            fz = 0.0d0
425            ispawv = (int_mb(psp_type(1)+ia-1).eq.4)
426            do i=1,n2ft3d_map
427               rx = x - r_grid(1,i)
428               ry = y - r_grid(2,i)
429               rz = z - r_grid(3,i)
430               r  = dsqrt( rx**2 + ry**2 + rz**2)
431
432               if (r .gt. 1.0d-8) then
433                 yerf=r*c
434                 fterf = (1.0d0
435     >                 + yerf*(c1 + yerf*(c2
436     >                 + yerf*(c3 + yerf*(c4
437     >                 + yerf*(c5 + yerf*c6))))))**4
438                 verf = (1.0d0 - 1.0d0/fterf**4)
439c                 verf = util_erf(yerf)
440                 v    = q*( (2.0d0/sqrt_pi)*(r*c)*exp(-(r*c)**2)
441     >                    - verf)/r**3
442               else
443                 v = 0.0d0
444               end if
445
446               if (ispawv) then
447                  fx = fx + rho_paw(i)*rx*v
448                  fy = fy + rho_paw(i)*ry*v
449                  fz = fz + rho_paw(i)*rz*v
450               else
451                  fx = fx + rho_notpaw(i)*rx*v
452                  fy = fy + rho_notpaw(i)*ry*v
453                  fz = fz + rho_notpaw(i)*rz*v
454               end if
455            end do
456            dbl_mb(ftmp(1)+3*(j-1))   = -fx*dv*lmbda
457            dbl_mb(ftmp(1)+3*(j-1)+1) = -fy*dv*lmbda
458            dbl_mb(ftmp(1)+3*(j-1)+2) = -fz*dv*lmbda
459            end if
460         end if
461      end do
462!$OMP END MASTER
463!$OMP BARRIER
464
465      else
466
467!$OMP MASTER
468      do j=1,nion
469
470         if (mod(j-1,np_j).eq.taskid_j) then
471            ia= ion_katm(j)
472            x = ion_rion(1,j)
473            y = ion_rion(2,j)
474            z = ion_rion(3,j)
475            if (j.gt.nion_qm) then
476               lmbda = lmbda0
477            else
478               lmbda = 1.0d0
479            end if
480            if ((.not.psp_fmm).or.((x*x+y*y+z*z).le.psp_fmm_rmax2)) then
481            q = -psp_zv(ia)
482            c = 1.0d0/psp_rlocal(ia)
483            ispawv = (int_mb(psp_type(1)+ia-1).eq.4)
484            fx = 0.0d0
485            fy = 0.0d0
486            fz = 0.0d0
487            do i=1,n2ft3d_map
488               rx = x - r_grid(1,i)
489               ry = y - r_grid(2,i)
490               rz = z - r_grid(3,i)
491               r  = dsqrt( rx**2 + ry**2 + rz**2)
492
493               if (r .gt. 1.0d-8) then
494                 yerf=r*c
495c                fterf = (1.0d0
496c    >                 + yerf*(c1 + yerf*(c2
497c    >                 + yerf*(c3 + yerf*(c4
498c    >                 + yerf*(c5 + yerf*c6))))))**4
499c                verf = (1.0d0 - 1.0d0/fterf**4)
500                 verf = util_erf(yerf)
501                 v    = q*( (2.0d0/sqrt_pi)*(r*c)*exp(-(r*c)**2)
502     >                    - verf)/r**3
503               else
504                 v = 0.0d0
505               end if
506
507               if (ispawv) then
508                  fx = fx + rho_paw(i)*rx*v
509                  fy = fy + rho_paw(i)*ry*v
510                  fz = fz + rho_paw(i)*rz*v
511               else
512                  fx = fx + rho_notpaw(i)*rx*v
513                  fy = fy + rho_notpaw(i)*ry*v
514                  fz = fz + rho_notpaw(i)*rz*v
515               end if
516            end do
517            dbl_mb(ftmp(1)+3*(j-1))   = -fx*dv*lmbda
518            dbl_mb(ftmp(1)+3*(j-1)+1) = -fy*dv*lmbda
519            dbl_mb(ftmp(1)+3*(j-1)+2) = -fz*dv*lmbda
520
521            end if
522         end if
523      end do
524!$OMP END MASTER
525!$OMP BARRIER
526
527      end if
528
529      call Parallel_Vector_SumAll(3*nion,dbl_mb(ftmp(1)))
530      call daxpy_omp(3*nion,1.0d0,dbl_mb(ftmp(1)),1,fion,1)
531
532      if (.not.BA_pop_stack(ftmp(2)))
533     > call errquit('f_lr_local_seperate_paw:popping stack',1,MA_ERR)
534
535      call nwpw_timing_end(5)
536
537      return
538      end
539
540
541
542*     ***********************************
543*     *					*
544*     *	     v_local_seperate_paw  	*
545*     *					*
546*     ***********************************
547
548      subroutine v_local_seperate_paw(vl_paw,vl_notpaw)
549      implicit none
550      complex*16 vl_paw(*)
551      complex*16 vl_notpaw(*)
552
553#include "bafdecls.fh"
554#include "errquit.fh"
555#include "psp.fh"
556
557
558*     *** local variables ***
559      integer taskid_j,np_j
560      integer npack0,nion,nion_qm
561      integer i,ii,ia
562      integer exi(2)
563      logical value,periodic,inside
564      real*8  rxyz(3),fxyz(3),lmbda0,lmbda
565
566*     **** external functions ****
567      logical  pspw_qmmm_lambda_flag
568      integer  Pack_G_indx,ion_nion,ion_nion_qm,ion_katm,control_version
569      real*8   ion_rion,pspw_qmmm_lambda
570      external pspw_qmmm_lambda_flag
571      external Pack_G_indx,ion_nion,ion_nion_qm,ion_katm,control_version
572      external ion_rion,pspw_qmmm_lambda
573
574      call nwpw_timing_start(5)
575      call Parallel2d_np_j(np_j)
576      call Parallel2d_taskid_j(taskid_j)
577      call Pack_npack(0,npack0)
578      nion     = ion_nion()
579      nion_qm  = ion_nion_qm()
580      periodic = (control_version().eq.3)
581
582      lmbda0 = 1.0d0
583      if (pspw_qmmm_lambda_flag()) lmbda0 = pspw_qmmm_lambda()
584
585      value = BA_push_get(mt_dcpl,npack0,'exi', exi(2), exi(1))
586      if (.not.value)
587     > call errquit('v_local_seperate_paw:out of stack',0,MA_ERR)
588
589      !call dcopy((2*npack0),0.0d0,0,vl_paw,1)
590      !call dcopy((2*npack0),0.0d0,0,vl_notpaw,1)
591      call Parallel_shared_vector_zero(.false.,2*npack0,vl_paw)
592      call Parallel_shared_vector_zero(.true., 2*npack0,vl_notpaw)
593
594      do ii=1,nion
595
596          if (mod(ii-1,np_j).eq.taskid_j) then
597
598             if (.not.periodic) then
599                rxyz(1) = ion_rion(1,ii)
600                rxyz(2) = ion_rion(2,ii)
601                rxyz(3) = ion_rion(3,ii)
602                call lattice_r1_to_frac(1,rxyz,fxyz)
603                inside =((dabs(fxyz(1)).le.0.4d0).and.
604     >                   (dabs(fxyz(2)).le.0.4d0).and.
605     >                   (dabs(fxyz(3)).le.0.4d0))
606             else
607                inside = .true.
608             end if
609
610             if (inside) then
611                ia=ion_katm(ii)
612                if (ii.gt.nion_qm) then
613                   lmbda = lmbda0
614                else
615                   lmbda = 1.0d0
616                end if
617
618*               **** structure factor and local pseudopotential ****
619                call strfac_pack(0,ii,dcpl_mb(exi(1)))
620
621*               **** add to local psp ****
622                if ((int_mb(psp_type(1)+ia-1).eq.4)) then
623                   call Pack_tc_MulAdd(0,dbl_mb(vl(1)+npack0*(ia-1)),
624     >                                dcpl_mb(exi(1)),
625     >                                vl_paw)
626                else
627c                   call Pack_tc_MulAdd(0,dbl_mb(vl(1)+npack0*(ia-1)),
628c     >                                dcpl_mb(exi(1)),
629c     >                                vl_notpaw)
630                   call Pack_tc_aMulAdd(0,lmbda,
631     >                                dbl_mb(vl(1)+npack0*(ia-1)),
632     >                                dcpl_mb(exi(1)),
633     >                                vl_notpaw)
634                end if
635             end if
636
637          end if
638
639      end do
640
641      if (np_j.gt.1) then
642         call D1dB_Vector_SumAll(2*npack0,vl_paw)
643         call D1dB_Vector_SumAll(2*npack0,vl_notpaw)
644      end if
645
646      value = BA_pop_stack(exi(2))
647      if (.not.value)
648     >  call errquit('v_local_seperate_paw:popping stack',0,MA_ERR)
649
650      call nwpw_timing_end(5)
651      return
652      end
653
654*     ***********************************
655*     *                                 *
656*     *      f_local_seperate_paw       *
657*     *                                 *
658*     ***********************************
659
660      subroutine f_local_seperate_paw(dng_paw,dng_notpaw,fion)
661      implicit none
662      complex*16 dng_paw(*)
663      complex*16 dng_notpaw(*)
664      real*8     fion(3,*)
665
666#include "bafdecls.fh"
667#include "errquit.fh"
668#include "psp.fh"
669
670
671*     *** local variables ***
672      integer taskid_j,np_j
673      integer npack0,nion,nion_qm
674      integer i,ii,ia
675      integer exi(2),ftmp(2),vtmp(2),xtmp(2),G(3)
676      logical value,periodic,inside
677      real*8  rxyz(3),fxyz(3),lmbda0,lmbda
678
679*     **** external functions ****
680      logical  pspw_qmmm_lambda_flag
681      integer  Pack_G_indx,ion_nion,ion_nion_qm,ion_katm,control_version
682      real*8   ion_rion,pspw_qmmm_lambda
683      external pspw_qmmm_lambda_flag
684      external Pack_G_indx,ion_nion,ion_nion_qm,ion_katm,control_version
685      external ion_rion,pspw_qmmm_lambda
686
687      call nwpw_timing_start(5)
688      call Parallel2d_np_j(np_j)
689      call Parallel2d_taskid_j(taskid_j)
690      call Pack_npack(0,npack0)
691      nion     = ion_nion()
692      nion_qm  = ion_nion_qm()
693      periodic = (control_version().eq.3)
694
695      lmbda0 = 1.0d0
696      if (pspw_qmmm_lambda_flag()) lmbda0 = pspw_qmmm_lambda()
697
698      value = BA_push_get(mt_dcpl,npack0,'exi',exi(2),exi(1))
699      value = value.and.
700     >        BA_push_get(mt_dcpl,npack0,'vtmp',vtmp(2),vtmp(1))
701      value = value.and.
702     >        BA_push_get(mt_dbl,npack0,'xtmp',xtmp(2),xtmp(1))
703      value = value.and.
704     >        BA_push_get(mt_dbl,3*nion,'ftmp',ftmp(2),ftmp(1))
705      if (.not.value)
706     > call errquit('f_local_seperate_paw:out of stack',0,MA_ERR)
707
708      !call dcopy(3*nion,0.0d0,0,dbl_mb(ftmp(1)),1)
709      call Parallel_shared_vector_zero(.true.,3*nion,dbl_mb(ftmp(1)))
710      G(1) = Pack_G_indx(0,1)
711      G(2) = Pack_G_indx(0,2)
712      G(3) = Pack_G_indx(0,3)
713
714
715      do ii=1,nion
716
717          if (mod(ii-1,np_j).eq.taskid_j) then
718
719             if (.not.periodic) then
720                rxyz(1) = ion_rion(1,ii)
721                rxyz(2) = ion_rion(2,ii)
722                rxyz(3) = ion_rion(3,ii)
723                call lattice_r1_to_frac(1,rxyz,fxyz)
724                inside =((dabs(fxyz(1)).le.0.4d0).and.
725     >                   (dabs(fxyz(2)).le.0.4d0).and.
726     >                   (dabs(fxyz(3)).le.0.4d0))
727             else
728                inside = .true.
729             end if
730
731             if (inside) then
732                if (ii.gt.nion_qm) then
733                   lmbda = lmbda0
734                else
735                   lmbda = 1.0d0
736                end if
737                ia=ion_katm(ii)
738*               **** structure factor and local pseudopotential ****
739                call strfac_pack(0,ii,dcpl_mb(exi(1)))
740
741*               **** add to local psp ****
742                if ((int_mb(psp_type(1)+ia-1).eq.4)) then
743                   call Pack_tc_Mul(0,dbl_mb(vl(1)+npack0*(ia-1)),
744     >                                dcpl_mb(exi(1)),
745     >                                dcpl_mb(vtmp(1)))
746                   call Pack_cct_iconjgMulb(0,dng_paw,
747     >                                 dcpl_mb(vtmp(1)),
748     >                                 dbl_mb(xtmp(1)))
749                else
750                   call Pack_tc_Mul(0,dbl_mb(vl(1)+npack0*(ia-1)),
751     >                                dcpl_mb(exi(1)),
752     >                                dcpl_mb(vtmp(1)))
753                   call Pack_c_SMul1(0,lmbda,dcpl_mb(vtmp(1)))
754                   call Pack_cct_iconjgMulb(0,dng_notpaw,
755     >                                 dcpl_mb(vtmp(1)),
756     >                                 dbl_mb(xtmp(1)))
757                end if
758                call Pack_tt_idot(0,dbl_mb(G(1)),dbl_mb(xtmp(1)),
759     >                              dbl_mb(ftmp(1)+3*(ii-1)))
760                call Pack_tt_idot(0,dbl_mb(G(2)),dbl_mb(xtmp(1)),
761     >                              dbl_mb(ftmp(1)+3*(ii-1)+1))
762                call Pack_tt_idot(0,dbl_mb(G(3)),dbl_mb(xtmp(1)),
763     >                              dbl_mb(ftmp(1)+3*(ii-1)+2))
764             end if
765
766          end if
767
768      end do
769      call Parallel_Vector_SumAll(3*nion,dbl_mb(ftmp(1)))
770      call daxpy_omp(3*nion,1.0d0,dbl_mb(ftmp(1)),1,fion,1)
771
772
773      value =           BA_pop_stack(ftmp(2))
774      value = value.and.BA_pop_stack(xtmp(2))
775      value = value.and.BA_pop_stack(vtmp(2))
776      value = value.and.BA_pop_stack(exi(2))
777      if (.not.value)
778     >  call errquit('f_local_seperate_paw:popping stack',0,MA_ERR)
779
780      call nwpw_timing_end(5)
781      return
782      end
783
784
785
786
787
788*     **********************************
789*     *                                *
790*     *       grad_v_lr_local_paw      *
791*     *                                *
792*     **********************************
793*
794*     This routine calculates the gradient of the long-range part of the
795*     local pseudopotential (used by version 4)
796*
797*     Entry -
798*     Exit -
799*
800      subroutine grad_v_lr_local_paw(r_grid,rho,fion)
801      implicit none
802      real*8  r_grid(3,*)
803      real*8  rho(*)
804      real*8  fion(3,*)
805
806#include "bafdecls.fh"
807#include "errquit.fh"
808#include "psp.fh"
809
810*     **** Error function parameters ****
811      real*8  yerf,verf
812
813c     real*8 c1,c2,c3,c4,c5,c6,yerf,fterf,verf
814      real*8 c1,c2,c3,c4,c5,c6,fterf
815      parameter (c1=0.07052307840d0,c2=0.04228201230d0)
816      parameter (c3=0.00927052720d0)
817      parameter (c4=0.00015201430d0,c5=0.00027656720d0)
818      parameter (c6=0.00004306380d0)
819
820*     **** local variables ****
821      integer ftmp(2)
822      integer taskid_j,np_j
823      integer i,j,jj,np1,np2,np3,n2ft3d_map,nion,l,m,lm,n2ft3d
824      real*8 x,y,z,q,c,r,sqrt_pi,dv,v,rx,ry,rz,fx,fy,fz
825
826*     **** external functions ****
827      logical  control_fast_erf
828      integer  ion_nion,ion_katm
829      real*8   lattice_omega,ion_rion,psp_rlocal,psp_zv,util_erf
830      external control_fast_erf
831      external ion_nion,ion_katm
832      external lattice_omega,ion_rion,psp_rlocal,psp_zv,util_erf
833      integer  ion_katm_ptr,ion_rion_ptr,psp_zv_ptr
834      external ion_katm_ptr,ion_rion_ptr,psp_zv_ptr
835
836      call nwpw_timing_start(5)
837      call Parallel2d_np_j(np_j)
838      call Parallel2d_taskid_j(taskid_j)
839      call D3dB_n2ft3d_map(1,n2ft3d_map)
840      call D3dB_n2ft3d(1,n2ft3d)
841      nion = ion_nion()
842
843*     **** constants ****
844      sqrt_pi = dsqrt(4.0d0*datan(1.0d0))
845
846      call D3dB_nx(1,np1)
847      call D3dB_ny(1,np2)
848      call D3dB_nz(1,np3)
849      dv = lattice_omega()/dble(np1*np2*np3)
850
851*     ***** allocate temporary space ****
852      if (.not.BA_push_get(mt_dbl,3*nion,'ftmp',ftmp(2),ftmp(1)))
853     > call errquit('grad_v_lr_local:out of stack memory',0, MA_ERR)
854
855
856      call dcopy(3*nion,0.0d0,0,dbl_mb(ftmp(1)),1)
857
858      if (control_fast_erf()) then
859
860      do jj=1,nion_paw
861         if (mod(jj-1,np_j).eq.taskid_j) then
862            j = int_mb(ion_pawtoion(1)+jj-1)
863            x = ion_rion(1,j)
864            y = ion_rion(2,j)
865            z = ion_rion(3,j)
866
867            q = -psp_zv(ion_katm(j))
868            c = 1.0d0/psp_rlocal(ion_katm(j))
869            fx = 0.0d0
870            fy = 0.0d0
871            fz = 0.0d0
872            do i=1,n2ft3d_map
873               rx = x - r_grid(1,i)
874               ry = y - r_grid(2,i)
875               rz = z - r_grid(3,i)
876               r  = dsqrt( rx**2 + ry**2 + rz**2)
877
878               if (r .gt. 1.0d-8) then
879                 yerf=r*c
880                 fterf = (1.0d0
881     >                 + yerf*(c1 + yerf*(c2
882     >                 + yerf*(c3 + yerf*(c4
883     >                 + yerf*(c5 + yerf*c6))))))**4
884                 verf = (1.0d0 - 1.0d0/fterf**4)
885c                 verf = util_erf(yerf)
886                 v    = q*( (2.0d0/sqrt_pi)*(r*c)*exp(-(r*c)**2)
887     >                    - verf)/r**3
888               else
889                 v = 0.0d0
890               end if
891
892               fx = fx + rho(i)*rx*v
893               fy = fy + rho(i)*ry*v
894               fz = fz + rho(i)*rz*v
895            end do
896            dbl_mb(ftmp(1)+3*(j-1))   = -fx*dv
897            dbl_mb(ftmp(1)+3*(j-1)+1) = -fy*dv
898            dbl_mb(ftmp(1)+3*(j-1)+2) = -fz*dv
899
900         end if
901      end do
902
903      else
904
905      do jj=1,nion_paw
906         if (mod(jj-1,np_j).eq.taskid_j) then
907            j = int_mb(ion_pawtoion(1)+jj-1)
908            x = ion_rion(1,j)
909            y = ion_rion(2,j)
910            z = ion_rion(3,j)
911
912
913            q = -psp_zv(ion_katm(j))
914            c = 1.0d0/psp_rlocal(ion_katm(j))
915            fx = 0.0d0
916            fy = 0.0d0
917            fz = 0.0d0
918            do i=1,n2ft3d_map
919               rx = x - r_grid(1,i)
920               ry = y - r_grid(2,i)
921               rz = z - r_grid(3,i)
922               r  = dsqrt( rx**2 + ry**2 + rz**2)
923
924               if (r .gt. 1.0d-8) then
925                 yerf=r*c
926c                fterf = (1.0d0
927c    >                 + yerf*(c1 + yerf*(c2
928c    >                 + yerf*(c3 + yerf*(c4
929c    >                 + yerf*(c5 + yerf*c6))))))**4
930c                verf = (1.0d0 - 1.0d0/fterf**4)
931                 verf = util_erf(yerf)
932                 v    = q*( (2.0d0/sqrt_pi)*(r*c)*exp(-(r*c)**2)
933     >                    - verf)/r**3
934               else
935                 v = 0.0d0
936               end if
937
938               fx = fx + rho(i)*rx*v
939               fy = fy + rho(i)*ry*v
940               fz = fz + rho(i)*rz*v
941            end do
942            dbl_mb(ftmp(1)+3*(j-1))   = -fx*dv
943            dbl_mb(ftmp(1)+3*(j-1)+1) = -fy*dv
944            dbl_mb(ftmp(1)+3*(j-1)+2) = -fz*dv
945
946*        fion(1,j) = fion(1,j) - ddot(n2ft3d,rho,1,gv(1,1),3)*dv
947*        fion(2,j) = fion(2,j) - ddot(n2ft3d,rho,1,gv(2,1),3)*dv
948*        fion(3,j) = fion(3,j) - ddot(n2ft3d,rho,1,gv(3,1),3)*dv
949c         call D3dB_SumAll(fx)
950c         call D3dB_SumAll(fy)
951c         call D3dB_SumAll(fz)
952c         fion(1,j) = fion(1,j) - fx*dv
953c         fion(2,j) = fion(2,j) - fy*dv
954c         fion(3,j) = fion(3,j) - fz*dv
955
956         end if
957      end do
958
959      end if
960
961      call Parallel_Vector_SumAll(3*nion,dbl_mb(ftmp(1)))
962      call daxpy(3*nion,1.0d0,dbl_mb(ftmp(1)),1,fion,1)
963
964      if (.not.BA_pop_stack(ftmp(2)))
965     > call errquit('paw_grad_v_lr_local:popping stack',1,MA_ERR)
966
967      call nwpw_timing_end(5)
968
969      return
970      end
971
972
973*     ***********************************
974*     *				        *
975*     *	 	  v_lr_local_paw  	*
976*     *					*
977*     ***********************************
978*
979*     This routine calculates the long-range part of the
980*     local pseudopotential (used by version4)
981*
982      subroutine v_lr_local_paw(r_grid,vlr_out)
983      implicit none
984      real*8     r_grid(3,*)
985      real*8     vlr_out(*)
986
987#include "bafdecls.fh"
988#include "errquit.fh"
989#include "psp.fh"
990
991*     **** Error function parameters ****
992      real*8 xerf,yerf
993c     real*8 c1,c2,c3,c4,c5,c6,yerf,xerf
994      real*8 c1,c2,c3,c4,c5,c6
995      parameter (c1=0.07052307840d0,c2=0.04228201230d0)
996      parameter (c3=0.00927052720d0)
997      parameter (c4=0.00015201430d0,c5=0.00027656720d0)
998      parameter (c6=0.00004306380d0)
999
1000*     **** local variables ****
1001      integer taskid_j,np_j
1002      integer i,j,jj,n2ft3d,n2ft3d_map,l,m,lm
1003      real*8 x,y,z,q,c,r,sqrt_pi
1004
1005*     **** external functions ****
1006      logical  control_fast_erf
1007      integer  ion_nion,ion_katm
1008      real*8   ion_rion,psp_rlocal,psp_zv,util_erf
1009      external control_fast_erf
1010      external ion_nion,ion_katm
1011      external ion_rion,psp_rlocal,psp_zv,util_erf
1012      integer  ion_katm_ptr,ion_rion_ptr,psp_zv_ptr
1013      external ion_katm_ptr,ion_rion_ptr,psp_zv_ptr
1014
1015      call nwpw_timing_start(5)
1016      call Parallel2d_np_j(np_j)
1017      call Parallel2d_taskid_j(taskid_j)
1018      call D3dB_n2ft3d(1,n2ft3d)
1019      call D3dB_n2ft3d_map(1,n2ft3d_map)
1020
1021      sqrt_pi = dsqrt(4.0d0*datan(1.0d0))
1022      call dcopy(n2ft3d,0.0d0,0,vlr_out,1)
1023
1024
1025      if (control_fast_erf()) then
1026
1027      do jj=1,nion_paw
1028         if (mod(jj-1,np_j).eq.taskid_j) then
1029            j = int_mb(ion_pawtoion(1)+jj-1)
1030            x = ion_rion(1,j)
1031            y = ion_rion(2,j)
1032            z = ion_rion(3,j)
1033
1034            q = -psp_zv(ion_katm(j))
1035            c = 1.0d0/psp_rlocal(ion_katm(j))
1036
1037            do i=1,n2ft3d_map
1038               r = dsqrt( (r_grid(1,i)-x)**2
1039     >                  + (r_grid(2,i)-y)**2
1040     >                  + (r_grid(3,i)-z)**2)
1041               if (r.gt.1.0d-15) then
1042                 xerf=r*c
1043                 yerf = (1.0d0
1044     >                 + xerf*(c1 + xerf*(c2
1045     >                 + xerf*(c3 + xerf*(c4
1046     >                 + xerf*(c5 + xerf*c6))))))**4
1047                 yerf = (1.0d0 - 1.0d0/yerf**4)
1048c                 yerf = util_erf(xerf)
1049                 vlr_out(i) = vlr_out(i) + (q/r)*yerf
1050               else
1051                 vlr_out(i) = vlr_out(i) + 2.0d0*q*c/sqrt_pi
1052               end if
1053            end do
1054
1055         end if
1056      end do
1057
1058      else
1059
1060      do jj=1,nion_paw
1061         if (mod(jj-1,np_j).eq.taskid_j) then
1062            j = int_mb(ion_pawtoion(1)+jj-1)
1063            x = ion_rion(1,j)
1064            y = ion_rion(2,j)
1065            z = ion_rion(3,j)
1066
1067            q = -psp_zv(ion_katm(j))
1068            c = 1.0d0/psp_rlocal(ion_katm(j))
1069
1070            do i=1,n2ft3d_map
1071               r = dsqrt( (r_grid(1,i)-x)**2
1072     >                  + (r_grid(2,i)-y)**2
1073     >                  + (r_grid(3,i)-z)**2)
1074               if (r.gt.1.0d-15) then
1075                 xerf=r*c
1076                 yerf = util_erf(xerf)
1077                 vlr_out(i) = vlr_out(i) + (q/r)*yerf
1078c                vlr_out(i) = vlr_out(i) + (q/r)*erf(r*c)
1079               else
1080                 vlr_out(i) = vlr_out(i) + 2.0d0*q*c/sqrt_pi
1081               end if
1082            end do
1083
1084         end if
1085      end do
1086
1087      end if
1088      if (np_j.gt.1) call D1dB_Vector_SumAll(n2ft3d_map,vlr_out)
1089
1090      call nwpw_timing_end(5)
1091
1092      return
1093      end
1094
1095
1096
1097
1098*     ***********************************
1099*     *					*
1100*     *	 	v_locals_paw		*
1101*     *					*
1102*     ***********************************
1103
1104      subroutine v_locals_paw(vlpaw_out,move,dng,fion)
1105      implicit none
1106      complex*16 vlpaw_out(*)
1107      logical    move
1108      complex*16 dng(*)
1109      real*8     fion(3,*)
1110
1111#include "bafdecls.fh"
1112#include "errquit.fh"
1113#include "psp.fh"
1114
1115
1116*     *** local variables ***
1117      integer taskid_j,np_j
1118      integer npack0,nion
1119      integer i,ii,ia,iii
1120      integer exi(2),vtmp(2),xtmp(2),G(3)
1121      logical value,periodic,inside
1122      real*8  rxyz(3),fxyz(3)
1123
1124*     **** external functions ****
1125      integer  Pack_G_indx,ion_nion,ion_katm,control_version
1126      real*8   ion_rion
1127      external Pack_G_indx,ion_nion,ion_katm,control_version
1128      external ion_rion
1129
1130      if (pawexist)  then
1131
1132      call nwpw_timing_start(5)
1133      call Parallel2d_np_j(np_j)
1134      call Parallel2d_taskid_j(taskid_j)
1135      call Pack_npack(0,npack0)
1136      nion     = ion_nion()
1137      periodic = (control_version().eq.3)
1138
1139      value = BA_push_get(mt_dcpl,npack0,'exi', exi(2), exi(1))
1140      if (.not. value)
1141     >  call errquit('v_locals_paw:out of stack memory',0,MA_ERR)
1142
1143*     **** define Gx,Gy and Gz in packed space ****
1144      if (move) then
1145         value = BA_push_get(mt_dcpl,npack0,'vtmp',vtmp(2),vtmp(1))
1146         value = value.and.
1147     >           BA_push_get(mt_dbl, npack0,'xtmp',xtmp(2),xtmp(1))
1148         if (.not. value)
1149     >   call errquit('v_locals_paw: out of stack memory',0, MA_ERR)
1150         G(1)  = Pack_G_indx(0,1)
1151         G(2)  = Pack_G_indx(0,2)
1152         G(3)  = Pack_G_indx(0,3)
1153         call dcopy(3*nion,0.0d0,0,fion,1)
1154      end if
1155
1156      call dcopy((4*npack0),0.0d0,0,vlpaw_out,1)
1157      do iii=1,nion_paw
1158       if (mod(iii-1,np_j).eq.taskid_j) then
1159          ii = int_mb(ion_pawtoion(1)+iii-1)
1160
1161          if (.not.periodic) then
1162             rxyz(1) = ion_rion(1,ii)
1163             rxyz(2) = ion_rion(2,ii)
1164             rxyz(3) = ion_rion(3,ii)
1165             call lattice_r1_to_frac(1,rxyz,fxyz)
1166             inside =((dabs(fxyz(1)).le.0.4d0).and.
1167     >                (dabs(fxyz(2)).le.0.4d0).and.
1168     >                (dabs(fxyz(3)).le.0.4d0))
1169          else
1170             inside = .true.
1171          end if
1172
1173          if (inside) then
1174           ia=ion_katm(ii)
1175
1176*          **** structure factor and local pseudopotential ****
1177           call strfac_pack(0,ii,dcpl_mb(exi(1)))
1178
1179*          **** add to local psp ****
1180           call Pack_tc_MulAdd(0,dbl_mb(vlpaw(1)+npack0*(ia-1)),
1181     >                      dcpl_mb(exi(1)),
1182     >                      vlpaw_out)
1183           call Pack_tc_MulAdd(0,dbl_mb(vl(1)+npack0*(ia-1)),
1184     >                      dcpl_mb(exi(1)),
1185     >                      vlpaw_out(1+npack0))
1186
1187           if (move) then
1188              call Pack_ttcc_AddMul(0,dbl_mb(vlpaw(1)+npack0*(ia-1)),
1189     >                              dbl_mb(vl(1)+npack0*(ia-1)),
1190     >                              dcpl_mb(exi(1)),
1191     >                              dcpl_mb(vtmp(1)))
1192            call Pack_cct_iconjgMulb(0,dng,
1193     >                                 dcpl_mb(vtmp(1)),
1194     >                                 dbl_mb(xtmp(1)))
1195            call Pack_tt_idot(0,dbl_mb(G(1)),dbl_mb(xtmp(1)),fion(1,ii))
1196            call Pack_tt_idot(0,dbl_mb(G(2)),dbl_mb(xtmp(1)),fion(2,ii))
1197            call Pack_tt_idot(0,dbl_mb(G(3)),dbl_mb(xtmp(1)),fion(3,ii))
1198           end if
1199          end if
1200
1201       end if
1202      end do
1203      if (np_j.gt.1) then
1204         call D1dB_Vector_SumAll(4*npack0,vlpaw_out)
1205      end if
1206      if (move) call Parallel_Vector_SumAll(3*nion,fion)
1207
1208      value = .true.
1209      if (move) then
1210         value = value.and.BA_pop_stack(xtmp(2))
1211         value = value.and.BA_pop_stack(vtmp(2))
1212      end if
1213      value = value.and.BA_pop_stack(exi(2))
1214      if (.not. value)
1215     >   call errquit('v_locals_paw:popping stack',0, MA_ERR)
1216
1217      call nwpw_timing_end(5)
1218      end if
1219      return
1220      end
1221
1222
1223
1224
1225*     ***********************************
1226*     *					*
1227*     *	 	   f_vlocals_paw	*
1228*     *					*
1229*     ***********************************
1230      subroutine f_vlocal_paw(dng,fion)
1231      implicit none
1232      complex*16 dng(*)
1233      real*8     fion(3,*)
1234
1235#include "bafdecls.fh"
1236#include "errquit.fh"
1237#include "psp.fh"
1238
1239
1240*     *** local variables ***
1241      logical value,periodic,inside
1242      integer taskid_j,np_j
1243      integer npack0,nion
1244      integer i,ii,ia,iii
1245      integer exi(2),vtmp(2),xtmp(2),G(3)
1246c      integer Gx(2),Gy(2),Gz(2)
1247      real*8 rxyz(3),fxyz(3)
1248
1249*     **** external functions ****
1250      integer  Pack_G_indx,ion_nion,ion_katm,control_version
1251      real*8   ion_rion
1252      external Pack_G_indx,ion_nion,ion_katm,control_version
1253      external ion_rion
1254
1255      if (pawexist)  then
1256      call nwpw_timing_start(5)
1257      call Parallel2d_np_j(np_j)
1258      call Parallel2d_taskid_j(taskid_j)
1259      call Pack_npack(0,npack0)
1260      nion     = ion_nion()
1261      periodic = (control_version().eq.3)
1262
1263      value = BA_push_get(mt_dcpl,npack0,'exi', exi(2), exi(1))
1264      value = value.and.
1265     >        BA_push_get(mt_dcpl,npack0,'vtmp',vtmp(2),vtmp(1))
1266      value = value.and.
1267     >        BA_push_get(mt_dbl, npack0,'xtmp',xtmp(2),xtmp(1))
1268      if (.not. value) call errquit('out of stack memory',0, MA_ERR)
1269
1270c     **** define Gx,Gy and Gz in packed space ****
1271      G(1)  = Pack_G_indx(0,1)
1272      G(2)  = Pack_G_indx(0,2)
1273      G(3)  = Pack_G_indx(0,3)
1274      call dcopy(3*nion,0.0d0,0,fion,1)
1275
1276      do iii=1,nion_paw
1277        if (mod(iii-1,np_j).eq.taskid_j) then
1278          ii = int_mb(ion_pawtoion(1)+iii-1)
1279
1280          if (.not.periodic) then
1281             rxyz(1) = ion_rion(1,ii)
1282             rxyz(2) = ion_rion(2,ii)
1283             rxyz(3) = ion_rion(3,ii)
1284             call lattice_r1_to_frac(1,rxyz,fxyz)
1285             inside =((dabs(fxyz(1)).le.0.4d0).and.
1286     >                (dabs(fxyz(2)).le.0.4d0).and.
1287     >                (dabs(fxyz(3)).le.0.4d0))
1288          else
1289             inside = .true.
1290          endif
1291
1292          if (inside) then
1293           ia=ion_katm(ii)
1294
1295*          **** structure factor and local pseudopotential ****
1296           call strfac_pack(0,ii,dcpl_mb(exi(1)))
1297
1298*          **** add to local psp ****
1299           call Pack_tc_Mul(0,dbl_mb(vlpaw(1)+npack0*(ia-1)),
1300     >                      dcpl_mb(exi(1)),
1301     >                      dcpl_mb(vtmp(1)))
1302
1303c#ifndef CRAY
1304c!DIR$ ivdep
1305c#endif
1306c            do i=1,npack0
1307c              dbl_mb(xtmp(1)+i-1)
1308c     >        = dimag(dng(i))* dble(dcpl_mb(vtmp(1)+i-1))
1309c     >         - dble(dng(i))*dimag(dcpl_mb(vtmp(1)+i-1))
1310c           end do
1311           call Pack_cct_iconjgMulb(0,dng,
1312     >                                dcpl_mb(vtmp(1)),
1313     >                                dbl_mb(xtmp(1)))
1314           call Pack_tt_idot(0,dbl_mb(G(1)),dbl_mb(xtmp(1)),fion(1,ii))
1315           call Pack_tt_idot(0,dbl_mb(G(2)),dbl_mb(xtmp(1)),fion(2,ii))
1316           call Pack_tt_idot(0,dbl_mb(G(3)),dbl_mb(xtmp(1)),fion(3,ii))
1317          end if
1318
1319        end if
1320      end do
1321      call Parallel_Vector_SumAll(3*nion,fion)
1322
1323      value =           BA_pop_stack(xtmp(2))
1324      value = value.and.BA_pop_stack(vtmp(2))
1325      value = value.and.BA_pop_stack(exi(2))
1326      if (.not. value) call errquit('popping stack',0, MA_ERR)
1327
1328      call nwpw_timing_end(5)
1329      end if
1330      return
1331      end
1332
1333
1334
1335*     ***********************************
1336*     *                                 *
1337*     *            psp_pawexist         *
1338*     *                                 *
1339*     ***********************************
1340      logical function psp_pawexist()
1341      implicit none
1342
1343#include "psp.fh"
1344
1345      psp_pawexist = pawexist
1346      return
1347      end
1348
1349*     ***********************************
1350*     *                                 *
1351*     *        psp_paw_use_grid_cmp     *
1352*     *                                 *
1353*     ***********************************
1354      logical function psp_paw_use_grid_cmp()
1355      implicit none
1356
1357#include "psp.fh"
1358
1359      psp_paw_use_grid_cmp = use_grid_cmp
1360      return
1361      end
1362
1363*     ***********************************
1364*     *                                 *
1365*     *         psp_paw_mult_l_max      *
1366*     *                                 *
1367*     ***********************************
1368      integer function psp_paw_mult_l_max()
1369      implicit none
1370
1371      integer  nwpw_xc_mult_l_max,mult_l
1372      external nwpw_xc_mult_l_max
1373
1374      mult_l = nwpw_xc_mult_l_max()
1375      psp_paw_mult_l_max = mult_l
1376      return
1377      end
1378
1379*     ***********************************
1380*     *					*
1381*     *	 	   psp_overlap_S	*
1382*     *					*
1383*     ***********************************
1384
1385*    This routine computes the paw overlap S operator to psi1
1386*      psi2 = S*psi1
1387*
1388      subroutine psp_overlap_S(ispin,neq,psi1,psi2)
1389      implicit none
1390      integer    ispin,neq(2)
1391      complex*16 psi1(*)
1392      complex*16 psi2(*)
1393
1394#include "bafdecls.fh"
1395#include "psp.fh"
1396#include "errquit.fh"
1397
1398*     *** local variables ***
1399      integer npack1,ij
1400      integer ii,ia,iii,l,nn
1401      integer k,shift,l_prj,nproj,Gijl_indx
1402      real*8  omega,scal,scalsqr
1403      integer exi(2),sw1(2),sw2(2)
1404      logical value,sd_function
1405
1406*     **** external functions ****
1407      integer  ion_katm
1408      integer  psi_data_get_ptr
1409      real*8   lattice_omega
1410      external ion_katm
1411      external psi_data_get_ptr
1412      external lattice_omega
1413
1414
1415      call nwpw_timing_start(6)
1416
1417*     **** allocate local memory ****
1418      nn = neq(1)+neq(2)
1419      call Pack_npack(1,npack1)
1420
1421      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
1422      value = value.and.
1423     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1',sw1(2),sw1(1))
1424      value = value.and.
1425     >        BA_push_get(mt_dbl,nn*nprj_max,'sw2',sw2(2),sw2(1))
1426
1427      if (.not.value)
1428     >  call errquit('psp_overlap_S: out of stack',0,MA_ERR)
1429
1430      omega = lattice_omega()
1431      scal = 1.0d0/(omega)
1432      scalsqr = scal*scal
1433
1434      !call dcopy(2*npack1*nn,psi1,1,psi2,1)
1435      call Parallel_shared_vector_copy(.true.,2*npack1*nn,psi1,psi2)
1436      do iii=1,nion_paw
1437        ii = int_mb(ion_pawtoion(1)+iii-1)
1438        ia = ion_katm(ii)
1439
1440        nproj = int_mb(nprj(1)+ia-1)
1441
1442        if (nproj.gt.0) then
1443
1444*       **** structure factor and local pseudopotential ****
1445        call strfac_pack(1,ii,dcpl_mb(exi(1)))
1446
1447*       **** generate sw1's and projectors ****
1448        do l=1,nproj
1449
1450           shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
1451           l_prj = int_mb(l_projector(1)+(l-1)
1452     >                                  + (ia-1)*(nmax_max*lmmax_max))
1453           !sd_function = .not.and(l_prj,1)
1454#ifdef GCC4
1455           k = iand(l_prj,1)
1456#else
1457           k = and(l_prj,1)
1458#endif
1459           sd_function = (k.eq.0)
1460
1461
1462*          **** phase factor does not matter therefore ****
1463*          **** (-i)^l is the same as (i)^l in the     ****
1464*          **** Rayleigh scattering formula            ****
1465
1466*           **** phase fact DOES matter for compensation charge!!!!     ****
1467*           **** assume that sign factor for proj is in kbpp formatting ****
1468
1469*          *** current function is s or d ****
1470           if (sd_function) then
1471              call Pack_tc_Mul(1, dbl_mb(shift),
1472     >                           dcpl_mb(exi(1)),
1473     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
1474*          *** current function is p or f ****
1475           else
1476              call Pack_tc_iMul(1,dbl_mb(shift),
1477     >                           dcpl_mb(exi(1)),
1478     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
1479           end if
1480           call Pack_cc_indot(1,nn,
1481     >                      psi1,
1482     >                      dcpl_mb(prjtmp(1)+(l-1)*npack1),
1483     >                      dbl_mb(sw1(1)+(l-1)*nn))
1484        end do
1485        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1(1)))
1486
1487
1488*       **** sw2 = Sijl*sw1 ******
1489        Gijl_indx = psi_data_get_ptr(int_mb(Gijl(1)+ia-1),2)
1490        call Multiply_Gijl_sw1(nn,
1491     >                         nproj,
1492     >                         int_mb(nmax(1)+ia-1),
1493     >                         int_mb(lmax(1)+ia-1),
1494     >                         int_mb(n_projector(1)
1495     >                                + (ia-1)*(nmax_max*lmmax_max)),
1496     >                         int_mb(l_projector(1)
1497     >                                + (ia-1)*(nmax_max*lmmax_max)),
1498     >                         int_mb(m_projector(1)
1499     >                                + (ia-1)*(nmax_max*lmmax_max)),
1500     >                         dbl_mb(Gijl_indx),
1501     >                         dbl_mb(sw1(1)),
1502     >                         dbl_mb(sw2(1)))
1503
1504*       **** do Kleinman-Bylander Multiplication ****
1505        !scal = 1.0d0/(omega)
1506        !nproj = int_mb(nprj(1)+ia-1)
1507        !call dscal(nn*int_mb(nprj(1)+ia-1),scal,dbl_mb(sw2(1)),1)
1508        call DSCAL_OMP(nn*nproj,scal,dbl_mb(sw2(1)),1)
1509
1510        call DGEMM_OMP('N','T',2*npack1,nn,nproj,
1511     >             (1.0d0),
1512     >             dcpl_mb(prjtmp(1)), 2*npack1,
1513     >             dbl_mb(sw2(1)),     nn,
1514     >             (1.0d0),
1515     >             psi2,               2*npack1)
1516        end if !** nproj>0 **
1517      end do !** iii **
1518
1519      value =           BA_pop_stack(sw2(2))
1520      value = value.and.BA_pop_stack(sw1(2))
1521      value = value.and.BA_pop_stack(exi(2))
1522      if (.not.value) call errquit('psp_overlap_S: popping stack',3,
1523     &       MA_ERR)
1524      call nwpw_timing_end(6)
1525      return
1526      end
1527
1528
1529
1530
1531*     ***********************************
1532*     *					*
1533*     *	    psp_add_paw_extra_overlap1	*
1534*     *					*
1535*     ***********************************
1536
1537*    This routine adds the extra part of the paw overlap S operator to psi1
1538*      S = S + Smatrix, where Smatrix = <psi1| S-I|psi1>
1539*
1540      subroutine psp_add_paw_extra_overlap1(mb,psi1,S)
1541      implicit none
1542      integer    mb
1543      complex*16 psi1(*)
1544      real*8     S(*)
1545
1546#include "bafdecls.fh"
1547#include "errquit.fh"
1548#include "psp.fh"
1549
1550*     *** local variables ***
1551      integer npack1
1552      integer ii,ia,iii,l,nn,ms,shifts,shiftsw
1553      integer k,shift,l_prj,nproj,Gijl_indx,neq(2),ishift
1554      real*8  omega,scal,scalsqr
1555      integer exi(2),sw1(2),sw2(2)
1556      logical value,sd_function
1557
1558*     **** external functions ****
1559      integer  ion_katm
1560      integer  psi_data_get_ptr
1561      real*8   lattice_omega
1562      external ion_katm
1563      external psi_data_get_ptr
1564      external lattice_omega
1565
1566      call nwpw_timing_start(6)
1567
1568
1569cc*     **** S = transpose(psi)*psi ****
1570cc      call Dneall_ffm_sym_Multiply(mb,psi1,psi1,npack1,S)
1571
1572
1573      if (pawexist) then
1574
1575      call Pack_npack(1,npack1)
1576      call Dneall_neq(neq)
1577      if (mb.eq.0) then
1578         nn = neq(1) + neq(2)
1579         ishift = 1
1580      else
1581         nn = neq(mb)
1582         ishift = 1 + (mb-1)*neq(1)*npack1
1583      end if
1584
1585*     **** allocate local memory ****
1586      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
1587      value = value.and.
1588     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1',sw1(2),sw1(1))
1589      value = value.and.
1590     >        BA_push_get(mt_dbl,nn*nprj_max,'sw2',sw2(2),sw2(1))
1591      if (.not.value)
1592     >  call errquit('psp_paw_extra_overlap1:out of stack',0,MA_ERR)
1593
1594      omega = lattice_omega()
1595      scal    = 1.0d0/(omega)
1596      scalsqr = scal*scal
1597
1598      do iii=1,nion_paw
1599
1600        ii    = int_mb(ion_pawtoion(1)+iii-1)
1601        ia    = ion_katm(ii)
1602        nproj = int_mb(nprj(1)+ia-1)
1603
1604        if (nproj.gt.0) then
1605
1606*       **** structure factor and local pseudopotential ****
1607        call strfac_pack(1,ii,dcpl_mb(exi(1)))
1608
1609*       **** generate sw1's and projectors ****
1610        do l=1,nproj
1611
1612           shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
1613           l_prj = int_mb(l_projector(1)+(l-1)
1614     >                                  + (ia-1)*(nmax_max*lmmax_max))
1615           !sd_function = .not.and(l_prj,1)
1616#ifdef GCC4
1617           k = iand(l_prj,1)
1618#else
1619           k = and(l_prj,1)
1620#endif
1621           sd_function = (k.eq.0)
1622
1623
1624*          **** phase factor does not matter therefore ****
1625*          **** (-i)^l is the same as (i)^l in the     ****
1626*          **** Rayleigh scattering formula            ****
1627
1628*          *** current function is s or d ****
1629           if (sd_function) then
1630              call Pack_tc_Mul(1,dbl_mb(shift),
1631     >                           dcpl_mb(exi(1)),
1632     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
1633*          *** current function is p or f ****
1634           else
1635              call Pack_tc_iMul(1,dbl_mb(shift),
1636     >                           dcpl_mb(exi(1)),
1637     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
1638           end if
1639           call Pack_cc_indot(1,nn,
1640     >                      psi1(ishift),
1641     >                      dcpl_mb(prjtmp(1)+(l-1)*npack1),
1642     >                      dbl_mb(sw1(1)+(l-1)*nn))
1643        end do
1644        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1(1)))
1645
1646*       **** sw2 = Sijl*sw1 ******
1647        Gijl_indx = psi_data_get_ptr(int_mb(Gijl(1)+ia-1),2)
1648        call Multiply_Gijl_sw1(nn,
1649     >                         nproj,
1650     >                         int_mb(nmax(1)+ia-1),
1651     >                         int_mb(lmax(1)+ia-1),
1652     >                         int_mb(n_projector(1)
1653     >                                + (ia-1)*(nmax_max*lmmax_max)),
1654     >                         int_mb(l_projector(1)
1655     >                                + (ia-1)*(nmax_max*lmmax_max)),
1656     >                         int_mb(m_projector(1)
1657     >                                + (ia-1)*(nmax_max*lmmax_max)),
1658     >                         dbl_mb(Gijl_indx),
1659     >                         dbl_mb(sw1(1)),
1660     >                         dbl_mb(sw2(1)))
1661
1662*       **** S = S + sw1*transpose(sw2) ****
1663        call Dneall_m_add_sw1sw2(mb,nproj,scal,
1664     >                           dbl_mb(sw1(1)),
1665     >                           dbl_mb(sw2(1)),S)
1666
1667c*       *** routine needs to be parallelized over orbitals ****
1668c        do ms=1,ispin
1669c          shifts  = 1+(ms-1)*ne(1)*ne(1)
1670c          shiftsw =   (ms-1)*ne(1)
1671c          call DGEMM('N','T',
1672c     >              ne(ms),ne(ms),int_mb(nprj(1)+ia-1),
1673c     >              (scal),
1674c     >              dbl_mb(sw1(1)+shiftsw), nn,
1675c     >              dbl_mb(sw2(1)+shiftsw), nn,
1676c     >              (1.0d0),
1677c     >              S(shifts), ne(ms))
1678c        end do
1679
1680
1681        end if !** nproj>0 **
1682      end do !** iii **
1683
1684      value =           BA_pop_stack(sw2(2))
1685      value = value.and.BA_pop_stack(sw1(2))
1686      value = value.and.BA_pop_stack(exi(2))
1687      if (.not.value)
1688     >   call errquit('psp_paw_extra_overlap1: popping stack',3,MA_ERR)
1689      end if
1690      call nwpw_timing_end(6)
1691      return
1692      end
1693
1694
1695*     ***********************************
1696*     *					*
1697*     *	    psp_add_paw_extra_overlap2	*
1698*     *					*
1699*     ***********************************
1700
1701*    This routine adds the extra part of the paw overlap S operator to psi1
1702*      S = S + Smatrix where Smatrix = <psi1|S-I|psi2>
1703*
1704      subroutine psp_add_paw_extra_overlap2(mb,psi1,psi2,S)
1705      implicit none
1706      integer    mb
1707      complex*16 psi1(*)
1708      complex*16 psi2(*)
1709      real*8     S(*)
1710
1711#include "bafdecls.fh"
1712#include "errquit.fh"
1713#include "psp.fh"
1714
1715*     *** local variables ***
1716      integer npack1
1717      integer ii,ia,iii,l,nn,ms,shifts,shiftsw,ishift
1718      integer k,shift,l_prj,nproj,Gijl_indx,neq(2)
1719      real*8  omega,scal,scalsqr
1720      integer exi(2),sw0(2),sw1(2),sw2(2)
1721      logical value,sd_function
1722
1723*     **** external functions ****
1724      integer  ion_katm
1725      integer  psi_data_get_ptr
1726      real*8   lattice_omega
1727      external ion_katm
1728      external psi_data_get_ptr
1729      external lattice_omega
1730
1731      call nwpw_timing_start(6)
1732
1733c*     **** S = transpose(psi)*psi ****
1734c      call Dneall_ffm_Multiply(mb,psi1,psi2,npack1,S)
1735
1736
1737      if (pawexist) then
1738
1739      call Pack_npack(1,npack1)
1740      call Dneall_neq(neq)
1741      if (mb.eq.0) then
1742         nn = neq(1) + neq(2)
1743         ishift = 1
1744      else
1745         nn = neq(mb)
1746         ishift = 1+ (mb-1)*neq(1)*npack1
1747      end if
1748
1749*     **** allocate local memory ****
1750      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
1751      value = value.and.
1752     >        BA_push_get(mt_dbl,nn*nprj_max,'sw0',sw0(2),sw0(1))
1753      value = value.and.
1754     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1',sw1(2),sw1(1))
1755      value = value.and.
1756     >        BA_push_get(mt_dbl,nn*nprj_max,'sw2',sw2(2),sw2(1))
1757      if (.not.value)
1758     > call errquit('psp_add_paw_extra_overlap2:out of stack',0,MA_ERR)
1759
1760      omega = lattice_omega()
1761      scal    = 1.0d0/(omega)
1762      scalsqr = scal*scal
1763
1764      do iii=1,nion_paw
1765
1766        ii    = int_mb(ion_pawtoion(1)+iii-1)
1767        ia    = ion_katm(ii)
1768        nproj = int_mb(nprj(1)+ia-1)
1769
1770        if (nproj.gt.0) then
1771
1772*       **** structure factor and local pseudopotential ****
1773        call strfac_pack(1,ii,dcpl_mb(exi(1)))
1774
1775*       **** generate sw1's and projectors ****
1776        do l=1,nproj
1777
1778           shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
1779           l_prj = int_mb(l_projector(1)+(l-1)
1780     >                                  + (ia-1)*(nmax_max*lmmax_max))
1781           !sd_function = .not.and(l_prj,1)
1782#ifdef GCC4
1783           k = iand(l_prj,1)
1784#else
1785           k = and(l_prj,1)
1786#endif
1787           sd_function = (k.eq.0)
1788
1789
1790*          **** phase factor does not matter therefore ****
1791*          **** (-i)^l is the same as (i)^l in the     ****
1792*          **** Rayleigh scattering formula            ****
1793
1794*          *** current function is s or d ****
1795           if (sd_function) then
1796              call Pack_tc_Mul(1,dbl_mb(shift),
1797     >                           dcpl_mb(exi(1)),
1798     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
1799*          *** current function is p or f ****
1800           else
1801              call Pack_tc_iMul(1,dbl_mb(shift),
1802     >                           dcpl_mb(exi(1)),
1803     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
1804           end if
1805           call Pack_cc_indot(1,nn,
1806     >                        psi1(ishift),
1807     >                        dcpl_mb(prjtmp(1)+(l-1)*npack1),
1808     >                        dbl_mb(sw0(1)+(l-1)*nn))
1809           call Pack_cc_indot(1,nn,
1810     >                        psi2(ishift),
1811     >                        dcpl_mb(prjtmp(1)+(l-1)*npack1),
1812     >                        dbl_mb(sw2(1)+(l-1)*nn))
1813        end do
1814        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw0(1)))
1815        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw2(1)))
1816
1817*       **** sw2 = Sijl*sw1 ******
1818        Gijl_indx = psi_data_get_ptr(int_mb(Gijl(1)+ia-1),2)
1819        call Multiply_Gijl_sw1(nn,
1820     >                         nproj,
1821     >                         int_mb(nmax(1)+ia-1),
1822     >                         int_mb(lmax(1)+ia-1),
1823     >                         int_mb(n_projector(1)
1824     >                                + (ia-1)*(nmax_max*lmmax_max)),
1825     >                         int_mb(l_projector(1)
1826     >                                + (ia-1)*(nmax_max*lmmax_max)),
1827     >                         int_mb(m_projector(1)
1828     >                                + (ia-1)*(nmax_max*lmmax_max)),
1829     >                         dbl_mb(Gijl_indx),
1830     >                         dbl_mb(sw0(1)),
1831     >                         dbl_mb(sw1(1)))
1832
1833*       **** S = S + sw1*transpose(sw2) ****
1834        call Dneall_m_add_sw1sw2(mb,nproj,scal,
1835     >                           dbl_mb(sw1(1)),
1836     >                           dbl_mb(sw2(1)),S)
1837
1838c*       *** routine needs to be parallelized over orbitals ****
1839c        do ms=1,ispin
1840c          shifts  = 1+(ms-1)*ne(1)*ne(1)
1841c          shiftsw =   (ms-1)*ne(1)
1842c          call DGEMM('N','T',
1843c     >              ne(ms),ne(ms),int_mb(nprj(1)+ia-1),
1844c     >              (scal),
1845c     >              dbl_mb(sw1(1)+shiftsw), nn,
1846c     >              dbl_mb(sw2(1)+shiftsw), nn,
1847c     >              (1.0d0),
1848c     >              S(shifts), ne(ms))
1849c        end do
1850
1851
1852        end if !** nproj>0 **
1853      end do !** iii **
1854
1855      value =           BA_pop_stack(sw2(2))
1856      value = value.and.BA_pop_stack(sw1(2))
1857      value = value.and.BA_pop_stack(sw0(2))
1858      value = value.and.BA_pop_stack(exi(2))
1859      if (.not.value)
1860     > call errquit('psp_add_paw_extra_overlap2:popping stack',3,MA_ERR)
1861      end if
1862      call nwpw_timing_end(6)
1863      return
1864      end
1865
1866
1867
1868
1869*     ***********************************
1870*     *					*
1871*     *	    psp_paw_overlap_fion	*
1872*     *					*
1873*     ***********************************
1874
1875*    This routine adds the extra part of the paw overlap S operator to psi1
1876*      S = S + Smatrix where Smatrix = <psi1|S-I|psi2>
1877*
1878      subroutine psp_paw_overlap_fion(ispin,lmbda,psi1,fion)
1879      implicit none
1880      integer    ispin
1881      real*8     lmbda(*)
1882      complex*16 psi1(*)
1883      real*8     fion(3,*)
1884
1885#include "bafdecls.fh"
1886#include "errquit.fh"
1887#include "psp.fh"
1888
1889*     *** local variables ***
1890      integer npack1
1891      integer ii,ia,iii,l,nn,ms,shifts,shiftsw
1892      integer k,shift,l_prj,nproj,Gijl_indx,neq(2)
1893      real*8  omega,scal,scalsqr,ff(3)
1894      integer exi(2),sw1(2),sw2(2),sw1x(2),sw1y(2),sw1z(2)
1895      integer S12x(2),S12y(2),S12z(2),Gx,Gy,Gz
1896      logical value,sd_function
1897
1898*     **** external functions ****
1899      logical  Dneall_m_push_get,Dneall_m_pop_stack
1900      integer  ion_katm,Pack_G_indx
1901      integer  psi_data_get_ptr
1902      real*8   lattice_omega
1903      external Dneall_m_push_get,Dneall_m_pop_stack
1904      external ion_katm,Pack_G_indx
1905      external psi_data_get_ptr
1906      external lattice_omega
1907
1908
1909      call nwpw_timing_start(6)
1910      if (pawexist) then
1911
1912      call Pack_npack(1,npack1)
1913      call Dneall_neq(neq)
1914      nn = neq(1) + neq(2)
1915
1916
1917*     **** allocate local memory ****
1918      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
1919      value = value.and.
1920     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1',sw1(2),sw1(1))
1921      value = value.and.
1922     >        BA_push_get(mt_dbl,nn*nprj_max,'sw2',sw2(2),sw2(1))
1923      value = value.and.
1924     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1x',sw1x(2),sw1x(1))
1925      value = value.and.
1926     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1y',sw1y(2),sw1y(1))
1927      value = value.and.
1928     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1z',sw1z(2),sw1z(1))
1929      value = value.and.Dneall_m_push_get(0,S12x)
1930      value = value.and.Dneall_m_push_get(0,S12y)
1931      value = value.and.Dneall_m_push_get(0,S12z)
1932      if (.not.value)
1933     > call errquit('psp_paw_overlap_fion:out of stack',0,MA_ERR)
1934
1935      Gx = Pack_G_indx(1,1)
1936      Gy = Pack_G_indx(1,2)
1937      Gz = Pack_G_indx(1,3)
1938
1939      omega = lattice_omega()
1940      scal    = 1.0d0/(omega)
1941      scalsqr = scal*scal
1942
1943      do iii=1,nion_paw
1944
1945        ii    = int_mb(ion_pawtoion(1)+iii-1)
1946        ia    = ion_katm(ii)
1947        nproj = int_mb(nprj(1)+ia-1)
1948
1949        if (nproj.gt.0) then
1950
1951*       **** structure factor and local pseudopotential ****
1952        call strfac_pack(1,ii,dcpl_mb(exi(1)))
1953
1954*       **** generate sw1's and projectors ****
1955        do l=1,nproj
1956
1957           shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
1958           l_prj = int_mb(l_projector(1)+(l-1)
1959     >                                  + (ia-1)*(nmax_max*lmmax_max))
1960#ifdef GCC4
1961           k = iand(l_prj,1)
1962#else
1963           k = and(l_prj,1)
1964#endif
1965           sd_function = (k.eq.0)
1966
1967
1968*          **** phase factor does not matter therefore ****
1969*          **** (-i)^l is the same as (i)^l in the     ****
1970*          **** Rayleigh scattering formula            ****
1971
1972*          *** current function is s or d ****
1973           if (sd_function) then
1974              call Pack_tc_Mul(1,dbl_mb(shift),
1975     >                           dcpl_mb(exi(1)),
1976     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
1977*          *** current function is p or f ****
1978           else
1979              call Pack_tc_iMul(1,dbl_mb(shift),
1980     >                           dcpl_mb(exi(1)),
1981     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
1982           end if
1983           call Pack_cc_indot(1,nn,
1984     >                        psi1,
1985     >                        dcpl_mb(prjtmp(1)+(l-1)*npack1),
1986     >                        dbl_mb(sw1(1)+(l-1)*nn))
1987
1988           call Pack_conjg_tcc_indot(1,nn,
1989     >                        dbl_mb(Gx),
1990     >                        psi1,
1991     >                        dcpl_mb(prjtmp(1)+(l-1)*npack1),
1992     >                        dbl_mb(sw1x(1)+(l-1)*nn))
1993           call Pack_conjg_tcc_indot(1,nn,
1994     >                        dbl_mb(Gy),
1995     >                        psi1,
1996     >                        dcpl_mb(prjtmp(1)+(l-1)*npack1),
1997     >                        dbl_mb(sw1y(1)+(l-1)*nn))
1998           call Pack_conjg_tcc_indot(1,nn,
1999     >                        dbl_mb(Gz),
2000     >                        psi1,
2001     >                        dcpl_mb(prjtmp(1)+(l-1)*npack1),
2002     >                        dbl_mb(sw1z(1)+(l-1)*nn))
2003
2004        end do
2005        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1(1)))
2006        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1x(1)))
2007        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1y(1)))
2008        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1z(1)))
2009
2010*       **** sw2 = Sijl*sw1 ******
2011        Gijl_indx = psi_data_get_ptr(int_mb(Gijl(1)+ia-1),2)
2012        call Multiply_Gijl_sw1(nn,
2013     >                         nproj,
2014     >                         int_mb(nmax(1)+ia-1),
2015     >                         int_mb(lmax(1)+ia-1),
2016     >                         int_mb(n_projector(1)
2017     >                                + (ia-1)*(nmax_max*lmmax_max)),
2018     >                         int_mb(l_projector(1)
2019     >                                + (ia-1)*(nmax_max*lmmax_max)),
2020     >                         int_mb(m_projector(1)
2021     >                                + (ia-1)*(nmax_max*lmmax_max)),
2022     >                         dbl_mb(Gijl_indx),
2023     >                         dbl_mb(sw1(1)),
2024     >                         dbl_mb(sw2(1)))
2025
2026        if (ispin.eq.1) call dscal_omp(nn*nproj,2.0d0,dbl_mb(sw2(1)),1)
2027
2028*       **** Sx = Sx + sw1x*transpose(sw2) ****
2029        call Dneall_m_zero(0,dbl_mb(S12x(1)))
2030        call Dneall_m_add_sw1sw2(0,nproj,scal,
2031     >                           dbl_mb(sw1x(1)),
2032     >                           dbl_mb(sw2(1)),
2033     >                           dbl_mb(S12x(1)))
2034*       **** Sy = Sy + sw1y*transpose(sw2) ****
2035        call Dneall_m_zero(0,dbl_mb(S12y(1)))
2036        call Dneall_m_add_sw1sw2(0,nproj,scal,
2037     >                           dbl_mb(sw1y(1)),
2038     >                           dbl_mb(sw2(1)),
2039     >                           dbl_mb(S12y(1)))
2040*       **** Sz = Sz + sw1z*transpose(sw2) ****
2041        call Dneall_m_zero(0,dbl_mb(S12z(1)))
2042        call Dneall_m_add_sw1sw2(0,nproj,scal,
2043     >                           dbl_mb(sw1z(1)),
2044     >                           dbl_mb(sw2(1)),
2045     >                           dbl_mb(S12z(1)))
2046!$OMP MASTER
2047        call Dneall_mm_sum(0,lmbda,dbl_mb(S12x(1)),ff(1))
2048        call Dneall_mm_sum(0,lmbda,dbl_mb(S12y(1)),ff(2))
2049        call Dneall_mm_sum(0,lmbda,dbl_mb(S12z(1)),ff(3))
2050
2051        fion(1,ii) = fion(1,ii)  - 2.0d0*ff(1)
2052        fion(2,ii) = fion(2,ii)  - 2.0d0*ff(2)
2053        fion(3,ii) = fion(3,ii)  - 2.0d0*ff(3)
2054!$OMP END MASTER
2055!$OMP BARRIER
2056
2057        end if !** nproj>0 **
2058
2059      end do !** iii **
2060
2061      value =           Dneall_m_pop_stack(S12z)
2062      value = value.and.Dneall_m_pop_stack(S12y)
2063      value = value.and.Dneall_m_pop_stack(S12x)
2064      value = value.and.BA_pop_stack(sw1z(2))
2065      value = value.and.BA_pop_stack(sw1y(2))
2066      value = value.and.BA_pop_stack(sw1x(2))
2067      value = value.and.BA_pop_stack(sw2(2))
2068      value = value.and.BA_pop_stack(sw1(2))
2069      value = value.and.BA_pop_stack(exi(2))
2070      if (.not.value)
2071     > call errquit('psp_paw_overlap_fion:popping stack',3,MA_ERR)
2072
2073      end if
2074      call nwpw_timing_end(6)
2075
2076
2077      return
2078      end
2079
2080
2081
2082
2083
2084c*     ***********************************
2085c*     *					*
2086c*     *	       psp_overlap_orb	        *
2087c*     *					*
2088c*     ***********************************
2089c
2090c*    This routine computes the paw overlap S operator to psi1
2091c*      psi2 = S*psi1
2092c*
2093c      subroutine psp_overlap_orb(n,psi1,S)
2094c      implicit none
2095c      integer    n
2096c      complex*16 psi1(*)
2097c      real*8     S(*)
2098c
2099c#include "bafdecls.fh"
2100c#include "errquit.fh"
2101c#include "psp.fh"
2102c
2103c*     *** local variables ***
2104c      integer npack1
2105c      integer ii,ia,iii,l
2106c      integer k,shift,l_prj,nproj,Gijl_indx
2107c      real*8  omega,scal,scalsqr
2108c      integer exi(2),sw1(2),sw2(2)
2109c      logical value,sd_function
2110c
2111c*     **** external functions ****
2112c      integer  ion_katm
2113c      integer  psi_data_get_ptr
2114c      real*8   lattice_omega
2115c      external ion_katm
2116c      external psi_data_get_ptr
2117c      external lattice_omega
2118c
2119c      if (pawexist) then
2120c      call nwpw_timing_start(6)
2121c
2122c*     **** S = transpose(psi)*psi ****
2123c      call Pack_ccm_sym_dot(1,n,psi1,psi1,S)
2124c
2125c
2126c*     **** allocate local memory ****
2127c      call Pack_npack(1,npack1)
2128c      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
2129c      value = value.and.
2130c     >        BA_push_get(mt_dbl,n*nprj_max,'sw1',sw1(2),sw1(1))
2131c      value = value.and.
2132c     >        BA_push_get(mt_dbl,n*nprj_max,'sw2',sw2(2),sw2(1))
2133c      if (.not.value)
2134c     >  call errquit('psp_overlap_orb: out of stack',0,MA_ERR)
2135c
2136c      omega = lattice_omega()
2137c      scal    = 1.0d0/(omega)
2138c      scalsqr = scal*scal
2139c
2140c
2141c      do iii=1,nion_paw
2142c        ii    = int_mb(ion_pawtoion(1)+iii-1)
2143c        ia    = ion_katm(ii)
2144c        nproj = int_mb(nprj(1)+ia-1)
2145c
2146c        if (nproj.gt.0) then
2147c
2148c*       **** structure factor and local pseudopotential ****
2149c        call strfac_pack(1,ii,dcpl_mb(exi(1)))
2150c
2151c*       **** generate sw1's and projectors ****
2152c        do l=1,nproj
2153c
2154c           shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
2155c           l_prj = int_mb(l_projector(1)+(l-1)
2156c     >                                  + (ia-1)*(nmax_max*lmmax_max))
2157c           !sd_function = .not.and(l_prj,1)
2158c#ifdef GCC4
2159c           k = iand(l_prj,1)
2160c#else
2161c           k = and(l_prj,1)
2162c#endif
2163c           sd_function = (k.eq.0)
2164c
2165c
2166c*          **** phase factor does not matter therefore ****
2167c*          **** (-i)^l is the same as (i)^l in the     ****
2168c*          **** Rayleigh scattering formula            ****
2169c
2170c*          *** current function is s or d ****
2171c           if (sd_function) then
2172c              call Pack_tc_Mul(1,dbl_mb(shift),
2173c     >                           dcpl_mb(exi(1)),
2174c     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
2175c*          *** current function is p or f ****
2176c           else
2177c              call Pack_tc_iMul(1,dbl_mb(shift),
2178c     >                           dcpl_mb(exi(1)),
2179c     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
2180c           end if
2181c           call Pack_cc_indot(1,n,
2182c     >                      psi1,
2183c     >                      dcpl_mb(prjtmp(1)+(l-1)*npack1),
2184c     >                      dbl_mb(sw1(1)+(l-1)*n))
2185c
2186c        end do
2187c        call D3dB_Vector_SumAll((n*nproj),dbl_mb(sw1(1)))
2188c
2189c
2190c*       **** sw2 = Sijl*sw1 ******
2191c        Gijl_indx = psi_data_get_ptr(int_mb(Gijl(1)+ia-1),2)
2192c        call Multiply_Gijl_sw1(n,
2193c     >                         nproj,
2194c     >                         int_mb(nmax(1)+ia-1),
2195c     >                         int_mb(lmax(1)+ia-1),
2196c     >                         int_mb(n_projector(1)
2197c     >                                + (ia-1)*(nmax_max*lmmax_max)),
2198c     >                         int_mb(l_projector(1)
2199c     >                                + (ia-1)*(nmax_max*lmmax_max)),
2200c     >                         int_mb(m_projector(1)
2201c     >                                + (ia-1)*(nmax_max*lmmax_max)),
2202c     >                         dbl_mb(Gijl_indx),
2203c     >                         dbl_mb(sw1(1)),
2204c     >                         dbl_mb(sw2(1)))
2205c
2206c*       *** routine needs to be parallelized over orbitals ****
2207c*       **** S = S + sw1*transpose(sw2) ****
2208c        call DGEMM('N','T',n,n,int_mb(nprj(1)+ia-1),
2209c     >              (scal),
2210c     >              dbl_mb(sw1(1)), n,
2211c     >              dbl_mb(sw2(1)), n,
2212c     >              (1.0d0),
2213c     >              S, n)
2214c
2215c        end if !** nproj>0 **
2216c      end do !** ii **
2217c
2218c      value =           BA_pop_stack(sw2(2))
2219c      value = value.and.BA_pop_stack(sw1(2))
2220c      value = value.and.BA_pop_stack(exi(2))
2221c      if (.not.value) call errquit('psp_overlap_orb: popping stack',3,
2222c     &       MA_ERR)
2223c      call nwpw_timing_end(6)
2224c      end if
2225c      return
2226c      end
2227c
2228
2229
2230
2231*     ***********************************
2232*     *					*
2233*     *	        psp_kinetic_core   	*
2234*     *					*
2235*     ***********************************
2236*
2237*    This routine returns the paw kinetic energy for the core density
2238*
2239      real*8 function psp_kinetic_core()
2240      implicit none
2241
2242#include "bafdecls.fh"
2243#include "psp.fh"
2244
2245*     *** local variables ***
2246      integer ii,ia,iii
2247      real*8  ecore
2248
2249*     **** external functions ****
2250      integer  ion_katm
2251      external ion_katm
2252
2253      ecore = 0.0d0
2254      if (pawexist) then
2255         do iii=1,nion_paw
2256           ii    = int_mb(ion_pawtoion(1)+iii-1)
2257           ia    = ion_katm(ii)
2258           ecore = ecore + dbl_mb(core_kin(1)+ia-1)
2259         end do
2260      end if
2261
2262      psp_kinetic_core = ecore
2263      return
2264      end
2265
2266*     ***********************************
2267*     *                                 *
2268*     *         psp_ion_core            *
2269*     *                                 *
2270*     ***********************************
2271*
2272*    This routine returns the paw ion-core energy
2273*
2274      real*8 function psp_ion_core()
2275      implicit none
2276
2277#include "bafdecls.fh"
2278#include "psp.fh"
2279
2280*     *** local variables ***
2281      integer ii,ia,iii
2282      real*8  ecore
2283
2284*     **** external functions ****
2285      integer  ion_katm
2286      external ion_katm
2287
2288      ecore = 0.0d0
2289      if (pawexist) then
2290         do iii=1,nion_paw
2291            ii    = int_mb(ion_pawtoion(1)+iii-1)
2292            ia    = ion_katm(ii)
2293            ecore = ecore + dbl_mb(core_ion(1)+ia-1)
2294         end do
2295      end if
2296
2297      psp_ion_core = ecore
2298      return
2299      end
2300
2301
2302*     ***********************************
2303*     *					*
2304*     *	        psp_kinetic_atom	*
2305*     *					*
2306*     ***********************************
2307
2308*    This routine computes the paw atomic kinetic energy
2309*
2310      real*8 function psp_kinetic_atom(ispin,ne,psi1)
2311      implicit none
2312      integer    ispin,ne(2)
2313      complex*16 psi1(*)
2314
2315#include "bafdecls.fh"
2316#include "errquit.fh"
2317#include "psp.fh"
2318
2319*     *** local variables ***
2320      integer npack1
2321      integer ii,ia,iii,l,nn
2322      integer k,shift,l_prj,nproj,Gijl_indx
2323      real*8  omega,scal,scalsqr
2324      integer exi(2),sw1(2),sw2(2)
2325      logical value,sd_function
2326
2327      real*8  kinetic_atom
2328
2329*     **** external functions ****
2330      integer  ion_katm
2331      integer  psi_data_get_ptr
2332      real*8   lattice_omega
2333      external ion_katm
2334      external psi_data_get_ptr
2335      external lattice_omega
2336
2337      kinetic_atom = 0.0d0
2338      if (pawexist) then
2339
2340      nn = ne(1)+ne(2)
2341
2342*     **** allocate local memory ****
2343      call Pack_npack(1,npack1)
2344
2345      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
2346      value = value.and.
2347     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1',sw1(2),sw1(1))
2348      value = value.and.
2349     >        BA_push_get(mt_dbl,nn*nprj_max,'sw2',sw2(2),sw2(1))
2350      if (.not.value)
2351     >  call errquit('psp_overlap_orb: out of stack',0,MA_ERR)
2352
2353      omega = lattice_omega()
2354      scal    = 1.0d0/(omega)
2355      scalsqr = scal*scal
2356
2357
2358      do iii=1,nion_paw
2359        ii    = int_mb(ion_pawtoion(1)+iii-1)
2360        ia    = ion_katm(ii)
2361        nproj = int_mb(nprj(1)+ia-1)
2362
2363        if (nproj.gt.0) then
2364
2365*       **** structure factor and local pseudopotential ****
2366        call strfac_pack(1,ii,dcpl_mb(exi(1)))
2367
2368*       **** generate sw1's and projectors ****
2369        do l=1,nproj
2370
2371           shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
2372           l_prj = int_mb(l_projector(1)+(l-1)
2373     >                                  + (ia-1)*(nmax_max*lmmax_max))
2374           !sd_function = .not.and(l_prj,1)
2375#ifdef GCC4
2376           k = iand(l_prj,1)
2377#else
2378           k = and(l_prj,1)
2379#endif
2380           sd_function = (k.eq.0)
2381
2382
2383*          **** phase factor does not matter therefore ****
2384*          **** (-i)^l is the same as (i)^l in the     ****
2385*          **** Rayleigh scattering formula            ****
2386
2387*          *** current function is s or d ****
2388           if (sd_function) then
2389              call Pack_tc_Mul(1,dbl_mb(shift),
2390     >                           dcpl_mb(exi(1)),
2391     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
2392*          *** current function is p or f ****
2393           else
2394              call Pack_tc_iMul(1,dbl_mb(shift),
2395     >                           dcpl_mb(exi(1)),
2396     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
2397           end if
2398           call Pack_cc_indot(1,nn,
2399     >                      psi1,
2400     >                      dcpl_mb(prjtmp(1)+(l-1)*npack1),
2401     >                      dbl_mb(sw1(1)+(l-1)*nn))
2402
2403        end do
2404        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1(1)))
2405
2406
2407*       **** sw2 = Tijl*sw1 ******
2408        Gijl_indx = psi_data_get_ptr(int_mb(Gijl(1)+ia-1),3)
2409        call Multiply_Gijl_sw1(nn,
2410     >                         nproj,
2411     >                         int_mb(nmax(1)+ia-1),
2412     >                         int_mb(lmax(1)+ia-1),
2413     >                         int_mb(n_projector(1)
2414     >                                + (ia-1)*(nmax_max*lmmax_max)),
2415     >                         int_mb(l_projector(1)
2416     >                                + (ia-1)*(nmax_max*lmmax_max)),
2417     >                         int_mb(m_projector(1)
2418     >                                + (ia-1)*(nmax_max*lmmax_max)),
2419     >                         dbl_mb(Gijl_indx),
2420     >                         dbl_mb(sw1(1)),
2421     >                         dbl_mb(sw2(1)))
2422
2423*       **** keatom = transpose(sw1)*sw2) ****
2424        do l=0,(nn*nproj-1)
2425         kinetic_atom = kinetic_atom+dbl_mb(sw1(1)+l)*dbl_mb(sw2(1)+l)
2426        end do
2427
2428        end if !** nproj>0 **
2429      end do !** ii **
2430
2431      value =           BA_pop_stack(sw2(2))
2432      value = value.and.BA_pop_stack(sw1(2))
2433      value = value.and.BA_pop_stack(exi(2))
2434      if (.not.value) call errquit('psp_overlap_orb: popping stack',3,
2435     &       MA_ERR)
2436
2437      if (ispin.eq.1) kinetic_atom = kinetic_atom+kinetic_atom
2438      kinetic_atom = kinetic_atom*scal
2439
2440      end if
2441      call D1dB_SumAll(kinetic_atom)
2442      psp_kinetic_atom = kinetic_atom
2443      return
2444      end
2445
2446
2447*     ***********************************
2448*     *					*
2449*     *	     psp_valence_core_atom	*
2450*     *					*
2451*     ***********************************
2452*
2453*    This routine computes the paw atomic valence_core energy
2454*
2455      real*8 function psp_valence_core_atom(ispin,ne,psi1)
2456      implicit none
2457      integer    ispin,ne(2)
2458      complex*16 psi1(*)
2459
2460#include "bafdecls.fh"
2461#include "errquit.fh"
2462#include "psp.fh"
2463
2464*     *** local variables ***
2465      integer npack1
2466      integer ii,ia,iii,l,nn
2467      integer k,shift,l_prj,nproj,Gijl_indx
2468      real*8  omega,scal,scalsqr
2469      integer exi(2),sw1(2),sw2(2)
2470      logical value,sd_function
2471
2472      real*8  valence_core_atom
2473
2474*     **** external functions ****
2475      integer  ion_katm
2476      integer  psi_data_get_ptr
2477      real*8   lattice_omega
2478      external ion_katm
2479      external psi_data_get_ptr
2480      external lattice_omega
2481
2482      valence_core_atom = 0.0d0
2483      if (pawexist) then
2484
2485      nn = ne(1)+ne(2)
2486
2487*     **** allocate local memory ****
2488      call Pack_npack(1,npack1)
2489      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
2490      value = value.and.
2491     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1',sw1(2),sw1(1))
2492      value = value.and.
2493     >        BA_push_get(mt_dbl,nn*nprj_max,'sw2',sw2(2),sw2(1))
2494      if (.not.value)
2495     >  call errquit('psp_overlap_orb: out of stack',0,MA_ERR)
2496
2497      omega = lattice_omega()
2498      scal    = 1.0d0/(omega)
2499      scalsqr = scal*scal
2500
2501
2502      do iii=1,nion_paw
2503        ii    = int_mb(ion_pawtoion(1)+iii-1)
2504        ia    = ion_katm(ii)
2505        nproj = int_mb(nprj(1)+ia-1)
2506
2507        if (nproj.gt.0) then
2508
2509*       **** structure factor and local pseudopotential ****
2510        call strfac_pack(1,ii,dcpl_mb(exi(1)))
2511
2512*       **** generate sw1's and projectors ****
2513        do l=1,nproj
2514
2515           shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
2516           l_prj = int_mb(l_projector(1)+(l-1)
2517     >                                  + (ia-1)*(nmax_max*lmmax_max))
2518           !sd_function = .not.and(l_prj,1)
2519#ifdef GCC4
2520           k = iand(l_prj,1)
2521#else
2522           k = and(l_prj,1)
2523#endif
2524           sd_function = (k.eq.0)
2525
2526
2527*          **** phase factor does not matter therefore ****
2528*          **** (-i)^l is the same as (i)^l in the     ****
2529*          **** Rayleigh scattering formula            ****
2530
2531*          *** current function is s or d ****
2532           if (sd_function) then
2533              call Pack_tc_Mul(1,dbl_mb(shift),
2534     >                           dcpl_mb(exi(1)),
2535     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
2536*          *** current function is p or f ****
2537           else
2538              call Pack_tc_iMul(1,dbl_mb(shift),
2539     >                           dcpl_mb(exi(1)),
2540     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
2541           end if
2542           call Pack_cc_indot(1,nn,
2543     >                      psi1,
2544     >                      dcpl_mb(prjtmp(1)+(l-1)*npack1),
2545     >                      dbl_mb(sw1(1)+(l-1)*nn))
2546
2547        end do
2548        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1(1)))
2549
2550
2551*       **** sw2 = Vcoreijl*sw1 ******
2552        Gijl_indx = psi_data_get_ptr(int_mb(Gijl(1)+ia-1),5)
2553        call Multiply_Gijl_sw1(nn,
2554     >                         nproj,
2555     >                         int_mb(nmax(1)+ia-1),
2556     >                         int_mb(lmax(1)+ia-1),
2557     >                         int_mb(n_projector(1)
2558     >                                + (ia-1)*(nmax_max*lmmax_max)),
2559     >                         int_mb(l_projector(1)
2560     >                                + (ia-1)*(nmax_max*lmmax_max)),
2561     >                         int_mb(m_projector(1)
2562     >                                + (ia-1)*(nmax_max*lmmax_max)),
2563     >                         dbl_mb(Gijl_indx),
2564     >                         dbl_mb(sw1(1)),
2565     >                         dbl_mb(sw2(1)))
2566
2567*       **** keatom = transpose(sw1)*sw2) ****
2568        do l=0,(nn*nproj-1)
2569           valence_core_atom = valence_core_atom
2570     >                       + dbl_mb(sw1(1)+l)*dbl_mb(sw2(1)+l)
2571        end do
2572
2573        end if !** nproj>0 **
2574      end do !** ii **
2575
2576      value =           BA_pop_stack(sw2(2))
2577      value = value.and.BA_pop_stack(sw1(2))
2578      value = value.and.BA_pop_stack(exi(2))
2579      if (.not.value) call errquit('psp_overlap_orb: popping stack',3,
2580     &       MA_ERR)
2581
2582      if (ispin.eq.1)
2583     >   valence_core_atom = valence_core_atom+valence_core_atom
2584      valence_core_atom = valence_core_atom*scal
2585      end if
2586      call D1dB_SumAll(valence_core_atom)
2587
2588      psp_valence_core_atom = valence_core_atom
2589      return
2590      end
2591
2592
2593
2594
2595
2596*     ***********************************
2597*     *					*
2598*     *	        psp_vloc_atom		*
2599*     *					*
2600*     ***********************************
2601
2602*    This routine computes the paw atomic local psp energy
2603*
2604      real*8 function psp_vloc_atom(ispin,ne,psi1)
2605      implicit none
2606      integer    ispin,ne(2)
2607      complex*16 psi1(*)
2608
2609#include "bafdecls.fh"
2610#include "errquit.fh"
2611#include "psp.fh"
2612
2613*     *** local variables ***
2614      integer npack1
2615      integer ii,ia,iii,l,nn
2616      integer k,shift,l_prj,nproj,Gijl_indx
2617      real*8  omega,scal,scalsqr
2618      integer exi(2),sw1(2),sw2(2)
2619      logical value,sd_function
2620
2621      real*8  vloc_atom,vloc_atom0
2622      common /vloc_atom_tmp/ vloc_atom0
2623
2624      integer i,j,n,li,lj,mi,mj,ni,nj
2625      real*8 w,ee
2626
2627*     **** external functions ****
2628      integer  ion_katm
2629      integer  psi_data_get_ptr
2630      real*8   lattice_omega
2631      external ion_katm
2632      external psi_data_get_ptr
2633      external lattice_omega
2634
2635      vloc_atom = 0.0d0
2636      if (pawexist) then
2637
2638      nn = ne(1)+ne(2)
2639
2640*     **** allocate local memory ****
2641      call Pack_npack(1,npack1)
2642      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
2643      value = value.and.
2644     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1',sw1(2),sw1(1))
2645      value = value.and.
2646     >        BA_push_get(mt_dbl,nn*nprj_max,'sw2',sw2(2),sw2(1))
2647      if (.not.value)
2648     >  call errquit('psp_overlap_orb: out of stack',0,MA_ERR)
2649
2650      omega = lattice_omega()
2651      scal    = 1.0d0/(omega)
2652      scalsqr = scal*scal
2653
2654
2655      do iii=1,nion_paw
2656        ii    = int_mb(ion_pawtoion(1)+iii-1)
2657        ia    = ion_katm(ii)
2658        nproj = int_mb(nprj(1)+ia-1)
2659
2660        if (nproj.gt.0) then
2661
2662*       **** structure factor and local pseudopotential ****
2663        call strfac_pack(1,ii,dcpl_mb(exi(1)))
2664
2665*       **** generate sw1's and projectors ****
2666        do l=1,nproj
2667
2668           shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
2669           l_prj = int_mb(l_projector(1)+(l-1)
2670     >                                  + (ia-1)*(nmax_max*lmmax_max))
2671           !sd_function = .not.and(l_prj,1)
2672#ifdef GCC4
2673           k = iand(l_prj,1)
2674#else
2675           k = and(l_prj,1)
2676#endif
2677           sd_function = (k.eq.0)
2678
2679
2680*          **** phase factor does not matter therefore ****
2681*          **** (-i)^l is the same as (i)^l in the     ****
2682*          **** Rayleigh scattering formula            ****
2683
2684*          *** current function is s or d ****
2685           if (sd_function) then
2686              call Pack_tc_Mul(1,dbl_mb(shift),
2687     >                           dcpl_mb(exi(1)),
2688     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
2689*          *** current function is p or f ****
2690           else
2691              call Pack_tc_iMul(1,dbl_mb(shift),
2692     >                           dcpl_mb(exi(1)),
2693     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
2694           end if
2695           call Pack_cc_indot(1,nn,
2696     >                      psi1,
2697     >                      dcpl_mb(prjtmp(1)+(l-1)*npack1),
2698     >                      dbl_mb(sw1(1)+(l-1)*nn))
2699
2700        end do
2701        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1(1)))
2702
2703        Gijl_indx = psi_data_get_ptr(int_mb(Gijl(1)+ia-1),4)
2704c        ee = 0.0d0
2705c        do i=1,nproj
2706c        do j=1,nproj
2707c           w = 0.0d0
2708c           do n=1,nn
2709c              w = w + dbl_mb(sw1(1)+n-1+nn*(i-1))
2710c     >               *dbl_mb(sw1(1)+n-1+nn*(j-1))
2711c           end do
2712c           ni = int_mb(n_projector(1)+(i-1)
2713c     >                 + (ia-1)*(nmax_max*lmmax_max))
2714c           li = int_mb(l_projector(1)+(i-1)
2715c     >                 + (ia-1)*(nmax_max*lmmax_max))
2716c           mi = int_mb(m_projector(1)+(i-1)
2717c     >                 + (ia-1)*(nmax_max*lmmax_max))
2718c           nj = int_mb(n_projector(1)+(j-1)
2719c     >                 + (ia-1)*(nmax_max*lmmax_max))
2720c           lj = int_mb(l_projector(1)+(j-1)
2721c     >                 + (ia-1)*(nmax_max*lmmax_max))
2722c           mj = int_mb(l_projector(1)+(j-1)
2723c     >                 + (ia-1)*(nmax_max*lmmax_max))
2724c
2725c          if ((li.eq.lj).and.(mi.eq.mj)) then
2726c           ee = ee + w*scal*dbl_mb(Gijl_indx+(ni-1)
2727c     >             +(nj-1)*int_mb(nmax(1)+ia-1)
2728c     >             + li*int_mb(nmax(1)+ia-1)**2)
2729c
2730c          end if
2731c
2732c        end do
2733c        end do
2734
2735
2736*       **** sw2 = Tijl*sw1 ******
2737        Gijl_indx = psi_data_get_ptr(int_mb(Gijl(1)+ia-1),4)
2738        call Multiply_Gijl_sw1(nn,
2739     >                         nproj,
2740     >                         int_mb(nmax(1)+ia-1),
2741     >                         int_mb(lmax(1)+ia-1),
2742     >                         int_mb(n_projector(1)
2743     >                                + (ia-1)*(nmax_max*lmmax_max)),
2744     >                         int_mb(l_projector(1)
2745     >                                + (ia-1)*(nmax_max*lmmax_max)),
2746     >                         int_mb(m_projector(1)
2747     >                                + (ia-1)*(nmax_max*lmmax_max)),
2748     >                         dbl_mb(Gijl_indx),
2749     >                         dbl_mb(sw1(1)),
2750     >                         dbl_mb(sw2(1)))
2751
2752*       **** vloc_atom = transpose(sw1)*sw2) ****
2753!$OMP MASTER
2754        vloc_atom0 = 0.0d0
2755!$OMP END MASTER
2756!$OMP BARRIER
2757!$OMP DO REDUCTION(+:vloc_atom0)
2758        do l=0,(nn*nproj-1)
2759         vloc_atom0 = vloc_atom0+dbl_mb(sw1(1)+l)*dbl_mb(sw2(1)+l)
2760        end do
2761!$OMP END DO
2762        vloc_atom = vloc_atom + vloc_atom0
2763
2764        end if !** nproj>0 **
2765      end do !** ii **
2766
2767      value =           BA_pop_stack(sw2(2))
2768      value = value.and.BA_pop_stack(sw1(2))
2769      value = value.and.BA_pop_stack(exi(2))
2770      if (.not.value) call errquit('psp_vloc_atom:pop stack',3,MA_ERR)
2771
2772      if (ispin.eq.1) vloc_atom = vloc_atom+vloc_atom
2773      vloc_atom = vloc_atom*scal
2774      end if
2775
2776      call D1dB_SumAll(vloc_atom)
2777
2778      psp_vloc_atom = vloc_atom
2779      return
2780      end
2781
2782*     *************************************************
2783*     *                                               *
2784*     *              psp_ncmp_vloc                    *
2785*     *                                               *
2786*     *************************************************
2787*
2788*    This routine calulates the hartree energy of the compensation charge density.
2789*
2790*                  /                     /                           /
2791*   E_ncmp_vloc =  | ncmp(r)*Vl1(r) dr = | ncmp_smooth(r)*Vl1(r)dr + | (ncmp-ncmp_smooth)*Vl1(r) dr
2792*                  /                     /                           /
2793*
2794*                  /
2795*                = | ncmp_smooth(r)*Vl1(r) dr
2796*                  /
2797*
2798*                  /
2799*                + | (ncmp(r)-ncmp_smooth(r))*(Vl1(r)-Vl2(r)) dr
2800*                  /
2801*
2802*                  /
2803*                + | (ncmp(r)-ncmp_smooth(r))*Vl2(r) dr
2804*                  /
2805*-----------------------------------------------------------------------------------------------------
2806*
2807*                  /
2808*   E_ncmp_vloc  = | (ncmp(r)*Vl2(r) + ncmp_smooth(r)*(Vl1(r)-Vl2(r))) dr - plane-wave integrals
2809*                  /
2810*
2811*                  /
2812*                + | (ncmp(r)-ncmp_smooth(r))*(Vl1(r)-Vl2(r)) dr  - Gaussian two-center integrals
2813*                  /
2814*
2815*-----------------------------------------------------------------------------------------------------
2816
2817      real*8 function psp_ncmp_vloc(ispin)
2818      implicit none
2819      integer ispin
2820
2821#include "bafdecls.fh"
2822#include "errquit.fh"
2823#include "psp.fh"
2824
2825*     **** local variables ****
2826      logical ok,periodic,move
2827      integer npack0,dng_cmp(2),dng_cmp_smooth(2),vl1(2),vl_notpaw(2)
2828      integer n2ft3d,rho_cmp(2),rho_cmp_smooth(2),vl2(2),r_grid(2)
2829      integer nx,ny,nz
2830      real*8 eh,e1,e2,eh0,eh1,eh2,scal1,dv
2831      real*8 dum1(3),dum2(3)
2832
2833*     **** external functions ****
2834      integer  control_version
2835      external control_version
2836      real*8   nwpw_compcharge_E_multipole_zv_ee,lattice_omega
2837      external nwpw_compcharge_E_multipole_zv_ee,lattice_omega
2838
2839
2840      eh = 0.0d0
2841      if (pawexist) then
2842
2843      periodic = (control_version().eq.3)
2844
2845*     *************************************
2846*     **** Periodic Boundary Condtions ****
2847*     *************************************
2848      if (periodic) then
2849         call Pack_npack(0,npack0)
2850         ok =        BA_push_get(mt_dcpl,npack0,'dng_cmp',
2851     >                           dng_cmp(2),dng_cmp(1))
2852         ok = ok.and.BA_push_get(mt_dcpl,npack0,'dng_cmp_smooth',
2853     >                           dng_cmp_smooth(2),dng_cmp_smooth(1))
2854         ok = ok.and.BA_push_get(mt_dcpl,npack0,'vl1',
2855     >                           vl1(2),vl1(1))
2856         ok = ok.and.BA_push_get(mt_dcpl,npack0,'vl_notpaw',
2857     >                           vl_notpaw(2),vl_notpaw(1))
2858         if (.not.ok)
2859     >      call errquit('psp_ncmp_vloc:out of stack',0,MA_ERR)
2860
2861*        **** Using pw grid only ***
2862         if (use_grid_cmp) then
2863            call nwpw_compcharge_gen_dn_cmp(ispin,dcpl_mb(dng_cmp(1)))
2864            move = .false.
2865            call v_local(dcpl_mb(vl1(1)),
2866     >               move,
2867     >               dum1,dum2)
2868            call Pack_cc_dot(0,dcpl_mb(dng_cmp(1)),
2869     >                         dcpl_mb(vl1(1)),eh)
2870
2871*        **** Using gaussian two-electron integrals and pw grid ***
2872         else
2873            eh = nwpw_compcharge_E_multipole_zv_ee(ispin,dbl_mb(zv(1)))  !*** Gaussian integrals
2874
2875            call v_local_seperate_paw(dcpl_mb(vl1(1)),
2876     >                                dcpl_mb(vl_notpaw(1)))
2877            call nwpw_compcharge_gen_v_cmp_smooth(dbl_mb(zv(1)),
2878     >                                            dcpl_mb(dng_cmp(1)))
2879            call Pack_cc_Sub2(0,dcpl_mb(dng_cmp(1)),
2880     >                          dcpl_mb(vl1(1)))
2881            call Pack_cc_Sum2(0,dcpl_mb(dng_cmp(1)),
2882     >                          dcpl_mb(vl_notpaw(1)))
2883            call nwpw_compcharge_gen_dn_cmp2(ispin,
2884     >                                       dcpl_mb(dng_cmp(1)),
2885     >                                       dcpl_mb(dng_cmp_smooth(1)))
2886            call Pack_cc_dot(0,dcpl_mb(dng_cmp(1)),
2887     >                         dcpl_mb(vl_notpaw(1)),e1)
2888
2889            call Pack_cc_dot(0,dcpl_mb(dng_cmp_smooth(1)),
2890     >                         dcpl_mb(vl1(1)),e2)
2891            eh = eh + (e1+e2)
2892         end if
2893         ok =        BA_pop_stack(vl_notpaw(2))
2894         ok = ok.and.BA_pop_stack(vl1(2))
2895         ok = ok.and.BA_pop_stack(dng_cmp_smooth(2))
2896         ok = ok.and.BA_pop_stack(dng_cmp(2))
2897         if (.not.ok)
2898     >      call errquit('psp_vloc_residual:pop stack',1,MA_ERR)
2899
2900*     **************************************
2901*     **** APeriodic Boundary Condtions ****
2902*     **************************************
2903      else
2904         call D3dB_n2ft3d(1,n2ft3d)
2905         ok =         BA_push_get(mt_dbl,n2ft3d,'rho_cmp',
2906     >                            rho_cmp(2),rho_cmp(1))
2907         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'rho_cmp_smooth',
2908     >                           rho_cmp_smooth(2),rho_cmp_smooth(1))
2909         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'vl1',
2910     >                           vl1(2),vl1(1))
2911         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'vl_notpaw',
2912     >                           vl_notpaw(2),vl_notpaw(1))
2913         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'vl2',
2914     >                           vl2(2),vl2(1))
2915         ok = ok.and.BA_push_get(mt_dbl,3*n2ft3d,'rgrid_cmp',
2916     >                           r_grid(2),r_grid(1))
2917         if (.not.ok)
2918     >      call errquit('psp_ncmp_vloc:out of stack',2,MA_ERR)
2919         call lattice_r_grid(dbl_mb(r_grid(1)))
2920         call D3dB_nx(1,nx)
2921         call D3dB_ny(1,ny)
2922         call D3dB_nz(1,nz)
2923         scal1 = 1.0d0/(nx*ny*nz)
2924         dv = scal1*lattice_omega()
2925
2926*        **** Using pw grid only ***
2927         if (use_grid_cmp) then
2928            call nwpw_compcharge_gen_dn_cmp(ispin,dbl_mb(rho_cmp(1)))
2929     >
2930            move = .false.
2931            call v_local(dbl_mb(vl1(1)),
2932     >               move,
2933     >               dum1,dum2)
2934            call Pack_cc_dot(0,dbl_mb(rho_cmp(1)),
2935     >                         dbl_mb(vl1(1)),eh)
2936
2937            call Pack_c_unpack(0,dbl_mb(rho_cmp(1)))
2938            call D3dB_cr_fft3b(1,dbl_mb(rho_cmp(1)))
2939
2940            call v_lr_local(dbl_mb(r_grid(1)),
2941     >                      dbl_mb(vl1(1)))
2942            call D3dB_rr_dot(1,dbl_mb(rho_cmp(1)),
2943     >                         dbl_mb(vl1(1)),e1)
2944            eh = eh + e1*dv
2945
2946*        **** Using gaussian two-electron integrals and pw grid ***
2947         else
2948            eh = nwpw_compcharge_E_multipole_zv_ee(ispin,dbl_mb(zv(1)))
2949
2950*           **** long-range terms ****
2951            call v_lr_local_seperate_paw(dbl_mb(r_grid(1)),
2952     >                                   dbl_mb(vl1(1)),
2953     >                                   dbl_mb(vl_notpaw(1)))
2954            call nwpw_compcharge_gen_vlr_cmp_smooth(dbl_mb(zv(1)),
2955     >                                           dbl_mb(r_grid(1)),
2956     >                                           dbl_mb(rho_cmp(1)))
2957
2958            call D3dB_rr_Sum2(1,dbl_mb(rho_cmp(1)),dbl_mb(vl_notpaw(1)))
2959            call D3dB_rc_pfft3f(1,0,dbl_mb(vl_notpaw(1)))
2960            call Pack_c_pack(0,dbl_mb(vl_notpaw(1)))
2961            call Pack_c_SMul1(0,dv,dbl_mb(vl_notpaw(1)))
2962
2963            call D3dB_rr_Sub2(1,dbl_mb(rho_cmp(1)),dbl_mb(vl1(1)))
2964            call D3dB_rc_pfft3f(1,0,dbl_mb(vl1(1)))
2965            call Pack_c_pack(0,dbl_mb(vl1(1)))
2966            call Pack_c_SMul1(0,dv,dbl_mb(vl1(1)))
2967
2968*           **** short-range terms ****
2969            call v_local_seperate_paw(dbl_mb(rho_cmp_smooth(1)),
2970     >                                dbl_mb(rho_cmp(1)))
2971            call Pack_cc_Sum2(0,
2972     >                        dbl_mb(rho_cmp(1)),
2973     >                        dbl_mb(vl_notpaw(1)))
2974            call Pack_cc_Sum2(0,
2975     >                        dbl_mb(rho_cmp_smooth(1)),
2976     >                        dbl_mb(vl1(1)))
2977
2978            call nwpw_compcharge_gen_dn_cmp2(ispin,
2979     >                                       dbl_mb(rho_cmp(1)),
2980     >                                       dbl_mb(rho_cmp_smooth(1)))
2981            call Pack_cc_dot(0,dbl_mb(rho_cmp(1)),
2982     >                         dbl_mb(vl_notpaw(1)),e1)
2983            call Pack_cc_dot(0,dbl_mb(rho_cmp_smooth(1)),
2984     >                         dbl_mb(vl1(1)),e2)
2985            eh = eh + (e1+e2)
2986
2987         end if
2988         ok =        BA_pop_stack(r_grid(2))
2989         ok = ok.and.BA_pop_stack(vl2(2))
2990         ok = ok.and.BA_pop_stack(vl_notpaw(2))
2991         ok = ok.and.BA_pop_stack(vl1(2))
2992         ok = ok.and.BA_pop_stack(rho_cmp_smooth(2))
2993         ok = ok.and.BA_pop_stack(rho_cmp(2))
2994         if (.not.ok)
2995     >      call errquit('psp_vloc_ncmp_vloc:pop stack',3,MA_ERR)
2996
2997      end if
2998
2999
3000      end if
3001
3002      psp_ncmp_vloc = eh
3003      return
3004      end
3005
3006*     *************************************************
3007*     *                                               *
3008*     *              psp_dE_ncmp_vloc_Qlm             *
3009*     *                                               *
3010*     *************************************************
3011
3012      subroutine psp_dE_ncmp_vloc_Qlm(ispin,move,fion)
3013      implicit none
3014      integer ispin
3015      logical move
3016      real*8 fion(3,*)
3017
3018#include "bafdecls.fh"
3019#include "errquit.fh"
3020#include "psp.fh"
3021
3022*     **** local variables ****
3023      logical ok,periodic
3024      integer npack0,dng_cmp(2),dng_cmp_smooth(2),vl1(2),vl_notpaw(2)
3025      integer n2ft3d,rho_cmp(2),rho_cmp_smooth(2),vl2(2),r_grid(2)
3026      integer nx,ny,nz
3027      real*8 eh,e1,e2,eh0,eh1,eh2,scal1,dv
3028      real*8 dum1(3),dum2(3)
3029
3030*     **** external functions ****
3031      integer  control_version
3032      external control_version
3033      real*8   nwpw_compcharge_E_multipole_zv_ee,lattice_omega
3034      external nwpw_compcharge_E_multipole_zv_ee,lattice_omega
3035
3036
3037      eh = 0.0d0
3038      if (pawexist) then
3039
3040      periodic = (control_version().eq.3)
3041
3042*     *************************************
3043*     **** Periodic Boundary Condtions ****
3044*     *************************************
3045      if (periodic) then
3046         call Pack_npack(0,npack0)
3047         ok =        BA_push_get(mt_dcpl,npack0,'dng_cmp',
3048     >                           dng_cmp(2),dng_cmp(1))
3049         ok = ok.and.BA_push_get(mt_dcpl,npack0,'dng_cmp_smooth',
3050     >                           dng_cmp_smooth(2),dng_cmp_smooth(1))
3051         ok = ok.and.BA_push_get(mt_dcpl,npack0,'vl1',
3052     >                           vl1(2),vl1(1))
3053         ok = ok.and.BA_push_get(mt_dcpl,npack0,'vl_notpaw',
3054     >                           vl_notpaw(2),vl_notpaw(1))
3055         if (.not.ok)
3056     >      call errquit('psp_ncmp_vloc:out of stack',0,MA_ERR)
3057
3058*        **** Using pw grid only ***
3059         if (use_grid_cmp) then
3060c            call nwpw_compcharge_gen_dn_cmp(ispin,dcpl_mb(dng_cmp(1)))
3061            call v_local(dcpl_mb(vl1(1)),
3062     >               .false.,
3063     >               dum1,dum2)
3064c            call Pack_cc_dot(0,dcpl_mb(dng_cmp(1)),
3065c     >                         dcpl_mb(vl1(1)),eh)
3066
3067            call nwpw_compcharge_gen_dE_ncmp_vloc_Qlm_pw(ispin,
3068     >                                               dcpl_mb(vl1(1)),
3069     >                                               move,fion)
3070
3071*        **** Using gaussian two-electron integrals and pw grid ***
3072         else
3073            call v_local_seperate_paw(dcpl_mb(vl1(1)),
3074     >                                dcpl_mb(vl_notpaw(1)))
3075            call nwpw_compcharge_gen_v_cmp_smooth(dbl_mb(zv(1)),
3076     >                                            dcpl_mb(dng_cmp(1)))
3077
3078            call Pack_cc_Sub2(0,dcpl_mb(dng_cmp(1)),
3079     >                          dcpl_mb(vl1(1)))
3080            call Pack_cc_Sum2(0,dcpl_mb(dng_cmp(1)),
3081     >                          dcpl_mb(vl_notpaw(1)))
3082
3083            call nwpw_compcharge_gen_dE_ncmp_vloc_Qlm(ispin,
3084     >                                            dbl_mb(zv(1)),
3085     >                                            dcpl_mb(vl_notpaw(1)),
3086     >                                            dcpl_mb(vl1(1)),
3087     >                                            move,fion)
3088            if (move) then
3089               call nwpw_compcharge_gen_dn_cmp2(ispin,
3090     >                                       dcpl_mb(dng_cmp(1)),
3091     >                                       dcpl_mb(dng_cmp_smooth(1)))
3092               call f_local_seperate_paw(dcpl_mb(dng_cmp_smooth(1)),
3093     >                                dcpl_mb(dng_cmp(1)),
3094     >                                fion)
3095               call Pack_cc_Sub2(0,dcpl_mb(dng_cmp_smooth(1)),
3096     >                             dcpl_mb(dng_cmp(1)))
3097               call nwpw_compcharge_gen_f_cmp_smooth(dbl_mb(zv(1)),
3098     >                                            dcpl_mb(dng_cmp(1)),
3099     >                                            fion)
3100            end if
3101
3102         end if
3103         ok =        BA_pop_stack(vl_notpaw(2))
3104         ok = ok.and.BA_pop_stack(vl1(2))
3105         ok = ok.and.BA_pop_stack(dng_cmp_smooth(2))
3106         ok = ok.and.BA_pop_stack(dng_cmp(2))
3107         if (.not.ok)
3108     >      call errquit('psp_vloc_residual:pop stack',1,MA_ERR)
3109
3110*     **************************************
3111*     **** APeriodic Boundary Condtions ****
3112*     **************************************
3113      else
3114         call D3dB_n2ft3d(1,n2ft3d)
3115         ok =         BA_push_get(mt_dbl,n2ft3d,'rho_cmp',
3116     >                            rho_cmp(2),rho_cmp(1))
3117         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'rho_cmp_smooth',
3118     >                           rho_cmp_smooth(2),rho_cmp_smooth(1))
3119         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'vl1',
3120     >                           vl1(2),vl1(1))
3121         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'vl_notpaw',
3122     >                           vl_notpaw(2),vl_notpaw(1))
3123         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'vl2',
3124     >                           vl2(2),vl2(1))
3125         ok = ok.and.BA_push_get(mt_dbl,3*n2ft3d,'rgrid_cmp',
3126     >                           r_grid(2),r_grid(1))
3127         if (.not.ok)
3128     >      call errquit('psp_ncmp_vloc:out of stack',2,MA_ERR)
3129         call lattice_r_grid(dbl_mb(r_grid(1)))
3130         call D3dB_nx(1,nx)
3131         call D3dB_ny(1,ny)
3132         call D3dB_nz(1,nz)
3133         scal1 = 1.0d0/(nx*ny*nz)
3134         dv = scal1*lattice_omega()
3135
3136*        **** Using pw grid only ***
3137         if (use_grid_cmp) then
3138            call v_local(dbl_mb(vl1(1)),
3139     >               .false.,
3140     >               dum1,dum2)
3141            call v_lr_local(dbl_mb(r_grid(1)),
3142     >                      dbl_mb(rho_cmp(1)))
3143            call D3dB_rc_pfft3f(1,0,dbl_mb(rho_cmp(1)))
3144            call Pack_c_pack(0,dbl_mb(rho_cmp(1)))
3145            call Pack_c_SMul1(0,dv,dbl_mb(rho_cmp(1)))
3146            call Pack_cc_Sum2(0,dbl_mb(rho_cmp(1)),dbl_mb(vl1(1)))
3147
3148            call nwpw_compcharge_gen_dE_ncmp_vloc_Qlm_pw(ispin,
3149     >                                               dbl_mb(vl1(1)),
3150     >                                               move,fion)
3151
3152*        **** Using gaussian two-electron integrals and pw grid ***
3153         else
3154
3155*           **** long-range terms ****
3156            call v_lr_local_seperate_paw(dbl_mb(r_grid(1)),
3157     >                                   dbl_mb(vl1(1)),
3158     >                                   dbl_mb(vl_notpaw(1)))
3159            call nwpw_compcharge_gen_vlr_cmp_smooth(dbl_mb(zv(1)),
3160     >                                           dbl_mb(r_grid(1)),
3161     >                                           dbl_mb(rho_cmp(1)))
3162            call D3dB_rr_Sum2(1,dbl_mb(rho_cmp(1)),dbl_mb(vl_notpaw(1)))
3163            call D3dB_rc_pfft3f(1,0,dbl_mb(vl_notpaw(1)))
3164            call Pack_c_pack(0,dbl_mb(vl_notpaw(1)))
3165            call Pack_c_SMul1(0,dv,dbl_mb(vl_notpaw(1)))
3166
3167            call D3dB_rr_Sub2(1,dbl_mb(rho_cmp(1)),dbl_mb(vl1(1)))
3168            call D3dB_rc_pfft3f(1,0,dbl_mb(vl1(1)))
3169            call Pack_c_pack(0,dbl_mb(vl1(1)))
3170            call Pack_c_SMul1(0,dv,dbl_mb(vl1(1)))
3171
3172*           **** short-range terms ****
3173            call v_local_seperate_paw(dbl_mb(rho_cmp_smooth(1)),
3174     >                                dbl_mb(rho_cmp(1)))
3175            call Pack_cc_Sum2(0,
3176     >                        dbl_mb(rho_cmp(1)),
3177     >                        dbl_mb(vl_notpaw(1)))
3178            call Pack_cc_Sum2(0,
3179     >                        dbl_mb(rho_cmp_smooth(1)),
3180     >                        dbl_mb(vl1(1)))
3181
3182            call nwpw_compcharge_gen_dE_ncmp_vloc_Qlm(ispin,
3183     >                                            dbl_mb(zv(1)),
3184     >                                            dbl_mb(vl_notpaw(1)),
3185     >                                            dbl_mb(vl1(1)),
3186     >                                            move,fion)
3187            if (move) then
3188               call nwpw_compcharge_gen_dn_cmp2(ispin,
3189     >                                       dbl_mb(rho_cmp(1)),
3190     >                                       dbl_mb(rho_cmp_smooth(1)))
3191               call f_local_seperate_paw(dbl_mb(rho_cmp_smooth(1)),
3192     >                                   dbl_mb(rho_cmp(1)),
3193     >                                   fion)
3194
3195               call Pack_c_unpack(0,dbl_mb(rho_cmp(1)))
3196               call D3dB_cr_pfft3b(1,0,dbl_mb(rho_cmp(1)))
3197
3198               call Pack_c_unpack(0,dbl_mb(rho_cmp_smooth(1)))
3199               call D3dB_cr_pfft3b(1,0,dbl_mb(rho_cmp_smooth(1)))
3200
3201               call f_lr_local_seperate_paw(dbl_mb(r_grid(1)),
3202     >                                      dbl_mb(rho_cmp_smooth(1)),
3203     >                                      dbl_mb(rho_cmp(1)),
3204     >                                      fion)
3205               call D3dB_rr_Sub2(1,dbl_mb(rho_cmp_smooth(1)),
3206     >                             dbl_mb(rho_cmp(1)))
3207               call nwpw_compcharge_gen_f_lr_cmp_smooth(dbl_mb(zv(1)),
3208     >                                               dbl_mb(r_grid(1)),
3209     >                                               dbl_mb(rho_cmp(1)),
3210     >                                               fion)
3211            end if
3212         end if
3213         ok =        BA_pop_stack(r_grid(2))
3214         ok = ok.and.BA_pop_stack(vl2(2))
3215         ok = ok.and.BA_pop_stack(vl_notpaw(2))
3216         ok = ok.and.BA_pop_stack(vl1(2))
3217         ok = ok.and.BA_pop_stack(rho_cmp_smooth(2))
3218         ok = ok.and.BA_pop_stack(rho_cmp(2))
3219         if (.not.ok)
3220     >      call errquit('psp_vloc_ncmp_vloc:pop stack',3,MA_ERR)
3221
3222      end if
3223
3224
3225      end if
3226
3227      return
3228      end
3229
3230
3231*     ***********************************
3232*     *					*
3233*     *	        psp_xc_atom		*
3234*     *					*
3235*     ***********************************
3236
3237*    This routine computes the paw atomic exc and pxc psp energies
3238*
3239      subroutine psp_xc_atom(ispin,ne,psi1,exc,pxc)
3240      implicit none
3241      integer    ispin,ne(2)
3242      complex*16 psi1(*)
3243      real*8 exc,pxc
3244
3245#include "bafdecls.fh"
3246#include "errquit.fh"
3247#include "psp.fh"
3248
3249*     *** local variables ***
3250      integer npack1
3251      integer ii,ia,iii,l,nn
3252      integer k,shift,l_prj,nproj,Gijl_indx
3253      real*8  omega,scal,scalsqr
3254      integer exi(2),sw1(2),sw2(2)
3255      logical value,sd_function
3256
3257      real*8 pxc0
3258      common /pxc_atom_pdx0/ pxc0
3259
3260
3261*     **** external functions ****
3262      integer  ion_katm
3263      integer  psi_data_get_chnk,psi_data_get_ptr
3264      real*8   lattice_omega,nwpw_xc_energy_atom
3265      external ion_katm
3266      external psi_data_get_chnk,psi_data_get_ptr
3267      external lattice_omega,nwpw_xc_energy_atom
3268
3269
3270      exc = 0.0d0
3271      pxc = 0.0d0
3272!$OMP MASTER
3273      pxc0 = 0.0d0
3274!$OMP END MASTER
3275
3276      if (pawexist) then
3277      nn = ne(1)+ne(2)
3278
3279*     **** allocate local memory ****
3280      call Pack_npack(1,npack1)
3281      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
3282      value = value.and.
3283     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1',sw1(2),sw1(1))
3284      value = value.and.
3285     >        BA_push_get(mt_dbl,nn*nprj_max,'sw2',sw2(2),sw2(1))
3286      if (.not.value)
3287     >  call errquit('psp_overlap_orb: out of stack',0,MA_ERR)
3288
3289
3290      omega = lattice_omega()
3291      scal    = 1.0d0/(omega)
3292      scalsqr = scal*scal
3293
3294
3295      do iii=1,nion_paw
3296        ii    = int_mb(ion_pawtoion(1)+iii-1)
3297        ia    = ion_katm(ii)
3298        nproj = int_mb(nprj(1)+ia-1)
3299
3300        if (nproj.gt.0) then
3301
3302*       **** structure factor and local pseudopotential ****
3303        call strfac_pack(1,ii,dcpl_mb(exi(1)))
3304
3305*       **** generate sw1's and projectors ****
3306        do l=1,nproj
3307
3308           shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
3309           l_prj = int_mb(l_projector(1)+(l-1)
3310     >                                  + (ia-1)*(nmax_max*lmmax_max))
3311           !sd_function = .not.and(l_prj,1)
3312#ifdef GCC4
3313           k = iand(l_prj,1)
3314#else
3315           k = and(l_prj,1)
3316#endif
3317           sd_function = (k.eq.0)
3318
3319
3320*          **** phase factor does not matter therefore ****
3321*          **** (-i)^l is the same as (i)^l in the     ****
3322*          **** Rayleigh scattering formula            ****
3323
3324*          *** current function is s or d ****
3325           if (sd_function) then
3326              call Pack_tc_Mul(1,dbl_mb(shift),
3327     >                           dcpl_mb(exi(1)),
3328     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
3329*          *** current function is p or f ****
3330           else
3331              call Pack_tc_iMul(1,dbl_mb(shift),
3332     >                           dcpl_mb(exi(1)),
3333     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
3334           end if
3335           call Pack_cc_indot(1,nn,
3336     >                      psi1,
3337     >                      dcpl_mb(prjtmp(1)+(l-1)*npack1),
3338     >                      dbl_mb(sw1(1)+(l-1)*nn))
3339
3340        end do
3341        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1(1)))
3342
3343
3344*       **** sw2 = sw2 + Vxcijl*sw1 ******
3345        !call dcopy(nn*nproj,0.0d0,0,dbl_mb(sw2(1)),1)
3346        call Parallel_shared_vector_zero(.true.,nn*nproj,dbl_mb(sw2(1)))
3347        call nwpw_xc_solve(ii,ia,
3348     >     int_mb(n1dgrid(1)+ia-1),
3349     >     int_mb(n1dbasis(1)+ia-1),
3350     >     dbl_mb(psi_data_get_chnk(int_mb(phi_ae(1)+ia-1))),
3351     >     dbl_mb(psi_data_get_chnk(int_mb(phi_ps(1)+ia-1))),
3352     >     dbl_mb(psi_data_get_chnk(int_mb(dphi_ae(1)+ia-1))),
3353     >     dbl_mb(psi_data_get_chnk(int_mb(dphi_ps(1)+ia-1))),
3354     >     dbl_mb(psi_data_get_chnk(int_mb(core_ae(1)+ia-1))),
3355     >     dbl_mb(psi_data_get_chnk(int_mb(core_ps(1)+ia-1))),
3356     >     dbl_mb(psi_data_get_chnk(int_mb(core_ae_prime(1)+ia-1))),
3357     >     dbl_mb(psi_data_get_chnk(int_mb(core_ps_prime(1)+ia-1))),
3358     >     dbl_mb(psi_data_get_chnk(int_mb(rgrid(1)+ia-1))),
3359     >     dbl_mb(log_amesh(1)+ia-1),
3360     >     ispin,ne,int_mb(nprj(1)+ia-1),dbl_mb(sw1(1)),dbl_mb(sw2(1)))
3361
3362*          **** pxc = transpose(sw1)*sw2) ****
3363!$OMP MASTER
3364           do l=0,(nn*nproj-1)
3365            pxc0 = pxc0 + dbl_mb(sw1(1)+l)*dbl_mb(sw2(1)+l)
3366           end do
3367!$OMP END MASTER
3368
3369        end if !** nproj>0 **
3370      end do !** ii **
3371!$OMP MASTER
3372      pxc0 = pxc0*scal
3373      if (ispin.eq.1) pxc0 = pxc0 + pxc0
3374!$OMP END MASTER
3375!$OMP BARRIER
3376
3377      value =           BA_pop_stack(sw2(2))
3378      value = value.and.BA_pop_stack(sw1(2))
3379      value = value.and.BA_pop_stack(exi(2))
3380      if (.not.value) call errquit('psp_xc_atom: popping stack',3,
3381     &       MA_ERR)
3382
3383      call D1dB_SumAll(pxc0)
3384      pxc = pxc0
3385      exc = nwpw_xc_energy_atom()
3386      end if
3387      return
3388      end
3389
3390
3391
3392
3393*     *******************************************************
3394*     *                                                     *
3395*     *                 psp_rholm_solve                     *
3396*     *                                                     *
3397*     *******************************************************
3398
3399      subroutine psp_rholm_solve(ispin,ne,nproj,sw1,
3400     >                            l_prj,m_prj,projtobasis,
3401     >                            n1dgrid,n1dbasis,
3402     >                            rgrid,phi_ae,phi_ps,
3403     >                            lmax,lmax2,
3404     >                            rholm_ae,rholm_ps)
3405      implicit none
3406      integer ispin,ne(2),nproj
3407      real*8  sw1(ne(1)+ne(2),nproj)
3408      integer l_prj(*), m_prj(*),projtobasis(*)
3409
3410      integer n1dgrid,n1dbasis
3411      real*8  rgrid(n1dgrid)
3412      real*8  phi_ae(n1dgrid,n1dbasis)
3413      real*8  phi_ps(n1dgrid,n1dbasis)
3414      integer lmax,lmax2
3415      real*8 rholm_ae(n1dgrid,ispin,lmax2)
3416      real*8 rholm_ps(n1dgrid,ispin,lmax2)
3417
3418*     ***** local variables *****
3419      integer i,j,l,m,lm,ms,n,ig,n1(2),n2(2)
3420      real*8  wij,taunt
3421
3422*     ***** external functions *****
3423      real*8   taunt_coeff
3424      external taunt_coeff
3425
3426      n1(1) = 1
3427      n1(2) = ne(1)+1
3428      n2(1) = ne(1)
3429      n2(2) = ne(1)+ne(2)
3430
3431      do i=1,nproj
3432         do j=1,nproj
3433
3434*           **** generate overlap matrix wij(ms) = Sum(n=1,ne(ms)) <psi(n)|prj(i)> * <prj(j)*psi(n)> ****
3435            do ms=1,ispin
3436               wij = 0.0
3437               do n=n1(ms),n2(ms)
3438                  wij = wij + sw1(n,i)*sw1(n,j)
3439               end do
3440
3441               do ig=1,n1dgrid
3442                  rholm_ae(ig,ms,1) = wij
3443     >                               *phi_ae(ig,projtobasis(i))
3444     >                               *phi_ae(ig,projtobasis(j))
3445     >                               /rgrid(ig)**2
3446                  rholm_ps(ig,ms,1) = wij
3447     >                               *phi_ps(ig,projtobasis(i))
3448     >                               *phi_ps(ig,projtobasis(j))
3449     >                               /rgrid(ig)**2
3450               end do
3451            end do
3452
3453            lm = 2
3454            do l=1,lmax
3455               do m=-l,l
3456c                  taunt = taunt_coeff(l,m,
3457c     >                                l_prj(j),m_prj(j),
3458c     >                                l_prj(i),m_prj(i))
3459                  do ms=1,ispin
3460                     do ig=1,n1dgrid
3461                        rholm_ae(ig,ms,lm) = taunt*rholm_ae(ig,ms,1)
3462                        rholm_ps(ig,ms,lm) = taunt*rholm_ps(ig,ms,1)
3463                     end do
3464                  end do
3465                 lm = lm + 1
3466               end do
3467            end do
3468c            taunt = taunt_coeff(0,0,
3469c     >                          l_prj(j),m_prj(j),
3470c     >                          l_prj(i),m_prj(i))
3471            do ms=1,ispin
3472               do ig=1,n1dgrid
3473                  rholm_ae(ig,ms,1) = taunt*rholm_ae(ig,ms,1)
3474                  rholm_ps(ig,ms,1) = taunt*rholm_ps(ig,ms,1)
3475               end do
3476            end do
3477
3478         end do
3479      end do
3480      return
3481      end
3482
3483
3484*     ***********************************
3485*     *					*
3486*     *	        psp_qlm_atom		*
3487*     *					*
3488*     ***********************************
3489
3490*    This routine computes the multipole expansion
3491*
3492      subroutine psp_qlm_atom(ispin,ne,psi1)
3493      implicit none
3494      integer    ispin,ne(2)
3495      complex*16 psi1(*)
3496
3497#include "bafdecls.fh"
3498#include "errquit.fh"
3499#include "psp.fh"
3500
3501*     *** local variables ***
3502      integer npack1
3503      integer ii,ia,iii,l,nn
3504      integer k,shift,l_prj,nproj,Gijl_indx
3505      real*8  omega,scal,scalsqr
3506      integer exi(2),sw1(2),sw2(2)
3507      logical value,sd_function
3508
3509
3510*     **** external functions ****
3511      integer  ion_katm
3512      integer  psi_data_get_ptr
3513      real*8   lattice_omega
3514      external ion_katm
3515      external psi_data_get_ptr
3516      external lattice_omega
3517
3518      if (pawexist) then
3519
3520      nn = ne(1)+ne(2)
3521
3522*     **** allocate local memory ****
3523      call Pack_npack(1,npack1)
3524      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
3525      value = value.and.
3526     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1',sw1(2),sw1(1))
3527      if (.not.value)
3528     >  call errquit('psp_qlm_atom: out of stack',0,MA_ERR)
3529
3530      omega = lattice_omega()
3531      scal    = 1.0d0/(omega)
3532      scalsqr = scal*scal
3533
3534      do iii=1,nion_paw
3535        ii    = int_mb(ion_pawtoion(1)+iii-1)
3536        ia    = ion_katm(ii)
3537        nproj = int_mb(nprj(1)+ia-1)
3538
3539        if (nproj.gt.0) then
3540
3541*       **** structure factor and local pseudopotential ****
3542        call strfac_pack(1,ii,dcpl_mb(exi(1)))
3543
3544*       **** generate sw1's and projectors ****
3545        do l=1,nproj
3546
3547           shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
3548           l_prj = int_mb(l_projector(1)+(l-1)
3549     >                                  + (ia-1)*(nmax_max*lmmax_max))
3550           !sd_function = .not.and(l_prj,1)
3551#ifdef GCC4
3552           k = iand(l_prj,1)
3553#else
3554           k = and(l_prj,1)
3555#endif
3556           sd_function = (k.eq.0)
3557
3558
3559*          **** phase factor does not matter therefore ****
3560*          **** (-i)^l is the same as (i)^l in the     ****
3561*          **** Rayleigh scattering formula            ****
3562
3563*          *** current function is s or d ****
3564           if (sd_function) then
3565              call Pack_tc_Mul(1,dbl_mb(shift),
3566     >                           dcpl_mb(exi(1)),
3567     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
3568*          *** current function is p or f ****
3569           else
3570              call Pack_tc_iMul(1,dbl_mb(shift),
3571     >                           dcpl_mb(exi(1)),
3572     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
3573           end if
3574           call Pack_cc_indot(1,nn,
3575     >                      psi1,
3576     >                      dcpl_mb(prjtmp(1)+(l-1)*npack1),
3577     >                      dbl_mb(sw1(1)+(l-1)*nn))
3578
3579        end do
3580        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1(1)))
3581
3582
3583*       **** paw atom - generate it's atomic density matrix ****
3584        call psp_gen_density_matrix(ispin,ne,nproj,
3585     >                              dbl_mb(sw1(1)),
3586     >                              dbl_mb(wtmp(1)))
3587
3588*       **** paw atom - generate it's compcharge ***
3589        call nwpw_compcharge_gen_Qlm(ii,ia,ispin,nproj,
3590     >                               dbl_mb(wtmp(1)))
3591
3592
3593
3594        end if !** nproj>0 **
3595      end do !** ii **
3596
3597      value = value.and.BA_pop_stack(sw1(2))
3598      value = value.and.BA_pop_stack(exi(2))
3599      if (.not.value) call errquit('psp_qlm_atom: popping stack',3,
3600     >       MA_ERR)
3601
3602      end if
3603      return
3604      end
3605
3606
3607*     *************************************************
3608*     *                                               *
3609*     *              psp_hartree_cmp_cmp              *
3610*     *                                               *
3611*     *************************************************
3612*
3613*    This routine calulates the hartree energy of the compensation charge density.
3614*
3615*                         //
3616*    E_cmp_cmp   = 0.5 * || (ncmp(r))*(ncmp(r'))
3617*                        || --------------------  dr dr'
3618*                       //       |r-r'|
3619*
3620*
3621*
3622*    using either a Fourier grid (use_grid_cmp=.true.)
3623*    or a combinations of Fourier grids and two electron two center Gaussian Coulomb integrals (use_grid_cmp=.false.).
3624*
3625      real*8 function psp_hartree_cmp_cmp(ispin)
3626      implicit none
3627      integer ispin
3628
3629#include "bafdecls.fh"
3630#include "errquit.fh"
3631#include "psp.fh"
3632
3633*     **** local variables ****
3634      logical ok,periodic
3635      integer npack0,dng_cmp(2),dng_cmp_smooth(2),vcmp_smooth(2)
3636      integer n2ft3d,rho_cmp(2),rho_cmp_smooth(2)
3637      integer nx,ny,nz
3638      real*8 eh,e1,e2,eh0,eh1,eh2,scal1,dv
3639      common /psp_cmp_eh12/ eh,e1,e2
3640
3641*     **** external functions ****
3642      integer  control_version
3643      external control_version
3644      real*8   nwpw_compcharge_E_multipole_zv_ee,lattice_omega
3645      external nwpw_compcharge_E_multipole_zv_ee,lattice_omega
3646      real*8   nwpw_compcharge_E_multipole_zv
3647      external nwpw_compcharge_E_multipole_zv
3648      real*8   nwpw_compcharge_E_multipole_zv_zv
3649      external nwpw_compcharge_E_multipole_zv_zv
3650      real*8   nwpw_compcharge_E_multipole
3651      external nwpw_compcharge_E_multipole
3652
3653      if (pawexist) then
3654
3655      periodic = (control_version().eq.3)
3656
3657*     *************************************
3658*     **** Periodic Boundary Condtions ****
3659*     *************************************
3660      if (periodic) then
3661         call Pack_npack(0,npack0)
3662         ok =        BA_push_get(mt_dcpl,npack0,'dng_cmp',
3663     >                           dng_cmp(2),dng_cmp(1))
3664         ok = ok.and.BA_push_get(mt_dcpl,npack0,'vcmp_smooth',
3665     >                           vcmp_smooth(2),vcmp_smooth(1))
3666         if (.not.ok)
3667     >      call errquit('psp_hartree_cmp_cmp:out of stack',0,MA_ERR)
3668
3669*        **** Using pw grid ***
3670         if (use_grid_cmp) then
3671            call nwpw_compcharge_gen_dn_cmp(ispin,dcpl_mb(dng_cmp(1)))
3672            call coulomb_v(dcpl_mb(dng_cmp(1)),dcpl_mb(vcmp_smooth(1)))
3673            call Pack_cc_dot(0,dcpl_mb(dng_cmp(1)),
3674     >                         dcpl_mb(vcmp_smooth(1)),e1)
3675            eh = 0.5d0*e1*lattice_omega()
3676
3677*        **** Using gaussian Multipole energies ****
3678         else
3679
3680*           **** multipole energy ****
3681            eh0 = nwpw_compcharge_E_multipole(ispin)
3682
3683            ok = BA_push_get(mt_dcpl,npack0,'dng_cmp_smooth',
3684     >                       dng_cmp_smooth(2),dng_cmp_smooth(1))
3685            if (.not.ok)
3686     >         call errquit('psp_hartree_cmp_cmp:out of stack',1,MA_ERR)
3687
3688            call nwpw_compcharge_gen_dn_cmp2(ispin,
3689     >                                       dcpl_mb(dng_cmp(1)),
3690     >                                       dcpl_mb(dng_cmp_smooth(1)))
3691            call coulomb_v(dcpl_mb(dng_cmp_smooth(1)),
3692     >                     dcpl_mb(vcmp_smooth(1)))
3693            call Pack_cc_dot(0,dcpl_mb(dng_cmp(1)),
3694     >                         dcpl_mb(vcmp_smooth(1)),e1)
3695            call Pack_cc_dot(0,dcpl_mb(dng_cmp_smooth(1)),
3696     >                         dcpl_mb(vcmp_smooth(1)),e2)
3697
3698!$OMP MASTER
3699*           **** add cmp energies ****
3700            eh = eh0 + (e1-0.5d0*e2)*lattice_omega()
3701!$OMP END MASTER
3702
3703
3704            ok = ok.and.BA_pop_stack(dng_cmp_smooth(2))
3705            if (.not.ok)
3706     >        call errquit('psp_hartree_cmp_cmp:popping stack',2,MA_ERR)
3707
3708         end if
3709
3710         ok =        BA_pop_stack(vcmp_smooth(2))
3711         ok = ok.and.BA_pop_stack(dng_cmp(2))
3712         if (.not.ok)
3713     >      call errquit('psp_hartree_cmp_cmp:popping stack',3,MA_ERR)
3714
3715
3716*     **************************************
3717*     **** APeriodic Boundary Condtions ****
3718*     **************************************
3719      else
3720         call D3dB_n2ft3d(1,n2ft3d)
3721         ok =         BA_push_get(mt_dbl,n2ft3d,'rho_cmp',
3722     >                            rho_cmp(2),rho_cmp(1))
3723         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'vcmp_smooth',
3724     >                           vcmp_smooth(2),vcmp_smooth(1))
3725         if (.not.ok)
3726     >      call errquit('psp_hartree_cmp_cmp:out of stack',4,MA_ERR)
3727         call D3dB_nx(1,nx)
3728         call D3dB_ny(1,ny)
3729         call D3dB_nz(1,nz)
3730         scal1 = 1.0d0/(nx*ny*nz)
3731         dv = scal1*lattice_omega()
3732
3733*        **** Using pw grid ***
3734         if (use_grid_cmp) then
3735            call nwpw_compcharge_gen_dn_cmp(ispin,dbl_mb(rho_cmp(1)))
3736
3737            call Pack_c_unpack(0,dbl_mb(rho_cmp(1)))
3738            call D3dB_cr_fft3b(1,dbl_mb(rho_cmp(1)))
3739            call coulomb2_v(dbl_mb(rho_cmp(1)),
3740     >                      dbl_mb(vcmp_smooth(1)))
3741            call D3dB_rr_dot(1,dbl_mb(rho_cmp(1)),
3742     >                         dbl_mb(vcmp_smooth(1)),e1)
3743!$OMP MASTER
3744            eh = 0.5d0*e1*dv
3745!$OMP END MASTER
3746
3747*        **** Using gaussian Multipole energies ****
3748         else
3749
3750*           **** multipole energy ****
3751            eh0  = nwpw_compcharge_E_multipole(ispin)
3752c            eh0 = nwpw_compcharge_E_multipole_zv(ispin,dbl_mb(zv(1)))
3753c            eh2 = nwpw_compcharge_E_multipole_zv_zv(ispin,dbl_mb(zv(1)))
3754c            eh1 = nwpw_compcharge_E_multipole_zv_ee(ispin,dbl_mb(zv(1)))
3755
3756
3757            ok = BA_push_get(mt_dbl,n2ft3d,'rho_cmp_smooth',
3758     >                       rho_cmp_smooth(2),rho_cmp_smooth(1))
3759            if (.not.ok)
3760     >         call errquit('psp_hartree_cmp_cmp:out of stack',5,MA_ERR)
3761
3762            call nwpw_compcharge_gen_dn_cmp2(ispin,
3763     >                                       dbl_mb(rho_cmp(1)),
3764     >                                       dbl_mb(rho_cmp_smooth(1)))
3765
3766            call Pack_c_unpack(0,dbl_mb(rho_cmp(1)))
3767            call D3dB_cr_fft3b(1,dbl_mb(rho_cmp(1)))
3768            call Pack_c_unpack(0,dbl_mb(rho_cmp_smooth(1)))
3769            call D3dB_cr_fft3b(1,dbl_mb(rho_cmp_smooth(1)))
3770
3771            call coulomb2_v(dbl_mb(rho_cmp_smooth(1)),
3772     >                      dbl_mb(vcmp_smooth(1)))
3773            call D3dB_rr_dot(1,dbl_mb(rho_cmp(1)),
3774     >                         dbl_mb(vcmp_smooth(1)),e1)
3775            call D3dB_rr_dot(1,dbl_mb(rho_cmp_smooth(1)),
3776     >                         dbl_mb(vcmp_smooth(1)),e2)
3777
3778!$OMP MASTER
3779            eh = eh + (e1-0.5d0*e2)*dv
3780!$OMP END MASTER
3781
3782            ok = BA_pop_stack(rho_cmp_smooth(2))
3783            if (.not.ok)
3784     >        call errquit('psp_hartree_cmp_cmp:popping stack',2,MA_ERR)
3785         end if
3786
3787         ok =        BA_pop_stack(vcmp_smooth(2))
3788         ok = ok.and.BA_pop_stack(rho_cmp(2))
3789         if (.not.ok)
3790     >      call errquit('psp_hartree_cmp_cmp:popping stack',6,MA_ERR)
3791      end if
3792
3793      end if
3794!$OMP BARRIER
3795      psp_hartree_cmp_cmp = eh
3796      return
3797      end
3798
3799*     *************************************************
3800*     *                                               *
3801*     *              psp_hartree_cmp_pw               *
3802*     *                                               *
3803*     *************************************************
3804*
3805*    This routine calulates the hartree energy of the compensation charge density.
3806*
3807*                         //
3808*    E_cmp_smooth     =  || (ncmp(r))*(npw(r'))
3809*                        || -----------------------  dr dr'
3810*                       //         |r-r'|
3811*
3812*    using either a Fourier grid
3813*
3814      real*8 function psp_hartree_cmp_pw(ispin,dng,rho)
3815      implicit none
3816      integer ispin
3817      complex*16 dng(*)
3818      real*8     rho(*)
3819
3820#include "bafdecls.fh"
3821#include "errquit.fh"
3822#include "psp.fh"
3823
3824*     **** local variables ****
3825      logical ok,periodic
3826      integer npack0,dng_cmp(2),vcmp(2)
3827      integer n2ft3d,rho_cmp(2),nx,ny,nz
3828      real*8 eh,scal1,dv
3829      common /psp_cmp_eh/ eh
3830
3831*     **** external functions ****
3832      integer  control_version
3833      external control_version
3834      real*8   lattice_omega
3835      external lattice_omega
3836
3837      if (pawexist) then
3838
3839      periodic = (control_version().eq.3)
3840
3841*     ****************************************
3842*     ***** periodic boundary conditions *****
3843*     ****************************************
3844      if (periodic) then
3845         call Pack_npack(0,npack0)
3846         ok =BA_push_get(mt_dcpl,npack0,'dng_cmp',dng_cmp(2),dng_cmp(1))
3847         ok =ok.and.BA_push_get(mt_dcpl,npack0,'vcmp',vcmp(2),vcmp(1))
3848         if (.not.ok)
3849     >      call errquit('psp_hartree_cmp_pw:out of stack',0,MA_ERR)
3850
3851         call nwpw_compcharge_gen_dn_cmp(ispin,dcpl_mb(dng_cmp(1)))
3852         call coulomb_v(dcpl_mb(dng_cmp(1)),dcpl_mb(vcmp(1)))
3853         call Pack_cc_dot(0,dng,dcpl_mb(vcmp(1)),eh)
3854!$OMP MASTER
3855         eh = eh*lattice_omega()
3856!$OMP END MASTER
3857
3858         ok =        BA_pop_stack(vcmp(2))
3859         ok = ok.and.BA_pop_stack(dng_cmp(2))
3860         if (.not.ok)
3861     >      call errquit('psp_hartree_cmp_pw:popping stack',1,MA_ERR)
3862
3863
3864*     *****************************************
3865*     ***** aperiodic boundary conditions *****
3866*     *****************************************
3867      else
3868         call D3dB_n2ft3d(1,n2ft3d)
3869         ok =BA_push_get(mt_dbl,n2ft3d,'rho_cmp',rho_cmp(2),rho_cmp(1))
3870         ok =ok.and.BA_push_get(mt_dbl,n2ft3d,'vcmp',vcmp(2),vcmp(1))
3871         if (.not.ok)
3872     >      call errquit('psp_hartree_cmp_pw:out of stack',2,MA_ERR)
3873         call D3db_nx(1,nx)
3874         call D3db_ny(1,ny)
3875         call D3db_nz(1,nz)
3876         scal1 = 1.0d0/(nx*ny*nz)
3877         dv    = lattice_omega()*scal1
3878
3879         call nwpw_compcharge_gen_dn_cmp(ispin,dbl_mb(rho_cmp(1)))
3880
3881         call Pack_c_unpack(0,dbl_mb(rho_cmp(1)))
3882         call D3dB_cr_fft3b(1,dbl_mb(rho_cmp(1)))
3883         call coulomb2_v(dbl_mb(rho_cmp(1)),dbl_mb(vcmp(1)))
3884         call D3dB_rr_Sum(1,rho,rho(1+(ispin-1)*n2ft3d),
3885     >                    dbl_mb(rho_cmp(1)))
3886         call D3dB_rr_dot(1,dbl_mb(vcmp(1)),dbl_mb(rho_cmp(1)),eh)
3887!$OMP MASTER
3888         eh = eh*dv
3889!$OMP END MASTER
3890
3891         ok =        BA_pop_stack(vcmp(2))
3892         ok = ok.and.BA_pop_stack(rho_cmp(2))
3893         if (.not.ok)
3894     >      call errquit('psp_hartree_cmp_pw:popping stack',2,MA_ERR)
3895      end if
3896
3897      end if
3898      psp_hartree_cmp_pw = eh
3899      return
3900      end
3901
3902
3903*     *************************************************
3904*     *                                               *
3905*     *              psp_hartree_atom                 *
3906*     *                                               *
3907*     *************************************************
3908*
3909*   This routine computes the sum of Watom = Sum(I=1,nionpaw) W1atom^I + W2atom^I+ W3atom^I where
3910*
3911*                      //
3912*    W1atom^I = 0.5 * || (n^I(r)*n^I(r') - ntilde^I(r)*ntilde^I(r'))
3913*                     || -------------------------------------------  dr dr'
3914*                    //                    |r-r'|
3915*
3916*                      //
3917*    W2atom^I =   -   || ntilde^I(r)*ncmp^I(r')
3918*                     || ----------------------  dr dr'
3919*                    //         |r-r'|
3920*
3921*                      //
3922*    W3atom^I = -0.5* || ncmp^I(r)*ncmp^I(r')
3923*                     || --------------------  dr dr'
3924*                    //         |r-r'|
3925*
3926      real*8 function psp_hartree_atom(ispin,neq,psi1)
3927      implicit none
3928      integer    ispin,neq(2)
3929      complex*16 psi1(*)
3930
3931#include "bafdecls.fh"
3932#include "errquit.fh"
3933#include "psp.fh"
3934
3935*     **** local variables ****
3936      logical value,sd_function
3937      integer ii,ia,iii,nproj,npack1,nn,k,l,shift,l_prj
3938      integer exi(2),sw1(2)
3939      real*8  Watom
3940
3941*     ***** external functions ****
3942      integer  ion_katm,psi_data_get_ptr
3943      external ion_katm,psi_data_get_ptr
3944      real*8   nwpw_compcharge_coulomb_e_atom
3945      external nwpw_compcharge_coulomb_e_atom
3946
3947      Watom = 0.0d0
3948      if (pawexist) then
3949
3950      nn = neq(1)+neq(2)
3951
3952*     **** allocate local memory ****
3953      call Pack_npack(1,npack1)
3954      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
3955      value = value.and.
3956     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1',sw1(2),sw1(1))
3957      if (.not.value)
3958     >  call errquit('psp_hartree_atom: out of stack',0,MA_ERR)
3959
3960      do iii=1,nion_paw
3961         ii = int_mb(ion_pawtoion(1)+iii-1)
3962         ia = ion_katm(ii)
3963         nproj = int_mb(nprj(1)+ia-1)
3964         if (nproj.gt.0) then
3965
3966*           **** structure factor ****
3967            call strfac_pack(1,ii,dcpl_mb(exi(1)))
3968
3969*           **** generate sw1's and projectors ****
3970            do l=1,nproj
3971
3972               shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
3973               l_prj = int_mb(l_projector(1)+(l-1)
3974     >                                  + (ia-1)*(nmax_max*lmmax_max))
3975#ifdef GCC4
3976               k = iand(l_prj,1)
3977#else
3978               k = and(l_prj,1)
3979#endif
3980               sd_function = (k.eq.0)
3981
3982*              **** phase factor does not matter therefore ****
3983*              **** (-i)^l is the same as (i)^l in the     ****
3984*              **** Rayleigh scattering formula            ****
3985
3986*              *** current function is s or d ****
3987               if (sd_function) then
3988                  call Pack_tc_Mul(1,dbl_mb(shift),
3989     >                               dcpl_mb(exi(1)),
3990     >                               dcpl_mb(prjtmp(1)+(l-1)*npack1))
3991*              *** current function is p or f ****
3992               else
3993                  call Pack_tc_iMul(1,dbl_mb(shift),
3994     >                               dcpl_mb(exi(1)),
3995     >                               dcpl_mb(prjtmp(1)+(l-1)*npack1))
3996               end if
3997               call Pack_cc_indot(1,nn,
3998     >                          psi1,
3999     >                          dcpl_mb(prjtmp(1)+(l-1)*npack1),
4000     >                          dbl_mb(sw1(1)+(l-1)*nn))
4001
4002            end do
4003            call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1(1)))
4004
4005*           **** paw atom - generate it's atomic density matrix ****
4006            call psp_gen_density_matrix(ispin,neq,nproj,
4007     >                                  dbl_mb(sw1(1)),
4008     >                                  dbl_mb(wtmp(1)))
4009
4010c*           **** paw atom - generate it's compcharge ***
4011c            call nwpw_compcharge_gen_Qlm(ii,ia,ispin,nproj,
4012c     >                                   dbl_mb(wtmp(1)))
4013
4014
4015            Watom = Watom
4016     >            + nwpw_compcharge_coulomb_e_atom(ii,ia,ispin,nproj,
4017     >                                             dbl_mb(wtmp(1)))
4018         end if
4019      end do
4020
4021*     **** deallocate stack memory ****
4022      value =           BA_pop_stack(sw1(2))
4023      value = value.and.BA_pop_stack(exi(2))
4024      if (.not.value)
4025     >  call errquit('psp_hartree_atom:popping stack',1,MA_ERR)
4026
4027      end if
4028      !call D1dB_SumAll(Watom)
4029
4030
4031      psp_hartree_atom = Watom
4032      return
4033      end
4034
4035*     ***********************************
4036*     *					*
4037*     *	    psp_gen_density_matrix      *
4038*     *					*
4039*     ***********************************
4040*
4041*   This routine computes the atomic spin density matrix from a atomic spin sw1.
4042*
4043      subroutine psp_gen_density_matrix(ispin,ne,nprj,sw1,wmatrix)
4044      implicit none
4045      integer ispin,ne(2),nprj
4046      real*8  sw1(ne(1)+ne(2),nprj)
4047      real*8  wmatrix(nprj,nprj,ispin)
4048
4049*     **** local variables ****
4050      integer i,j,ms,n,n1(2),n2(2)
4051      real*8  tmp,taskid_i,np_i,icount
4052
4053      call Parallel2d_taskid_i(taskid_i)
4054      call Parallel2d_np_i(np_i)
4055      icount = 0
4056
4057      n1(1) = 1
4058      n2(1) = ne(1)
4059      n1(2) = ne(1)+1
4060      n2(2) = ne(1)+ne(2)
4061
4062c     **************************************************************
4063c     **** this loop should be restructed to parallelize over n ****
4064c     **************************************************************
4065      !call dcopy(ispin*nprj*nprj,0.0d0,0,wmatrix,1)
4066      call Parallel_shared_vector_zero(.true.,ispin*nprj*nprj,wmatrix)
4067!$OMP DO
4068      do j=1,nprj
4069         icount = j-1
4070         if (mod(icount,np_i).eq.taskid_i) then
4071            do ms=1,ispin
4072            do n=n1(ms),n2(ms)
4073               wmatrix(j,j,ms) = wmatrix(j,j,ms) + sw1(n,j)*sw1(n,j)
4074            end do
4075            end do
4076         end if
4077         icount = icount + 1
4078      !end do
4079
4080      !do j=1,nprj
4081         do i=j+1,nprj
4082            if (mod(icount,np_i).eq.taskid_i) then
4083               do ms=1,ispin
4084               do n=n1(ms),n2(ms)
4085                  tmp = sw1(n,i)*sw1(n,j)
4086                  wmatrix(i,j,ms) = wmatrix(i,j,ms) + tmp
4087                  wmatrix(j,i,ms) = wmatrix(j,i,ms) + tmp
4088               end do
4089               end do
4090            end if
4091            icount = icount + 1
4092         end do
4093      end do
4094!$OMP END DO
4095      !call D1dB_Vector_SumAll(ispin*nprj*nprj,wmatrix)
4096      call Parallel_Vector_SumAll(ispin*nprj*nprj,wmatrix)
4097
4098      return
4099      end
4100
4101
4102
4103*     ***********************************
4104*     *					*
4105*     *	        psp_efg_atoms		*
4106*     *					*
4107*     ***********************************
4108*    This routine computes the efg
4109*
4110      subroutine psp_efg_atoms(ispin,ne,psi1,efg)
4111      implicit none
4112      integer    ispin,ne(2)
4113      complex*16 psi1(*)
4114      real*8 efg(3,3,*)
4115
4116#include "bafdecls.fh"
4117#include "errquit.fh"
4118#include "psp.fh"
4119
4120*     *** local variables ***
4121      integer npack1,nion
4122      integer ii,ia,l,nn
4123      integer k,shift,l_prj,nproj,Gijl_indx
4124      real*8  omega,scal,scalsqr
4125      integer exi(2),sw1(2),sw2(2)
4126      logical value,sd_function
4127
4128*     **** external functions ****
4129      integer  ion_nion,ion_katm
4130      integer  psi_data_get_chnk,psi_data_get_ptr
4131      real*8   lattice_omega,nwpw_xc_energy_atom
4132      external ion_nion,ion_katm
4133      external psi_data_get_chnk,psi_data_get_ptr
4134      external lattice_omega,nwpw_xc_energy_atom
4135
4136      nn = ne(1)+ne(2)
4137
4138*     **** allocate local memory ****
4139      nion = ion_nion()
4140      call Pack_npack(1,npack1)
4141
4142      value = BA_push_get(mt_dcpl,npack1,'exi', exi(2), exi(1))
4143      value = value.and.
4144     >        BA_push_get(mt_dbl,nn*nprj_max,'sw1',sw1(2),sw1(1))
4145      if (.not.value)
4146     >  call errquit('psp_efg_atoms: out of stack',0,MA_ERR)
4147
4148      omega = lattice_omega()
4149      scal    = 1.0d0/(omega)
4150      scalsqr = scal*scal
4151
4152
4153      do ii=1,nion
4154        ia    = ion_katm(ii)
4155        nproj = int_mb(nprj(1)+ia-1)
4156
4157        if ((int_mb(psp_type(1)+ia-1).ne.4).and.(nproj.gt.0)) then
4158
4159*       **** structure factor and local pseudopotential ****
4160        call strfac_pack(1,ii,dcpl_mb(exi(1)))
4161
4162*       **** generate sw1's and projectors ****
4163        do l=1,nproj
4164
4165           shift = psi_data_get_ptr(int_mb(vnl(1)+ia-1),l)
4166           l_prj = int_mb(l_projector(1)+(l-1)
4167     >                                  + (ia-1)*(nmax_max*lmmax_max))
4168           !sd_function = .not.and(l_prj,1)
4169#ifdef GCC4
4170           k = iand(l_prj,1)
4171#else
4172           k = and(l_prj,1)
4173#endif
4174           sd_function = (k.eq.0)
4175
4176
4177*          **** phase factor does not matter therefore ****
4178*          **** (-i)^l is the same as (i)^l in the     ****
4179*          **** Rayleigh scattering formula            ****
4180
4181*          *** current function is s or d ****
4182           if (sd_function) then
4183              call Pack_tc_Mul(1,dbl_mb(shift),
4184     >                           dcpl_mb(exi(1)),
4185     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
4186*          *** current function is p or f ****
4187           else
4188              call Pack_tc_iMul(1,dbl_mb(shift),
4189     >                           dcpl_mb(exi(1)),
4190     >                           dcpl_mb(prjtmp(1)+(l-1)*npack1))
4191           end if
4192           call Pack_cc_indot(1,nn,
4193     >                      psi1,
4194     >                      dcpl_mb(prjtmp(1)+(l-1)*npack1),
4195     >                      dbl_mb(sw1(1)+(l-1)*nn))
4196
4197        end do
4198        call D3dB_Vector_SumAll((nn*nproj),dbl_mb(sw1(1)))
4199
4200        call psp_efg_solve(ia,int_mb(lmax(1)+ia-1),
4201     >     int_mb(l_projector(1)+(ia-1)*(nmax_max*lmmax_max)),
4202     >     int_mb(m_projector(1)+(ia-1)*(nmax_max*lmmax_max)),
4203     >     dbl_mb(psi_data_get_chnk(int_mb(r3_matrix(1)+ia-1))),
4204     >     ispin,ne,int_mb(nprj(1)+ia-1),dbl_mb(sw1(1)),efg(1,1,ii))
4205
4206        end if !** nproj>0 **
4207      end do !** ii **
4208
4209      value =           BA_pop_stack(sw1(2))
4210      value = value.and.BA_pop_stack(exi(2))
4211      if (.not.value) call errquit('psp_efg_atom: popping stack',3,
4212     &       MA_ERR)
4213      return
4214      end
4215
4216*     ********************************************************
4217*     *                                                      *
4218*     *                psp_efg_solve                         *
4219*     *                                                      *
4220*     ********************************************************
4221******************************************************************************************
4222*** Warning! This routine is not functioning. Need to rederive the efg for atoms...EJB ***
4223******************************************************************************************
4224      subroutine psp_efg_solve(ia,lmax,l_prj,m_prj,
4225     >                          r3_matrix,
4226     >                          ispin,ne,nprj,sw1,
4227     >                          efg)
4228      implicit none
4229      integer ia,lmax
4230      integer l_prj(*)
4231      integer m_prj(*)
4232      real*8 r3_matrix(0:lmax,0:lmax)
4233      integer ispin,ne(2),nprj
4234      real*8 sw1(ne(1)+ne(2),nprj)
4235      real*8 efg(3,3)
4236
4237*     **** external functions ****
4238      real*8   nwpw_gaunt,lattice_omega
4239      external nwpw_gaunt,lattice_omega
4240
4241*     **** local variables ****
4242      integer i,j,li,lj,mi,mj,l,m,lm,n
4243      real*8  coeflm(6),tmp,scal,pi,c1,c2,c3
4244
4245      pi = 4.0d0*datan(1.0d0)
4246
4247      scal = 1.0d0/lattice_omega()
4248      do lm=1,6
4249         coeflm(lm) = 0.0d0
4250      end do
4251
4252cccc these formulas need to be redirived!!!
4253c      do j=1,nprj
4254c         lj=l_prj(j)
4255c         mj=m_prj(j)
4256c         do i=1,nprj
4257c            li=l_prj(i)
4258c            mi=m_prj(i)
4259c
4260c            tmp = 0.0d0
4261c            do n=1,(ne(1)+ne(2))
4262c               tmp = tmp + sw1(n,i)*sw1(n,j)
4263c            end do
4264c            tmp = tmp*scal*r3_matrix(li,lj)
4265c
4266c            lm = 1
4267c            do l=0,2,2
4268c               do m=-l,l
4269c                  coeflm(lm) = coeflm(lm)
4270c     >                       + tmp*nwpw_gaunt(.false.,l,m,li,mi,lj,mj)
4271c                  lm = lm + 1
4272c               end do
4273c            end do
4274c         end do
4275c      end do
4276
4277      c1 = 2.0d0*dsqrt(pi)
4278      c2 = 6.0d0*dsqrt(pi/15.0d0)
4279      c3 = 2.0d0*dsqrt(pi/5.0d0)
4280
4281      !*** 2*sqrt(pi)*(l=0,m=0) + 6*sqrt(pi/15)*(l=2,m=2) + 2*sqrt(pi/5)*(l=2,m=0) ****
4282      efg(1,1) = efg(1,1) + c1*coeflm(1) + c2*coeflm(6) + c3*coeflm(4)
4283
4284      !*** 6*sqrt(pi/15)*(l=2,m=-2)***
4285      efg(2,1) = efg(2,1) + c1*coeflm(2)
4286      efg(1,2) = efg(1,2) + efg(2,1)
4287
4288      !*** 6*sqrt(pi/15)*(l=2,m=1)***
4289      efg(3,1) = efg(3,1) + c1*coeflm(5)
4290      efg(1,3) = efg(1,3) + efg(3,1)
4291
4292      !*** 2*sqrt(pi)*(l=0,m=0) - 6*sqrt(pi/15)*(l=2,m=2) + 2*sqrt(pi/5)*(l=2,m=0) ****
4293      efg(2,2) = efg(2,2) + c1*coeflm(1) -c2*coeflm(6) + c3*coeflm(4)
4294
4295      !*** 6*sqrt(pi/15)*(l=2,m=-1)***
4296      efg(3,2) = efg(3,2) + c2*coeflm(3)
4297      efg(2,3) = efg(2,3) + efg(3,2)
4298
4299      !*** 4*(l=2,m=0)+ 2*sqrt(pi)(l=0,m=0) ***
4300      efg(3,3) = efg(3,3) + 4.0d0*coeflm(4) + c1*coeflm(1)
4301
4302      return
4303      end
4304
4305
4306
4307
4308
4309
4310*     *************************************************
4311*     *                                               *
4312*     *              psp_dE_ncmp_vloc_Qlm_test        *
4313*     *                                               *
4314*     *************************************************
4315
4316      subroutine psp_dE_ncmp_vloc_Qlm_test(ispin,move,fion)
4317      implicit none
4318      integer ispin
4319      logical move
4320      real*8 fion(3,*)
4321
4322#include "bafdecls.fh"
4323#include "errquit.fh"
4324#include "psp.fh"
4325
4326*     **** local variables ****
4327      logical ok,periodic
4328      integer npack0,dng_cmp(2),dng_cmp_smooth(2),vl1(2),vl_notpaw(2)
4329      integer n2ft3d,rho_cmp(2),rho_cmp_smooth(2),vl2(2),r_grid(2)
4330      integer nx,ny,nz
4331      real*8 eh,e1,e2,eh0,eh1,eh2,scal1,dv
4332      real*8 dum1(3),dum2(3)
4333
4334*     **** external functions ****
4335      integer  control_version
4336      external control_version
4337      real*8   nwpw_compcharge_E_multipole_zv_ee,lattice_omega
4338      external nwpw_compcharge_E_multipole_zv_ee,lattice_omega
4339
4340
4341      eh = 0.0d0
4342      if (pawexist) then
4343
4344      periodic = (control_version().eq.3)
4345
4346*     *************************************
4347*     **** Periodic Boundary Condtions ****
4348*     *************************************
4349      if (periodic) then
4350         call Pack_npack(0,npack0)
4351         ok =        BA_push_get(mt_dcpl,npack0,'dng_cmp',
4352     >                           dng_cmp(2),dng_cmp(1))
4353         ok = ok.and.BA_push_get(mt_dcpl,npack0,'dng_cmp_smooth',
4354     >                           dng_cmp_smooth(2),dng_cmp_smooth(1))
4355         ok = ok.and.BA_push_get(mt_dcpl,npack0,'vl1',
4356     >                           vl1(2),vl1(1))
4357         ok = ok.and.BA_push_get(mt_dcpl,npack0,'vl_notpaw',
4358     >                           vl_notpaw(2),vl_notpaw(1))
4359         if (.not.ok)
4360     >      call errquit('psp_ncmp_vloc:out of stack',0,MA_ERR)
4361
4362*        **** Using pw grid only ***
4363         if (use_grid_cmp) then
4364c            call nwpw_compcharge_gen_dn_cmp(ispin,dcpl_mb(dng_cmp(1)))
4365            call v_local(dcpl_mb(vl1(1)),
4366     >               .false.,
4367     >               dum1,dum2)
4368c            call Pack_cc_dot(0,dcpl_mb(dng_cmp(1)),
4369c     >                         dcpl_mb(vl1(1)),eh)
4370
4371            call nwpw_compcharge_gen_dE_ncmp_vloc_Qlm_pw(ispin,
4372     >                                               dcpl_mb(vl1(1)),
4373     >                                               move,fion)
4374
4375*        **** Using gaussian two-electron integrals and pw grid ***
4376         else
4377            call v_local_seperate_paw(dcpl_mb(vl1(1)),
4378     >                                dcpl_mb(vl_notpaw(1)))
4379            call nwpw_compcharge_gen_v_cmp_smooth(dbl_mb(zv(1)),
4380     >                                            dcpl_mb(dng_cmp(1)))
4381
4382            call Pack_cc_Sub2(0,dcpl_mb(dng_cmp(1)),
4383     >                          dcpl_mb(vl1(1)))
4384            call Pack_cc_Sum2(0,dcpl_mb(dng_cmp(1)),
4385     >                          dcpl_mb(vl_notpaw(1)))
4386
4387            call nwpw_compcharge_gen_dE_ncmp_vloc_Qlm_test(
4388     >                                            ispin,dbl_mb(zv(1)),
4389     >                                            dcpl_mb(vl_notpaw(1)),
4390     >                                            dcpl_mb(vl1(1)),
4391     >                                            move,fion)
4392            call nwpw_compcharge_gen_dn_cmp2(ispin,
4393     >                                       dcpl_mb(dng_cmp(1)),
4394     >                                       dcpl_mb(dng_cmp_smooth(1)))
4395            call f_local_seperate_paw(dcpl_mb(dng_cmp_smooth(1)),
4396     >                                dcpl_mb(dng_cmp(1)),
4397     >                                fion)
4398            call Pack_cc_Sub2(0,dcpl_mb(dng_cmp_smooth(1)),
4399     >                          dcpl_mb(dng_cmp(1)))
4400            call nwpw_compcharge_gen_f_cmp_smooth(dbl_mb(zv(1)),
4401     >                                            dcpl_mb(dng_cmp(1)),
4402     >                                            fion)
4403         end if
4404         ok =        BA_pop_stack(vl_notpaw(2))
4405         ok = ok.and.BA_pop_stack(vl1(2))
4406         ok = ok.and.BA_pop_stack(dng_cmp_smooth(2))
4407         ok = ok.and.BA_pop_stack(dng_cmp(2))
4408         if (.not.ok)
4409     >      call errquit('psp_vloc_residual:pop stack',1,MA_ERR)
4410
4411*     **************************************
4412*     **** APeriodic Boundary Condtions ****
4413*     **************************************
4414      else
4415         call D3dB_n2ft3d(1,n2ft3d)
4416         ok =         BA_push_get(mt_dbl,n2ft3d,'rho_cmp',
4417     >                            rho_cmp(2),rho_cmp(1))
4418         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'rho_cmp_smooth',
4419     >                           rho_cmp_smooth(2),rho_cmp_smooth(1))
4420         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'vl1',
4421     >                           vl1(2),vl1(1))
4422         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'vl_notpaw',
4423     >                           vl_notpaw(2),vl_notpaw(1))
4424         ok = ok.and.BA_push_get(mt_dbl,n2ft3d,'vl2',
4425     >                           vl2(2),vl2(1))
4426         ok = ok.and.BA_push_get(mt_dbl,3*n2ft3d,'rgrid_cmp',
4427     >                           r_grid(2),r_grid(1))
4428         if (.not.ok)
4429     >      call errquit('psp_ncmp_vloc:out of stack',2,MA_ERR)
4430         call lattice_r_grid(dbl_mb(r_grid(1)))
4431         call D3dB_nx(1,nx)
4432         call D3dB_ny(1,ny)
4433         call D3dB_nz(1,nz)
4434         scal1 = 1.0d0/(nx*ny*nz)
4435         dv = scal1*lattice_omega()
4436
4437*        **** Using pw grid only ***
4438         if (use_grid_cmp) then
4439            call v_local(dbl_mb(vl1(1)),
4440     >               .false.,
4441     >               dum1,dum2)
4442            call v_lr_local(dbl_mb(r_grid(1)),
4443     >                      dbl_mb(rho_cmp(1)))
4444            call D3dB_rc_pfft3f(1,0,dbl_mb(rho_cmp(1)))
4445            call Pack_c_pack(0,dbl_mb(rho_cmp(1)))
4446            call Pack_c_SMul1(0,dv,dbl_mb(rho_cmp(1)))
4447            call Pack_cc_Sum2(0,dbl_mb(rho_cmp(1)),dbl_mb(vl1(1)))
4448
4449            call nwpw_compcharge_gen_dE_ncmp_vloc_Qlm_pw(ispin,
4450     >                                               dbl_mb(vl1(1)),
4451     >                                               move,fion)
4452
4453*        **** Using gaussian two-electron integrals and pw grid ***
4454         else
4455
4456*           **** long-range terms ****
4457            call v_lr_local_seperate_paw(dbl_mb(r_grid(1)),
4458     >                                   dbl_mb(vl1(1)),
4459     >                                   dbl_mb(vl_notpaw(1)))
4460            call nwpw_compcharge_gen_vlr_cmp_smooth(dbl_mb(zv(1)),
4461     >                                           dbl_mb(r_grid(1)),
4462     >                                           dbl_mb(rho_cmp(1)))
4463
4464            call D3dB_rr_Sum2(1,dbl_mb(rho_cmp(1)),dbl_mb(vl_notpaw(1)))
4465            call D3dB_rc_pfft3f(1,0,dbl_mb(vl_notpaw(1)))
4466            call Pack_c_pack(0,dbl_mb(vl_notpaw(1)))
4467            call Pack_c_SMul1(0,dv,dbl_mb(vl_notpaw(1)))
4468
4469            call D3dB_rr_Sub2(1,dbl_mb(rho_cmp(1)),dbl_mb(vl1(1)))
4470            call D3dB_rc_pfft3f(1,0,dbl_mb(vl1(1)))
4471            call Pack_c_pack(0,dbl_mb(vl1(1)))
4472            call Pack_c_SMul1(0,dv,dbl_mb(vl1(1)))
4473
4474*           **** short-range terms ****
4475            call v_local_seperate_paw(dbl_mb(rho_cmp_smooth(1)),
4476     >                                dbl_mb(rho_cmp(1)))
4477            call Pack_cc_Sum2(0,
4478     >                        dbl_mb(rho_cmp(1)),
4479     >                        dbl_mb(vl_notpaw(1)))
4480            call Pack_cc_Sum2(0,
4481     >                        dbl_mb(rho_cmp_smooth(1)),
4482     >                        dbl_mb(vl1(1)))
4483
4484            call nwpw_compcharge_gen_dE_ncmp_vloc_Qlm_test(
4485     >                                            ispin,dbl_mb(zv(1)),
4486     >                                            dbl_mb(vl_notpaw(1)),
4487     >                                            dbl_mb(vl1(1)),
4488     >                                            move,fion)
4489
4490               call nwpw_compcharge_gen_dn_cmp2(ispin,
4491     >                                       dbl_mb(rho_cmp(1)),
4492     >                                       dbl_mb(rho_cmp_smooth(1)))
4493               call f_local_seperate_paw(dbl_mb(rho_cmp_smooth(1)),
4494     >                                   dbl_mb(rho_cmp(1)),
4495     >                                   fion)
4496
4497               call Pack_c_unpack(0,dbl_mb(rho_cmp(1)))
4498               call D3dB_cr_pfft3b(1,0,dbl_mb(rho_cmp(1)))
4499
4500               call Pack_c_unpack(0,dbl_mb(rho_cmp_smooth(1)))
4501               call D3dB_cr_pfft3b(1,0,dbl_mb(rho_cmp_smooth(1)))
4502
4503               call f_lr_local_seperate_paw(dbl_mb(r_grid(1)),
4504     >                                      dbl_mb(rho_cmp_smooth(1)),
4505     >                                      dbl_mb(rho_cmp(1)),
4506     >                                      fion)
4507
4508               call D3dB_rr_Sub2(1,dbl_mb(rho_cmp_smooth(1)),
4509     >                             dbl_mb(rho_cmp(1)))
4510               call nwpw_compcharge_gen_f_lr_cmp_smooth(dbl_mb(zv(1)),
4511     >                                               dbl_mb(r_grid(1)),
4512     >                                               dbl_mb(rho_cmp(1)),
4513     >                                               fion)
4514         end if
4515         ok =        BA_pop_stack(r_grid(2))
4516         ok = ok.and.BA_pop_stack(vl2(2))
4517         ok = ok.and.BA_pop_stack(vl_notpaw(2))
4518         ok = ok.and.BA_pop_stack(vl1(2))
4519         ok = ok.and.BA_pop_stack(rho_cmp_smooth(2))
4520         ok = ok.and.BA_pop_stack(rho_cmp(2))
4521         if (.not.ok)
4522     >      call errquit('psp_vloc_ncmp_vloc:pop stack',3,MA_ERR)
4523
4524      end if
4525
4526
4527      end if
4528
4529      return
4530      end
4531