1SUBROUTINE getvofr_sphere( np_in_sp_me, np_in_sp, &
2        hcub, rho, v, vnew, vold, vold2,tSelf, d_cur, d_n, d_o, d_o2, cgstep)
3    !=======================================================================================
4    ! Code Version 1.0 (Princeton University, September 2014)
5    !=======================================================================================
6    !===============================================================
7    ! Given charge density, get the potential using multipole expansion
8    ! for boundary region and solving poisson equation for inside box
9    ! Adapted from PARSEC by Lingzhu Kong,  http://parsec.ices.utexas.edu/
10    !===============================================================
11    !
12    USE kinds,                   ONLY  :  DP
13    USE io_global,               ONLY  : stdout
14    USE exx_module,              ONLY  :  coeke, nord2, lmax, n_exx
15    USE exx_module,              ONLY  :  thdtood_in_sp
16    USE mp_global,               ONLY  :  me_image
17    USE parallel_include
18    !
19    IMPLICIT NONE
20    !
21    ! test if we are in the case of self interaction
22    LOGICAL tSelf
23    ! pair distances and extrapolation coefficients
24    REAL(DP) d_cur, d_n, d_o, d_o2, Cex(3)
25    INTEGER  np_in_sp_me, np_in_sp
26    REAL(DP) hcub
27    REAL(DP) rho(np_in_sp)
28    !OUTPUT
29    REAL(DP) v(np_in_sp_me)
30    !INOUT
31    REAL(DP) vnew(np_in_sp), vold(np_in_sp), vold2(np_in_sp)
32    !
33    REAL(DP), ALLOCATABLE  :: v_in_sp(:)
34    COMPLEX(DP) qlm(0:lmax, 0:lmax)
35    INTEGER  i, j, k, ii, jj, kk, ir, ip, irg, ifg, cgstep
36    !========================================================================
37    !-----------------------------------------------------------------------
38    !First, we calculate the potential outside the inner sphere, which will
39    !also give the boundary values for potential inside the sphere.
40    !-----------------------------------------------------------------------
41    !
42    CALL start_clock('getvofr_qlm')
43    CALL getqlm_sphere(np_in_sp, hcub, rho, qlm)
44    CALL stop_clock('getvofr_qlm')
45    ALLOCATE( v_in_sp(1:np_in_sp_me) )
46    !
47    v_in_sp = 0.d0
48    Cex = 0.0D0
49    CALL start_clock('getvofr_bound')
50    CALL exx_boundaryv_sphere( np_in_sp_me, np_in_sp, v_in_sp, qlm)
51    CALL stop_clock('getvofr_bound')
52    !
53    !$omp parallel do
54    DO ir = 1+np_in_sp, np_in_sp_me
55      v(ir) = v_in_sp(ir)
56    END DO
57    !$omp end parallel do
58    !-------------------------------------------------------------------------------
59    !                   NEXT, THE POTENTIAL INSIDE THE SPHERE
60    !-------------------------------------------------------------------------------
61    !
62    IF (tSelf) THEN
63      ! Selfv calculation
64      IF (n_exx .EQ. 2) THEN
65        !
66        !$omp parallel do
67        DO ir = 1, np_in_sp
68          !
69          v_in_sp(ir) = 2.D0*vnew(ir) - vold(ir)
70          !
71        ENDDO
72        !$omp end parallel do
73        !
74      ELSE IF (n_exx .GT. 2) THEN
75        !
76        !$omp parallel do
77        DO ir = 1, np_in_sp
78          !
79          v_in_sp(ir) = 3.D0*vnew(ir) - 3.D0*vold(ir) + vold2(ir)
80          !
81        END DO
82        !$omp end parallel do
83        !
84      END IF
85      !
86    ELSE
87      ! Pair v calculation
88      IF (n_exx .EQ. 2) THEN
89        ! pair distances extrapolation
90        IF (ABS(d_n - d_o).GT.1.D-8) THEN
91          !
92          Cex (1) = (d_cur - d_o)/(d_n - d_o)
93          Cex (2) = (d_cur - d_n)/(d_o - d_n)
94          IF ((ABS(Cex(1)).GT.3.D0).OR.(ABS(Cex(2)).GT.2.0D0)) THEN
95            !
96            Cex (1) = 2.0D0
97            Cex (2) = -1.D0
98            !
99          END IF
100          !
101        ELSE
102          !
103          Cex(1) = 2.0D0
104          Cex(2) = -1.D0
105          !
106        END IF
107        !$omp parallel do
108        DO ir = 1, np_in_sp
109          ! Hybrid time and pair-dist extrapolation
110          v_in_sp(ir) = 0.5D0*((2.0D0+Cex(1))*vnew(ir) &
111              + (Cex(2)-1.0D0)*vold(ir))
112          !
113        END DO
114        !$omp end parallel do
115        !
116      ELSE IF(n_exx .Gt. 2)THEN
117        ! pair distances extrapolation
118        IF ((ABS(d_n - d_o).GT.1.D-8).OR.(ABS(d_n - d_o2).GT.1.D-8).OR.(abs(d_o - d_o2).GT.1.D-8)) THEN
119          !
120          Cex(1) = (d_cur-d_o2)*(d_cur-d_o)/(d_n-d_o2)/(d_n-d_o)
121          Cex(2) = (d_cur-d_o2)*(d_cur-d_n)/(d_o-d_o2)/(d_o-d_n)
122          Cex(3) = (d_cur-d_o)*(d_cur-d_n)/(d_o2-d_o)/(d_o2-d_n)
123          !
124          IF ((ABS(Cex(1)).GT.5.D0).OR.(ABS(Cex(2)).GT.5.0D0).OR.(ABS(Cex(3)).GT.3.D0)) THEN
125            !
126            Cex (1) =  3.0D0
127            Cex (2) = -3.0D0
128            Cex (3) =  1.0D0
129            !
130          END IF
131          !
132        ELSE
133          !
134          Cex (1) =  3.0D0
135          Cex (2) = -3.0D0
136          Cex (3) =  1.0D0
137          !
138        END IF
139        !
140        !$omp parallel do
141        DO ir = 1, np_in_sp
142          ! Hybrid time and pair-dist extrapolation
143          v_in_sp(ir) = 0.5D0*((3.D0+Cex(1))*vnew(ir) &
144              +(-3.D0+Cex(2))*vold(ir)+(1.D0+Cex(3))*vold2(ir))
145          !
146        END DO
147        !$omp end parallel do
148        !
149      END IF
150      !
151    END IF
152    !
153    !!!!! This part is original !!!!!!!!!!
154    CALL start_clock('getvofr_geterho')
155    CALL geterho_sphere(np_in_sp_me, np_in_sp,rho, v_in_sp)
156    CALL stop_clock('getvofr_geterho')
157    CALL start_clock('getvofr_hpotcg')
158    CALL hpotcg(np_in_sp_me, np_in_sp, rho, v_in_sp,.TRUE.,cgstep)
159    CALL stop_clock('getvofr_hpotcg')
160    !
161    IF ( n_exx .EQ. 1) THEN
162      !
163      !$omp parallel do
164      DO ir = 1, np_in_sp
165        !
166        vnew(ir) = v_in_sp(ir)
167        vold(ir) = vnew(ir)
168        !
169      END DO
170      !$omp end parallel do
171      !
172      IF (.NOT.tSelf) THEN
173        !
174        d_n = d_cur
175        d_o = d_n
176        !
177      END IF
178      !
179    ELSE IF ( n_exx .EQ. 2) THEN
180      !
181      !$omp parallel do
182      DO ir = 1, np_in_sp
183        !
184        vold(ir) = vnew(ir)
185        vnew(ir) = v_in_sp(ir)
186        !
187      END DO
188      !$omp end parallel do
189      !
190      IF (.NOT.tSelf) THEN
191        !
192        d_o = d_n
193        d_n = d_cur
194        !
195      END IF
196      !
197    ELSE
198      !
199      !$omp parallel do
200      DO ir = 1, np_in_sp
201        !
202        vold2(ir) = vold(ir)
203        vold(ir) = vnew(ir)
204        vnew(ir) = v_in_sp(ir)
205        !
206      END DO
207      !$omp end parallel do
208      !
209      IF (.NOT.tSelf) THEN
210        !
211        d_o2 = d_o
212        d_o  = d_n
213        d_n  = d_cur
214        !
215      END IF
216      !
217    END IF
218    !
219    !$omp parallel do
220    DO ir = 1, np_in_sp
221      !
222      v(ir) = v_in_sp(ir)
223      !
224    END DO
225    !$omp end parallel do
226    !
227    DEALLOCATE( v_in_sp)
228    !
229    RETURN
230    !
231END SUBROUTINE getvofr_sphere
232!===============================================================================
233
234!===============================================================================
235SUBROUTINE getqlm_sphere(np_in_sp, hcub, rho, qlm)
236    !
237    USE kinds,            ONLY  :  DP
238    USE exx_module,       ONLY  :  lpole=>lmax
239    USE exx_module,       ONLY  :  xx_in_sp, yy_in_sp,  zz_in_sp, clm
240    !
241    IMPLICIT NONE
242    !
243    INTEGER    np_in_sp
244    REAL(DP)   hcub, rho(1:np_in_sp)
245    COMPLEX(DP) qlm(0:lpole, 0:lpole)
246    !
247    REAL(DP)   rrho,xx,yy,zz,xx2,yy2,zz2,xy,r2,x,y
248    REAL(DP)   rinv(0:lpole),r(0:lpole)
249    !
250    REAL(DP)    plm(0:lpole, 0:lpole)  !temporary storage of the associated Legendre polynom
251    COMPLEX(DP) cxy(1:lpole)      !coefficient array: e^{i m \phi_j} = cos(phi_j) + i*sin(phi_j)
252    !
253    REAL(DP)   hcub2, zero, one, two , tmp1, tmp2
254    PARAMETER (zero = 1.0E-10, one = 1.d0, two = 2.d0 )
255    INTEGER    i,j,l,m
256    !------------------------------------------------------------------------------
257    !
258    hcub2 = two*hcub
259    qlm(:,:) = 0.d0
260    !
261    !$omp parallel do private(r,plm,cxy,rrho,tmp1,tmp2,xx,yy,zz,xx2,yy2,zz2,r2,rinv,xy,x,y), reduction(+:qlm)
262    DO j = 1,np_in_sp
263      rrho = rho(j)
264      !
265      xx   = xx_in_sp(j)
266      yy   = yy_in_sp(j)
267      zz   = zz_in_sp(j)
268      !
269      xx2  = xx*xx
270      yy2  = yy*yy
271      zz2  = zz*zz
272      r2   = xx2 + yy2 + zz2
273      !
274      r(1) = dsqrt(r2)
275      qlm(0,0) = qlm(0,0) + rrho
276      !
277      IF (r(1) .GT. zero) THEN
278        DO l = 2, lpole
279          r(l) = r(l-1)*r(1)           !r(l) = r^l (higher powers of r)
280        END DO
281        !
282        rinv(1) = one/r(1)
283        xy = DSQRT(xx2+yy2)
284        !
285        x = zz*rinv(1)                  ! x = cos(theta_j)
286        y = xy*rinv(1)                  ! y = sin(theta_j)
287        !
288        CALL setplm(x, y, lpole, plm)   ! associate Legendre polynomials in plm(l,m)
289        !
290        DO l = 1, lpole
291          qlm(l,0) = qlm(l,0) + rrho*plm(l,0)*r(l)   !qlm(l,m=0)
292        END DO
293        !
294        IF (xy .GT. zero) THEN
295          !
296          cxy(1) = CMPLX(xx,yy,KIND=dp)
297          cxy(1) = cxy(1)/xy
298          DO m = 2, lpole
299            cxy(m) = cxy(m-1)*cxy(1)  !cxy(m) = exp(i*m*phi_j) = (cos(phi_j) + i*sin(phi_j))^m
300          END DO                         !       = ((xx + i*yy)/(sqrt(xx^2 + yy^2))^m
301          !
302          DO l = 1, lpole
303            tmp1 = rrho*r(l)
304            DO m = 1, l
305              tmp2     = plm(l,m)*tmp1
306              qlm(l,m) = qlm(l,m) + cxy(m)*tmp2      !qlm(l, m>0)
307            END DO
308          END DO
309          !
310        END IF
311      END IF
312    END DO
313    !$omp end parallel do
314    !
315    DO l = 0, lpole
316      qlm(l,0) = qlm(l,0)*clm(l,0)*hcub
317    END DO
318    !
319    DO l = 1, lpole
320      DO m = 1, l, 1
321        qlm(l,m) = qlm(l,m)*clm(l,m)*hcub2
322      END DO
323    END DO
324    !
325    RETURN
326END SUBROUTINE getqlm_sphere
327!===============================================================================
328
329!===============================================================================
330SUBROUTINE exx_boundaryv_sphere(np_in_sp_me, np_in_sp,v_in_sp,qlm)
331    !
332    USE kinds,            ONLY  :  DP
333    USE exx_module,       ONLY  :  lpole=>lmax
334    USE exx_module,       ONLY  :  xx_in_sp, yy_in_sp,  zz_in_sp, clm
335    !
336    IMPLICIT  NONE
337    !
338    INTEGER   np_in_sp_me, np_in_sp
339    REAL(DP)  v_in_sp( 1:np_in_sp_me )
340    !
341    ! For each grid point:
342    REAL(dp)   plm(0:lpole, 0:lpole)
343    COMPLEX*16 qlm(0:lpole, 0:lpole)
344    COMPLEX*16 cxy(1:lpole)   ! e^{i m \phi_j} = cos(phi_j) + i*sin(phi_j)
345    COMPLEX*16 cpole, qlmc
346    !
347    REAL(dp)   rinv(0:lpole), r(0:lpole)
348    REAL(dp)   xh,yh,zh, xx2, yy2, zz2, xy, x, y, r2, zero, one, plmr
349    INTEGER    i, l, m
350    !
351    zero = 0.d0
352    one  = 1.d0
353    !
354    !$omp parallel do private(r,plm,cxy,xh,yh,zh,xx2,yy2,zz2,r2,rinv,xy,x,y,plmr,cpole,qlmc)
355    DO i = 1 + np_in_sp, np_in_sp_me
356      !
357      xh = xx_in_sp(i)
358      yh = yy_in_sp(i)
359      zh = zz_in_sp(i)
360      !
361      xx2 = xh*xh
362      yy2 = yh*yh
363      zz2 = zh*zh
364      !
365      r2 = xx2 + yy2 + zz2
366      r(1) = dsqrt(r2)
367      IF (r(1) .LT. 0.000001) PRINT *, 'i =',i, r(1)
368      rinv(0) = one/r(1)
369      DO l = 1,lpole
370        rinv(l) = rinv(l-1)*rinv(0)  !rinv(l) = 1/r^(l+1).
371      END DO
372      !
373      xy= DSQRT(xx2+yy2)
374      x = zh*rinv(0)           ! x=cos(theta)
375      y = xy*rinv(0)           ! y=sin(theta)
376      !
377      ! evaluate associate Legendre polynomials for x and y with l from 0 to
378      ! lpole and m from 0 to l. store each in plm(l,m)
379      CALL setplm(x, y, lpole, plm)
380      !
381      cpole = zero
382      !
383      DO l = 0,lpole
384        plmr = plm(l,0)*rinv(l)
385        cpole = cpole + qlm(l,0)*plmr    ! m=0 terms
386      END DO
387      !
388      IF (xy .GT. zero) THEN
389        cxy(1) = CMPLX(xh,-yh,KIND=dp)
390        cxy(1) = cxy(1)/xy
391        DO m=2, lpole, 1
392          cxy(m) = cxy(m-1)*cxy(1)
393        END DO
394        !
395        DO l = 1, lpole
396          DO m = 1, l, 1                          ! m>0 terms
397            plmr = plm(l,m)*rinv(l)
398            qlmc = qlm(l,m)*cxy(m)
399            cpole = cpole + plmr * qlmc
400          END DO
401        END DO
402      END IF
403      !
404      v_in_sp(i) = REAL(cpole)
405      !
406    END DO
407    !$omp end parallel do
408    !
409    RETURN
410    !
411END SUBROUTINE exx_boundaryv_sphere
412! ===========================================================================
413
414! ===========================================================================
415SUBROUTINE geterho_sphere(np_in_sp_me, np_in_sp, rho, v_in_sp)
416    !
417    USE kinds,            ONLY  :  DP
418    USE exx_module,       ONLY  :  nord2,coeke
419    USE exx_module,       ONLY  :  odtothd_in_sp, thdtood_in_sp
420    USE constants,        ONLY  :  eps12
421    !
422    IMPLICIT NONE
423    !
424    INTEGER np_in_sp, np_in_sp_me
425    REAL(DP) rho(np_in_sp), v_in_sp( 1:np_in_sp_me )
426    !
427    INTEGER :: i, j, k, ip, jp, ish, ipp, ipm, jpp, jpm, kpp, kpm
428    INTEGER :: ipjp, ipjm, imjp, imjm
429    !
430    !$omp parallel do private(i,j,k,ipp,ipm,jpp,jpm,kpp,kpm)
431    DO ip = 1, np_in_sp
432      !
433      !(i,j,k) is within the first sphere
434      i = odtothd_in_sp(1,ip)
435      j = odtothd_in_sp(2,ip)
436      k = odtothd_in_sp(3,ip)
437      !
438      DO ish  = 1, nord2
439        !
440        ipp = thdtood_in_sp( i+ish, j,     k     )
441        ipm = thdtood_in_sp( i-ish, j,     k     )
442        jpp = thdtood_in_sp( i,     j+ish, k     )
443        jpm = thdtood_in_sp( i,     j-ish, k     )
444        kpp = thdtood_in_sp( i,     j,     k+ish )
445        kpm = thdtood_in_sp( i,     j,     k-ish )
446        !
447        IF(ipp .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,1,1)*v_in_sp(ipp) ! apply finite difference stencil
448        IF(ipm .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,1,1)*v_in_sp(ipm) ! apply finite difference stencil
449        IF(jpp .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,2,2)*v_in_sp(jpp) ! apply finite difference stencil
450        IF(jpm .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,2,2)*v_in_sp(jpm) ! apply finite difference stencil
451        IF(kpp .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,3,3)*v_in_sp(kpp) ! apply finite difference stencil
452        IF(kpm .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,3,3)*v_in_sp(kpm) ! apply finite difference stencil
453        !
454      END DO
455      !
456    END DO
457    !$omp end parallel do
458    !
459    !! cross derivatives
460    !
461    IF (ABS(coeke(1,1,2)).GT.eps12) THEN
462      !$omp parallel do private(i,j,k,ipjp,ipjm,imjp,imjm)
463      DO ip = 1, np_in_sp
464        !(i,j,k) is within the first sphere
465        i = odtothd_in_sp(1,ip)
466        j = odtothd_in_sp(2,ip)
467        k = odtothd_in_sp(3,ip)
468        !
469        DO ish  = 1, nord2
470          !
471          ipjp = thdtood_in_sp( i+ish, j+ish, k )
472          ipjm = thdtood_in_sp( i+ish, j-ish, k )
473          imjp = thdtood_in_sp( i-ish, j+ish, k )
474          imjm = thdtood_in_sp( i-ish, j-ish, k )
475          !
476          IF(ipjp .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,1,2)*v_in_sp(ipjp)
477          IF(ipjm .GT. np_in_sp) rho(ip) = rho(ip) + coeke(ish,1,2)*v_in_sp(ipjm)
478          IF(imjp .GT. np_in_sp) rho(ip) = rho(ip) + coeke(ish,1,2)*v_in_sp(imjp)
479          IF(imjm .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,1,2)*v_in_sp(imjm)
480          !
481        END DO
482        !
483      END DO
484      !$omp end parallel do
485    END IF
486    !
487    IF (ABS(coeke(1,1,3)).GT.eps12) THEN
488      !$omp parallel do private(i,j,k,ipjp,ipjm,imjp,imjm)
489      DO ip = 1, np_in_sp
490        !(i,j,k) is within the first sphere
491        i = odtothd_in_sp(1,ip)
492        j = odtothd_in_sp(2,ip)
493        k = odtothd_in_sp(3,ip)
494        !
495        DO ish  = 1, nord2
496          !
497          ipjp = thdtood_in_sp( i+ish, j, k+ish )
498          ipjm = thdtood_in_sp( i+ish, j, k-ish )
499          imjp = thdtood_in_sp( i-ish, j, k+ish )
500          imjm = thdtood_in_sp( i-ish, j, k-ish )
501          !
502          IF(ipjp .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,1,3)*v_in_sp(ipjp)
503          IF(ipjm .GT. np_in_sp) rho(ip) = rho(ip) + coeke(ish,1,3)*v_in_sp(ipjm)
504          IF(imjp .GT. np_in_sp) rho(ip) = rho(ip) + coeke(ish,1,3)*v_in_sp(imjp)
505          IF(imjm .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,1,3)*v_in_sp(imjm)
506          !
507        END DO
508        !
509      END DO
510      !$omp end parallel do
511    END IF
512    !
513    IF (ABS(coeke(1,2,3)).GT.eps12) THEN
514      !$omp parallel do private(i,j,k,ipjp,ipjm,imjp,imjm)
515      DO ip = 1, np_in_sp
516        !(i,j,k) is within the first sphere
517        i = odtothd_in_sp(1,ip)
518        j = odtothd_in_sp(2,ip)
519        k = odtothd_in_sp(3,ip)
520        !
521        DO ish  = 1, nord2
522          !
523          ipjp = thdtood_in_sp( i, j+ish, k+ish )
524          ipjm = thdtood_in_sp( i, j+ish, k-ish )
525          imjp = thdtood_in_sp( i, j-ish, k+ish )
526          imjm = thdtood_in_sp( i, j-ish, k-ish )
527          !
528          IF(ipjp .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,2,3)*v_in_sp(ipjp)
529          IF(ipjm .GT. np_in_sp) rho(ip) = rho(ip) + coeke(ish,2,3)*v_in_sp(ipjm)
530          IF(imjp .GT. np_in_sp) rho(ip) = rho(ip) + coeke(ish,2,3)*v_in_sp(imjp)
531          IF(imjm .GT. np_in_sp) rho(ip) = rho(ip) - coeke(ish,2,3)*v_in_sp(imjm)
532          !
533        END DO
534        !
535      END DO
536      !$omp end parallel do
537    END IF
538    !
539    RETURN
540END SUBROUTINE geterho_sphere
541! ==================================================================
542
543!==================================================================
544SUBROUTINE setplm(x, y, lpole, plm)
545    !
546    ! this subroutine calculates all of the associated Legendre polynomials up
547    ! to a supplied lpole (maximum lpole is 9).
548    !
549    IMPLICIT NONE
550    !
551    ! INPUT VARIABLES
552    ! cos(theta),sin(theta), for a given grid point
553    REAL*8 x,y
554    ! order of multipole expansion
555    INTEGER lpole
556    ! OUTPUT VARIABLES
557    ! array containing P_{lm}, the associated Legendre polynomials
558    REAL*8 plm(0:lpole, 0:lpole)
559    ! WORK VARIABLES:
560    ! powers of x, y: xn=x^n, yn=y^n
561    REAL*8 x2, x3, x4, x5, x6, x7, x8, x9
562    REAL*8 y2, y3, y4, y5, y6, y7, y8, y9
563    !
564    plm(0,0) = 1.0
565    IF (lpole .GE. 1) THEN
566      plm(1,0) = x
567      plm(1,1) = -y
568    END IF
569    IF (lpole .GE. 2) THEN
570      x2 = x*x
571      y2 = y*y
572      plm(2,0) =  1.5d0*x2 - 0.5d0
573      plm(2,1) = -3.0d0*x*y
574      plm(2,2) =  3.0d0*y2
575    END IF
576    IF (lpole .GE. 3) THEN
577      x3 = x2*x
578      y3 = y2*y
579      plm(3,0) =   2.5d0*x3 - 1.5d0*x
580      plm(3,1) = (-7.5d0*x2 + 1.5d0)*y
581      plm(3,2) =  15.0d0*x*y2
582      plm(3,3) = -15.0d0*y3
583    END IF
584    IF (lpole .GE. 4) THEN
585      x4 = x2*x2
586      y4 = y2*y2
587      plm(4,0) =  4.375d0*x4 - 3.75d0*x2 + 0.375d0
588      plm(4,1) = (-17.5d0*x3 + 7.5d0*x)*y
589      plm(4,2) = ( 52.5d0*x2 - 7.5d0  )*y2
590      plm(4,3) = -105.0d0*x*y3
591      plm(4,4) =  105.0d0*y4
592    END IF
593    IF (lpole .GE. 5) THEN
594      x5 = x3*x2
595      y5 = y3*y2
596      plm(5,0) =  7.875d0*x5 - 8.75d0*x3 + 1.875d0*x
597      plm(5,1) = (-39.375d0*x4 + 26.25d0*x2 - 1.875d0)*y
598      plm(5,2) = ( 157.5d0*x3 - 52.5d0*x)*y2
599      plm(5,3) = (-472.5d0*x2 + 52.5d0)  *y3
600      plm(5,4) =  945.0d0*x*y4
601      plm(5,5) = -945.0d0*y5
602    END IF
603    IF (lpole .GE. 6) THEN
604      x6 = x3*x3
605      y6 = y3*y3
606      plm(6,0) = 14.4375d0*x6 - 19.6875d0*x4 + 6.5625d0*x2 - 0.3125d0
607      plm(6,1) = (-86.625d0*x5 + 78.75d0*x3 - 13.125d0*x)*y
608      plm(6,2) = ( 433.125d0*x4 - 236.25d0*x2 + 13.125d0)*y2
609      plm(6,3) = (-1732.5d0*x3 + 472.5d0*x            )*y3
610      plm(6,4) = ( 5197.5d0*x2 - 472.5d0              )*y4
611      plm(6,5) = -10395.0d0*x*y5
612      plm(6,6) =  10395.0d0*y6
613    END IF
614    IF (lpole .GE. 7) THEN
615      x7 = x4*x3
616      y7 = y4*y3
617      plm(7,0) = 26.8125d0*x7 - 43.3125d0*x5 + 19.6875d0*x3 - 2.1875d0*x
618      plm(7,1) = -187.6875d0*x6*y + 216.5625d0*x4*y - 59.0625d0*x2*y + 2.1875d0*y
619      plm(7,2) = 1126.125d0*x5*y2 - 866.25d0*x3*y2 + 118.125d0*x*y2
620      plm(7,3) = -5630.625d0*x4*y3 + 2598.75d0*x2*y3 - 118.125d0*y3
621      plm(7,4) = 22522.5d0*x3*y4 - 5197.5d0*x*y4
622      plm(7,5) = -67567.5d0*x2*y5 + 5197.5d0*y5
623      plm(7,6) = 135135.0d0*x*y6
624      plm(7,7) = -135135.0d0*y7
625    END IF
626    IF (lpole .GE. 8) THEN
627      x8 = x4*x4
628      y8 = y4*y4
629      plm(8,0) = 50.2734375d0*x8 - 93.84375d0*x6 + 54.140625d0*x4 - 9.84375d0*x2 + 0.2734375d0
630      plm(8,1) = -402.1875d0*x7*y + 563.0625d0*x5*y - 216.5625d0*x3*y + 19.6875d0*x*y
631      plm(8,2) = 2815.3125d0*x6*y2 - 2815.3125d0*x4*y2 + 649.6875d0*x2*y2 - 19.6875d0*y2
632      plm(8,3) = -16891.875d0*x5*y3 + 11261.25d0*x3*y3 - 1299.375d0*x*y3
633      plm(8,4) = 84459.375d0*x4*y4 - 33783.75d0*x2*y4 + 1299.375d0*y4
634      plm(8,5) = -337837.5d0*x3*y5 + 67567.5d0*x*y5
635      plm(8,6) = 1013512.5d0*x2*y6 - 67567.5d0*y6
636      plm(8,7) = -2027025.0d0*x*y7
637      plm(8,8) = 2027025.0d0*y8
638    END IF
639    IF (lpole .GE. 9) THEN
640      y9 = y5*y4
641      x9 = x5*x4
642      plm(9,0) = 94.9609375d0*x9 - 201.09375d0*x7 + 140.765625d0*x5 - 36.09375d0*x3 + 2.4609375d0*x
643      plm(9,1) = -854.6484375d0*x8*y + 1407.65625d0*x6*y - 703.828125d0*x4*y + 108.28125d0*x2*y - 2.4609375d0*y
644      plm(9,2) = 6837.1875d0*x7*y2 - 8445.9375d0*x5*y2 + 2815.3125d0*x3*y2 - 216.5625d0*x*y2
645      plm(9,3) = -47860.3125d0*x6*y3 + 42229.6875d0*x4*y3 - 8445.9375d0*x2*y3 + 216.5625d0*y3
646      plm(9,4) = 287161.875d0*x5*y4 - 168918.75d0*x3*y4 + 16891.875d0*x*y4
647      plm(9,5) = -1435809.375d0*x4*y5 + 506756.25d0*x2*y5 - 16891.875d0*y5
648      plm(9,6) = 5743237.5d0*x3*y6 - 1013512.5d0*x*y6
649      plm(9,7) = -17229712.5d0*x2*y7 + 1013512.5d0*y7
650      plm(9,8) = 34459425.0d0*x*y8
651      plm(9,9) = -34459425.0d0*y9
652    END IF
653    !
654    !--------------------------------------------------------------------
655    RETURN
656    !--------------------------------------------------------------------
657END SUBROUTINE setplm
658
659module multipole_expansion
660  use kinds, only : dp
661  implicit none
662contains
663
664    function get_plm(x, y, l, m) result (plm)
665    !
666    ! this subroutine calculates all of the associated Legendre polynomials up
667    ! to a supplied lpole (maximum lpole is 9).
668    !
669    IMPLICIT NONE
670    !
671    ! INPUT VARIABLES
672    ! cos(theta),sin(theta), for a given grid point
673    real(dp), value :: x,y
674    ! order of multipole expansion
675    integer, value :: l, m
676    real(dp) :: plm
677    !
678    ! WORK VARIABLES:
679    ! powers of x, y: xn=x^n, yn=y^n
680    REAL(dp) x2, x3, x4, x5, x6, x7, x8, x9
681    REAL(dp) y2, y3, y4, y5, y6, y7, y8, y9
682    !
683    select case (l)
684    case (0)
685      plm = 1.d0
686    case (1)
687      if (m.eq.0) then
688        plm = x
689      else
690        plm = -y
691      end if ! m.eq.0
692    case (2)
693      x2 = x*x
694      y2 = y*y
695      select case (m)
696      case (0)
697        plm =  1.5d0*x2 - 0.5d0
698      case (1)
699        plm = -3.0d0*x*y
700      case (2)
701        plm =  3.0d0*y2
702      end select ! m
703    case (3)
704      x2 = x*x
705      y2 = y*y
706      x3 = x2*x
707      y3 = y2*y
708      select case (m)
709      case (0)
710        plm =  2.5d0*x3 - 1.5d0*x
711      case (1)
712        plm = (-7.5d0*x2 + 1.5d0)*y
713      case (2)
714        plm =  15.0d0*x*y2
715      case (3)
716        plm = -15.0d0*y3
717      end select ! m
718    case (4)
719      x2 = x*x
720      y2 = y*y
721      x3 = x2*x
722      y3 = y2*y
723      x4 = x2*x2
724      y4 = y2*y2
725      select case (m)
726      case (0)
727        plm =  4.375d0*x4 - 3.75d0*x2 + 0.375d0
728      case (1)
729        plm = (-17.5d0*x3 + 7.5d0*x)*y
730      case (2)
731        plm = ( 52.5d0*x2 - 7.5d0  )*y2
732      case (3)
733        plm = -105.0d0*x*y3
734      case (4)
735        plm =  105.0d0*y4
736      end select ! m
737    case (5)
738      x2 = x*x
739      y2 = y*y
740      x3 = x2*x
741      y3 = y2*y
742      x4 = x2*x2
743      y4 = y2*y2
744      x5 = x3*x2
745      y5 = y3*y2
746      select case (m)
747      case (0)
748        plm =  7.875d0*x5 - 8.75d0*x3 + 1.875d0*x
749      case (1)
750        plm = (-39.375d0*x4 + 26.25d0*x2 - 1.875d0)*y
751      case (2)
752        plm = ( 157.5d0*x3 - 52.5d0*x)*y2
753      case (3)
754        plm = (-472.5d0*x2 + 52.5d0)  *y3
755      case (4)
756        plm =  945.0d0*x*y4
757      case (5)
758        plm = -945.0d0*y5
759      end select ! m
760    case (6)
761      x2 = x*x
762      y2 = y*y
763      x3 = x2*x
764      y3 = y2*y
765      x4 = x2*x2
766      y4 = y2*y2
767      x5 = x3*x2
768      y5 = y3*y2
769      x6 = x3*x3
770      y6 = y3*y3
771      select case (m)
772      case (0)
773        plm = 14.4375d0*x6 - 19.6875d0*x4 + 6.5625d0*x2 - 0.3125d0
774      case (1)
775        plm = (-86.625d0*x5 + 78.75d0*x3 - 13.125d0*x)*y
776      case (2)
777        plm = ( 433.125d0*x4 - 236.25d0*x2 + 13.125d0)*y2
778      case (3)
779        plm = (-1732.5d0*x3 + 472.5d0*x            )*y3
780      case (4)
781        plm = ( 5197.5d0*x2 - 472.5d0              )*y4
782      case (5)
783        plm = -10395.0d0*x*y5
784      case (6)
785        plm =  10395.0d0*y6
786      end select ! m
787    case (7)
788      x2 = x*x
789      y2 = y*y
790      x3 = x2*x
791      y3 = y2*y
792      x4 = x2*x2
793      y4 = y2*y2
794      x5 = x3*x2
795      y5 = y3*y2
796      x6 = x3*x3
797      y6 = y3*y3
798      x7 = x4*x3
799      y7 = y4*y3
800      select case (m)
801      case (0)
802        plm = 26.8125d0*x7 - 43.3125d0*x5 + 19.6875d0*x3 - 2.1875d0*x
803      case (1)
804        plm = -187.6875d0*x6*y + 216.5625d0*x4*y - 59.0625d0*x2*y + 2.1875d0*y
805      case (2)
806        plm = 1126.125d0*x5*y2 - 866.25d0*x3*y2 + 118.125d0*x*y2
807      case (3)
808        plm = -5630.625d0*x4*y3 + 2598.75d0*x2*y3 - 118.125d0*y3
809      case (4)
810        plm = 22522.5d0*x3*y4 - 5197.5d0*x*y4
811      case (5)
812        plm = -67567.5d0*x2*y5 + 5197.5d0*y5
813      case (6)
814        plm = 135135.0d0*x*y6
815      case (7)
816        plm = -135135.0d0*y7
817      end select ! m
818    case (8)
819      x2 = x*x
820      y2 = y*y
821      x3 = x2*x
822      y3 = y2*y
823      x4 = x2*x2
824      y4 = y2*y2
825      x5 = x3*x2
826      y5 = y3*y2
827      x6 = x3*x3
828      y6 = y3*y3
829      x7 = x4*x3
830      y7 = y4*y3
831      x8 = x4*x4
832      y8 = y4*y4
833      select case (m)
834      case (0)
835        plm = 50.2734375d0*x8 - 93.84375d0*x6 + 54.140625d0*x4 - 9.84375d0*x2 + 0.2734375d0
836      case (1)
837        plm = -402.1875d0*x7*y + 563.0625d0*x5*y - 216.5625d0*x3*y + 19.6875d0*x*y
838      case (2)
839        plm = 2815.3125d0*x6*y2 - 2815.3125d0*x4*y2 + 649.6875d0*x2*y2 - 19.6875d0*y2
840      case (3)
841        plm = -16891.875d0*x5*y3 + 11261.25d0*x3*y3 - 1299.375d0*x*y3
842      case (4)
843        plm = 84459.375d0*x4*y4 - 33783.75d0*x2*y4 + 1299.375d0*y4
844      case (5)
845        plm = -337837.5d0*x3*y5 + 67567.5d0*x*y5
846      case (6)
847        plm = 1013512.5d0*x2*y6 - 67567.5d0*y6
848      case (7)
849        plm = -2027025.0d0*x*y7
850      case (8)
851        plm = 2027025.0d0*y8
852      end select ! m
853    case (9)
854      x2 = x*x
855      y2 = y*y
856      x3 = x2*x
857      y3 = y2*y
858      x4 = x2*x2
859      y4 = y2*y2
860      x5 = x3*x2
861      y5 = y3*y2
862      x6 = x3*x3
863      y6 = y3*y3
864      x7 = x4*x3
865      y7 = y4*y3
866      x8 = x4*x4
867      y8 = y4*y4
868      y9 = y5*y4
869      x9 = x5*x4
870      select case (m)
871      case (0)
872        plm = 94.9609375d0*x9 - 201.09375d0*x7 + 140.765625d0*x5 - 36.09375d0*x3 + 2.4609375d0*x
873      case (1)
874        plm = -854.6484375d0*x8*y + 1407.65625d0*x6*y - 703.828125d0*x4*y + 108.28125d0*x2*y - 2.4609375d0*y
875      case (2)
876        plm = 6837.1875d0*x7*y2 - 8445.9375d0*x5*y2 + 2815.3125d0*x3*y2 - 216.5625d0*x*y2
877      case (3)
878        plm = -47860.3125d0*x6*y3 + 42229.6875d0*x4*y3 - 8445.9375d0*x2*y3 + 216.5625d0*y3
879      case (4)
880        plm = 287161.875d0*x5*y4 - 168918.75d0*x3*y4 + 16891.875d0*x*y4
881      case (5)
882        plm = -1435809.375d0*x4*y5 + 506756.25d0*x2*y5 - 16891.875d0*y5
883      case (6)
884        plm = 5743237.5d0*x3*y6 - 1013512.5d0*x*y6
885      case (7)
886        plm = -17229712.5d0*x2*y7 + 1013512.5d0*y7
887      case (8)
888        plm = 34459425.0d0*x*y8
889      case (9)
890        plm = -34459425.0d0*y9
891      end select ! m
892    end select ! l
893    return
894  end function get_plm
895
896end module multipole_expansion
897
898!===========================================================================================
899SUBROUTINE getvofr_cube(me_r, ps_r, n_me, n_ps, hcub, rhops, potme, guess_state, psgsn, rhops_old, potps_old, nstep)
900    !=======================================================================================
901    ! Code Version 1.0 (Princeton University, September 2014)
902    !=======================================================================================
903    ! Given charge density, get the potential using multipole expansion
904    ! for boundary region and solving poisson equation for inside box
905    ! Adapted from PARSEC by Lingzhu Kong,  http://parsec.ices.utexas.edu/
906    !=======================================================================================
907    !
908    USE kinds,                   ONLY  :  DP
909    USE io_global,               ONLY  :  stdout
910    USE fft_scalar,              ONLY  :  cfft3d
911    USE wannier_base,            ONLY  :  poisson_eps
912    USE exx_module,              ONLY  :  coemicf, coeke
913    USE exx_module,              ONLY  :  fbsscale
914    USE exx_module,              ONLY  :  nord2
915    USE exx_module,              ONLY  :  lmax, lm_mx
916    USE exx_module,              ONLY  :  n_exx
917    USE funct,                   ONLY  :  get_screening_parameter
918    USE mp_global,               ONLY  :  me_image
919    USE parallel_include
920    USE exx_module,              ONLY  :  pot_ps, rho_ps
921    !
922    IMPLICIT NONE
923    !
924    !-----------------------------------------------------------------------
925    ! --- call variable ---
926    !-----------------------------------------------------------------------
927    INTEGER                :: me_r(6)
928    INTEGER                :: ps_r(6)
929    INTEGER                :: n_me
930    INTEGER                :: n_ps
931    REAL(DP)               :: hcub
932    REAL(DP)               :: rhops(n_ps)
933    REAL(DP)               :: potme(n_me)
934    INTEGER                :: guess_state
935    INTEGER                :: psgsn
936    REAL(DP)               :: rhops_old(n_ps, psgsn)
937    REAL(DP)               :: potps_old(n_ps, psgsn)
938    INTEGER                :: nstep
939    !-----------------------------------------------------------------------
940
941    !-----------------------------------------------------------------------
942    ! --- local variable ---
943    !-----------------------------------------------------------------------
944    REAL(DP)                  :: eps
945    REAL(DP)                  :: normfactor
946    REAL(DP)                  :: rhosum
947    COMPLEX(DP), ALLOCATABLE  :: qlm(:, :)
948    COMPLEX(DP)               :: qlm_1d(lm_mx)
949    INTEGER                   :: gsn, ngsn
950    INTEGER                   :: ncb(3)
951    INTEGER                   :: itr,jtr
952    !-----------------------------------------------------------------------
953    REAL(DP)                  :: omega
954    !-----------------------------------------------------------------------
955    LOGICAL                   :: anti_alising = .FALSE.
956    REAL(DP)                  :: sigma = 0.0D0
957    !-----------------------------------------------------------------------
958
959
960    !-----------------------------------------------------------------------
961    ! --- external functions ---
962    !-----------------------------------------------------------------------
963    REAL(DP), EXTERNAL     :: dnrm2
964    !-----------------------------------------------------------------------
965    !-----------------------------------------------------------------------
966    ! --- initialize ---
967    !-----------------------------------------------------------------------
968    if (.not.allocated(rho_ps)) allocate( rho_ps(n_ps))
969    if (.not.allocated(pot_ps)) then
970      allocate( pot_ps(n_ps)        ); pot_ps  = 0.0d0
971    end if
972    !-----------------------------------------------------------------------
973    ncb(1)  = ps_r(4)-ps_r(1)+1
974    ncb(2)  = ps_r(5)-ps_r(2)+1
975    ncb(3)  = ps_r(6)-ps_r(3)+1
976    !-----------------------------------------------------------------------
977    gsn     = MIN(guess_state, psgsn)
978    !-----------------------------------------------------------------------
979    potme   = 0.0D0
980    !-----------------------------------------------------------------------
981    rho_ps = rhops ! TODO : loop (device)
982    pot_ps = potps_old(:,1)
983    !-----------------------------------------------------------------------
984    omega = get_screening_parameter()
985    !-----------------------------------------------------------------------
986    ALLOCATE(qlm(0:lmax, 0:lmax))
987    !---------------------------------------------------------------------------
988    ! Then, we calculate the potential outside the inner sphere, which will
989    ! also give the boundary values for potential inside the sphere.
990    !---------------------------------------------------------------------------
991    CALL start_clock('getvofr_qlm')
992    CALL getqlm_cube(ps_r, hcub, rho_ps, qlm)
993    CALL stop_clock('getvofr_qlm')
994    !-----------------------------------------------------------------------
995    CALL start_clock('getvofr_bound')
996    CALL exx_boundaryv_cube(me_r, ps_r, potme, qlm)
997    CALL stop_clock('getvofr_bound')
998    !-----------------------------------------------------------------------
999    CALL start_clock('getvofr_geterho')
1000    CALL geterho_cube(me_r, ps_r, potme, rho_ps)
1001    CALL stop_clock('getvofr_geterho')
1002    !========================================================================
1003    !---------------------------------------------------------------------------
1004    ! --- Poisson solver ---
1005    !---------------------------------------------------------------------------
1006    CALL start_clock('getvofr_solver')
1007    !---------------------------------------------------------------------------
1008
1009    call cg_solver_stdcg
1010
1011    !---------------------------------------------------------------------------
1012    CALL stop_clock('getvofr_solver')
1013    !---------------------------------------------------------------------------
1014
1015    !---------------------------------------------------------------------------
1016    CALL ps2me(me_r, ps_r, potme, pot_ps)
1017    !---------------------------------------------------------------------------
1018    potps_old(:,1) = pot_ps ! Use as initial guess for next CG in Poisson
1019    !
1020    DEALLOCATE( qlm    )
1021    !---------------------------------------------------------------------------
1022    RETURN
1023    !---------------------------------------------------------------------------
1024  contains
1025
1026    subroutine  cg_solver_stdcg()
1027      implicit none
1028      CALL CG_CUBE(nstep, ncb, poisson_eps, fbsscale, coemicf, coeke, rho_ps, pot_ps)
1029      !
1030      return
1031    end subroutine cg_solver_stdcg
1032
1033END SUBROUTINE getvofr_cube
1034!===============================================================================
1035
1036!===============================================================================
1037SUBROUTINE getqlm_cube(ps_r, hcub, rho, qlm)
1038    !
1039    USE kinds,            ONLY  :  DP
1040    USE exx_module,       ONLY  :  lpole=>lmax
1041    USE exx_module,       ONLY  :  clm
1042    USE exx_module,       ONLY  :  me_cs
1043    USE exx_module,       ONLY  :  me_rc
1044    USE exx_module,       ONLY  :  me_ri
1045    USE exx_module,       ONLY  :  me_rs
1046    USE multipole_expansion, ONLY  :  get_plm
1047    !
1048    IMPLICIT NONE
1049    !------------------------------------------------------------------------------
1050    ! --- pass in variable ---
1051    !------------------------------------------------------------------------------
1052    INTEGER     :: ps_r(6)
1053    REAL(DP)    :: hcub
1054    REAL(DP)    :: rho(ps_r(1):ps_r(4),ps_r(2):ps_r(5),ps_r(3):ps_r(6))
1055    COMPLEX(DP) :: qlm(0:lpole, 0:lpole)
1056    COMPLEX(DP) :: qlm_tmp
1057    real(dp) :: coef_l
1058    !------------------------------------------------------------------------------
1059    !
1060    !------------------------------------------------------------------------------
1061    REAL(DP)    :: x, y, z, costheta, sintheta, sqrxy
1062    INTEGER     :: i,j,k,l,m
1063    !------------------------------------------------------------------------------
1064    !
1065    !------------------------------------------------------------------------------
1066    !------------------------------------------------------------------------------
1067    DO l = 0, lpole
1068      DO m = 0, l
1069        if (m.eq.0) then
1070          coef_l = clm(l,0)*hcub
1071        else
1072          coef_l = 2.d0*clm(l,m)*hcub
1073        end if ! m.eq.0
1074        qlm_tmp = (0.d0, 0.d0)
1075        !$omp parallel do collapse(3) private(i,j,k,x,y,z,sqrxy,costheta,sintheta) reduction(+:qlm_tmp)
1076        DO k=ps_r(3),ps_r(6)
1077          DO j=ps_r(2),ps_r(5)
1078            DO i=ps_r(1),ps_r(4)
1079              !----------------------------
1080              x = me_cs(1,i,j,k)
1081              y = me_cs(2,i,j,k)
1082              z = me_cs(3,i,j,k)
1083              !------------------------------------------------------------------------------
1084              sqrxy = DSQRT(x*x+y*y)
1085              !------------------------------------------------------------------------------
1086              costheta =     z*me_ri(1,i,j,k)
1087              sintheta = sqrxy*me_ri(1,i,j,k)
1088              !------------------------------------------------------------------------------
1089              qlm_tmp = qlm_tmp + rho(i,j,k)*me_rs(l,i,j,k)*get_plm(costheta,sintheta,l,m)*me_rc(m,i,j,k)*coef_l
1090            END DO ! i
1091          END DO ! j
1092        END DO ! k
1093        !$omp end parallel do
1094        qlm(l,m) = qlm_tmp
1095      END DO ! m
1096    END DO ! l
1097    !
1098    !---------------------------------------------------------------------------
1099    RETURN
1100    !---------------------------------------------------------------------------
1101END SUBROUTINE getqlm_cube
1102!===============================================================================
1103
1104!===============================================================================
1105SUBROUTINE exx_boundaryv_cube(me_r, ps_r, potme, qlm)
1106    !
1107    USE kinds,            ONLY  :  DP
1108    USE exx_module,       ONLY  :  lpole=>lmax
1109    USE exx_module,       ONLY  :  clm
1110    USE exx_module,       ONLY  :  me_cs
1111    USE exx_module,       ONLY  :  me_rc
1112    USE exx_module,       ONLY  :  me_ri
1113    USE exx_module,       ONLY  :  me_rs
1114    USE multipole_expansion, ONLY  : get_plm
1115    !
1116    IMPLICIT  NONE
1117    !--------------------------------------------------------------------
1118    ! pass in variables
1119    !--------------------------------------------------------------------
1120    INTEGER       :: me_r(6), ps_r(6)
1121    REAL(DP)      :: potme(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6))
1122    COMPLEX(DP)   :: qlm(0:lpole, 0:lpole)
1123    !--------------------------------------------------------------------
1124    !
1125    !--------------------------------------------------------------------
1126    ! local variables
1127    !--------------------------------------------------------------------
1128    INTEGER       :: i,j,k
1129    !------------------------------------------------------------------------------
1130    INTEGER       :: l, m
1131    INTEGER       :: ps_r1, ps_r2, ps_r3, ps_r4, ps_r5, ps_r6
1132    complex(dp)   :: cpot_r
1133    REAL(DP)      :: costheta, sintheta, x, y, z, sqrxy
1134    !------------------------------------------------------------------------------
1135
1136    ! WRITE(*,*) "multipole expansion pot"
1137    ! make ps_r as integer to improve cuf kernel perf
1138    ps_r1 = ps_r(1); ps_r2 = ps_r(2); ps_r3 = ps_r(3)
1139    ps_r4 = ps_r(4); ps_r5 = ps_r(5); ps_r6 = ps_r(6)
1140
1141    !------------------------------------------------------------------------------
1142    ! JJ: to continue
1143    !------------------------------------------------------------------------------
1144    !$omp parallel do private(x,y,z,sqrxy,costheta,sintheta,cpot_r) schedule(guided)
1145    DO k=me_r(3),me_r(6)
1146      DO j=me_r(2),me_r(5)
1147        DO i=me_r(1),me_r(4)
1148          !------------------------------------------------------------------------
1149          IF(.NOT.( (i.GE.ps_r1).AND.(i.LE.ps_r4).AND. &
1150                    (j.GE.ps_r2).AND.(j.LE.ps_r5).AND. &
1151                    (k.GE.ps_r3).AND.(k.LE.ps_r6) )) THEN
1152                  cpot_r = (0.0_DP,0.0_DP)
1153                  !------------------------------------------------------------------------------
1154                  x = me_cs(1,i,j,k) ! HK: TODO: dev var
1155                  y = me_cs(2,i,j,k)
1156                  z = me_cs(3,i,j,k)
1157                  !------------------------------------------------------------------------------
1158                  sqrxy = DSQRT(x*x+y*y)
1159                  !------------------------------------------------------------------------------
1160                  costheta =     z*me_ri(1,i,j,k) ! HK: TODO: GPU method should compute me_ri directly as 1/r
1161                  sintheta = sqrxy*me_ri(1,i,j,k)
1162                  !------------------------------------------------------------------------------
1163                  DO l=0,lpole
1164                    DO m=0,l
1165                      cpot_r = cpot_r + qlm(l,m)*me_ri(l+1,i,j,k)*get_plm(costheta,sintheta,l,m)*DCONJG(me_rc(m,i,j,k))
1166                      ! TODO: get_plm device function (need to be in mod)
1167                    END DO
1168                  END DO
1169                  potme(i,j,k) = dble(cpot_r)
1170          END IF
1171          !------------------------------------------------------------------------
1172        END DO
1173      END DO
1174    END DO
1175    !$omp end parallel do
1176    !------------------------------------------------------------------------------
1177    !------------------------------------------------------------------------------
1178    !
1179    !------------------------------------------------------------------------------
1180    RETURN
1181    !---------------------------------------------------------------------------
1182END SUBROUTINE exx_boundaryv_cube
1183!===========================================================================
1184
1185!===========================================================================
1186SUBROUTINE geterho_cube(me_r, ps_r, potme, rhops)
1187    !
1188    USE kinds,            ONLY  :  DP
1189    USE exx_module,       ONLY  :  nord2
1190    USE exx_module,       ONLY  :  coeke
1191    USE constants,        ONLY  :  eps12
1192    !
1193    IMPLICIT NONE
1194    !--------------------------------------------------------------------
1195    ! pass in variables
1196    !--------------------------------------------------------------------
1197    INTEGER       :: me_r(6), ps_r(6)
1198    REAL(DP)      :: potme(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6))
1199    REAL(DP)      :: rhops(ps_r(1):ps_r(4),ps_r(2):ps_r(5),ps_r(3):ps_r(6))
1200    !--------------------------------------------------------------------
1201    !
1202    !--------------------------------------------------------------------
1203    ! local variables
1204    !--------------------------------------------------------------------
1205    INTEGER       :: i,j,k,ish
1206    INTEGER       :: ps_r1, ps_r2, ps_r3, ps_r4, ps_r5, ps_r6
1207    !------------------------------------------------------------------------------
1208
1209    ! WRITE(*,*) "wrapped rho"
1210    ps_r1 = ps_r(1); ps_r2 = ps_r(2); ps_r3 = ps_r(3)
1211    ps_r4 = ps_r(4); ps_r5 = ps_r(5); ps_r6 = ps_r(6)
1212
1213    !------------------------------------------------------------------------------
1214    !$omp parallel do private(i,j,k,ish) schedule(guided)
1215    !------------------------------------------------------------------------------
1216    DO k=ps_r3,ps_r6
1217      DO j=ps_r2,ps_r5
1218        DO i=ps_r1,ps_r4
1219          !------------------------------------------------------------------------
1220          IF(.NOT.( (i.GE.ps_r1+nord2).AND.(i.LE.ps_r4-nord2).AND. &
1221                    (j.GE.ps_r2+nord2).AND.(j.LE.ps_r5-nord2).AND. &
1222                    (k.GE.ps_r3+nord2).AND.(k.LE.ps_r6-nord2) )) THEN
1223            !----------------------------------------------------------------------
1224            DO ish=1,nord2
1225              rhops(i,j,k) = rhops(i,j,k) - coeke(ish,1,1)*potme(i+ish, j,     k    ) &
1226                                          - coeke(ish,1,1)*potme(i-ish, j,     k    ) &
1227                                          - coeke(ish,2,2)*potme(i,     j+ish, k    ) &
1228                                          - coeke(ish,2,2)*potme(i,     j-ish, k    ) &
1229                                          - coeke(ish,3,3)*potme(i,     j,     k+ish) &
1230                                          - coeke(ish,3,3)*potme(i,     j,     k-ish) &
1231                                          - coeke(ish,1,2)*potme(i+ish, j+ish, k    ) &
1232                                          + coeke(ish,1,2)*potme(i+ish, j-ish, k    ) &
1233                                          + coeke(ish,1,2)*potme(i-ish, j+ish, k    ) &
1234                                          - coeke(ish,1,2)*potme(i-ish, j-ish, k    ) &
1235                                          - coeke(ish,1,3)*potme(i+ish, j,     k+ish) &
1236                                          + coeke(ish,1,3)*potme(i+ish, j,     k-ish) &
1237                                          + coeke(ish,1,3)*potme(i-ish, j,     k+ish) &
1238                                          - coeke(ish,1,3)*potme(i-ish, j,     k-ish) &
1239                                          - coeke(ish,2,3)*potme(i,     j+ish, k+ish) &
1240                                          + coeke(ish,2,3)*potme(i,     j+ish, k-ish) &
1241                                          + coeke(ish,2,3)*potme(i,     j-ish, k+ish) &
1242                                          - coeke(ish,2,3)*potme(i,     j-ish, k-ish)
1243            END DO
1244            !----------------------------------------------------------------------
1245          END IF
1246          !------------------------------------------------------------------------
1247          ! WRITE(*,"(I4,I4,I4,F15.11)") i,j,k, rhops(i,j,k)
1248          !------------------------------------------------------------------------
1249        END DO
1250      END DO
1251    END DO
1252    !$omp end parallel do
1253    !-----------------------------------------------------------------------
1254    RETURN
1255    !-----------------------------------------------------------------------
1256END SUBROUTINE geterho_cube
1257!==================================================================
1258
1259
1260!==================================================================
1261SUBROUTINE write_rho_pot(ps_r, rhops, rho_ps, pot_ps)
1262    USE kinds,            ONLY  :  DP
1263    !
1264    IMPLICIT  NONE
1265    !--------------------------------------------------------------------
1266    INTEGER       :: ps_r(6)
1267    REAL(DP)      :: rhops(ps_r(1):ps_r(4),ps_r(2):ps_r(5),ps_r(3):ps_r(6))
1268    REAL(DP)      :: rho_ps(ps_r(1):ps_r(4),ps_r(2):ps_r(5),ps_r(3):ps_r(6))
1269    REAL(DP)      :: pot_ps(ps_r(1):ps_r(4),ps_r(2):ps_r(5),ps_r(3):ps_r(6))
1270    !--------------------------------------------------------------------
1271    INTEGER       :: i,j,k
1272    !--------------------------------------------------------------------
1273    DO k=ps_r(3),ps_r(6)
1274      DO j=ps_r(2),ps_r(5)
1275        DO i=ps_r(1),ps_r(4)
1276        !----------------------------------------------------------------
1277        WRITE(*,"(I4,I4,I4,F15.11,F15.11,F15.11)") i,j,k, rhops(i,j,k),rho_ps(i,j,k),pot_ps(i,j,k)
1278        !----------------------------------------------------------------
1279        END DO
1280      END DO
1281    END DO
1282    !--------------------------------------------------------------------
1283    RETURN
1284    !--------------------------------------------------------------------
1285END SUBROUTINE write_rho_pot
1286!==================================================================
1287
1288!==================================================================
1289SUBROUTINE ps2me(me_r, ps_r, potme, potps)
1290    USE kinds,            ONLY  :  DP
1291    !
1292    IMPLICIT  NONE
1293    !--------------------------------------------------------------------
1294    INTEGER       :: me_r(6), ps_r(6)
1295    REAL(DP)      :: potme(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6))
1296    REAL(DP)      :: potps(ps_r(1):ps_r(4),ps_r(2):ps_r(5),ps_r(3):ps_r(6))
1297    !--------------------------------------------------------------------
1298    ! local variables
1299    !--------------------------------------------------------------------
1300    INTEGER       :: i,j,k
1301    INTEGER       :: ps_r1, ps_r2, ps_r3, ps_r4, ps_r5, ps_r6
1302    !------------------------------------------------------------------------------
1303    !
1304    ps_r1 = ps_r(1); ps_r2 = ps_r(2); ps_r3 = ps_r(3)
1305    ps_r4 = ps_r(4); ps_r5 = ps_r(5); ps_r6 = ps_r(6)
1306    !
1307    !$omp parallel do private(i,j,k)
1308    DO k=ps_r3,ps_r6
1309      DO j=ps_r2,ps_r5
1310        DO i=ps_r1,ps_r4
1311          potme(i,j,k) = potps(i,j,k)
1312        END DO
1313      END DO
1314    END DO
1315    !$omp end parallel do
1316    !--------------------------------------------------------------------
1317    !potme(ps_r(1):ps_r(4),ps_r(2):ps_r(5),ps_r(3):ps_r(6)) = potps(:,:,:)
1318    !--------------------------------------------------------------------
1319    RETURN
1320    !--------------------------------------------------------------------
1321END SUBROUTINE ps2me
1322!==================================================================
1323
1324!==================================================================
1325SUBROUTINE kernel_lr(me_r, klr_me, omega)
1326    USE kinds,            ONLY  :  DP
1327    USE exx_module,       ONLY  :  me_rs
1328    !
1329    IMPLICIT  NONE
1330    !--------------------------------------------------------------------
1331    INTEGER                :: me_r(6)
1332    REAL(DP)               :: klr_me(me_r(1):me_r(4),me_r(2):me_r(5),me_r(3):me_r(6))
1333    REAL(DP)               :: omega
1334    !--------------------------------------------------------------------
1335    REAL(DP)               :: r
1336    INTEGER                :: nb(3)
1337    INTEGER                :: i,j,k
1338    INTEGER                :: is,js,ks
1339    !--------------------------------------------------------------------
1340
1341    nb(1) = (me_r(4)-me_r(1)+1)
1342    nb(2) = (me_r(5)-me_r(2)+1)
1343    nb(3) = (me_r(6)-me_r(3)+1)
1344
1345    !--------------------------------------------------------------------
1346    !$omp parallel do private(i,j,k,r) schedule(guided)
1347    !--------------------------------------------------------------------
1348    DO k=me_r(3),me_r(6)
1349      DO j=me_r(2),me_r(5)
1350        DO i=me_r(1),me_r(4)
1351        !----------------------------------------------------------------
1352        is = MOD(i-me_r(1)+nb(1)/2, nb(1))+me_r(1)
1353        js = MOD(j-me_r(2)+nb(2)/2, nb(2))+me_r(2)
1354        ks = MOD(k-me_r(3)+nb(3)/2, nb(3))+me_r(3)
1355        !----------------------------------------------------------------
1356        r = me_rs(1,is,js,ks) * 2.0D0
1357        !----------------------------------------------------------------
1358        klr_me(i,j,k) = erf(omega*r)/r
1359        !----------------------------------------------------------------
1360        END DO
1361      END DO
1362    END DO
1363    !--------------------------------------------------------------------
1364    klr_me(me_r(1), me_r(2), me_r(3)) = (2*omega)/(3.1415926535897932D0**0.5D0)
1365    !--------------------------------------------------------------------
1366END SUBROUTINE kernel_lr
1367!==================================================================
1368
1369
1370!==================================================================
1371SUBROUTINE gaussian(ps_r, rho_ps, p)
1372    USE kinds,            ONLY  :  DP
1373    USE exx_module,       ONLY  :  me_rs
1374    !
1375    IMPLICIT  NONE
1376    !--------------------------------------------------------------------
1377    INTEGER                :: ps_r(6)
1378    REAL(DP)               :: rho_ps(ps_r(1):ps_r(4),ps_r(2):ps_r(5),ps_r(3):ps_r(6))
1379    REAL(DP)               :: p
1380    !--------------------------------------------------------------------
1381    REAL(DP)               :: r
1382    INTEGER                :: i,j,k
1383    !--------------------------------------------------------------------
1384
1385    !--------------------------------------------------------------------
1386    !$omp parallel do private(i,j,k,r) schedule(guided)
1387    !--------------------------------------------------------------------
1388    DO k=ps_r(3),ps_r(6)
1389      DO j=ps_r(2),ps_r(5)
1390        DO i=ps_r(1),ps_r(4)
1391        !----------------------------------------------------------------
1392        r = me_rs(1,i,j,k)
1393        !----------------------------------------------------------------
1394        rho_ps(i,j,k) = (p/3.1415926535897932D0)**1.5D0 * exp(-p * r**2.0D0)
1395        !----------------------------------------------------------------
1396        END DO
1397      END DO
1398    END DO
1399    !--------------------------------------------------------------------
1400END SUBROUTINE gaussian
1401!==================================================================
1402
1403