1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief different utils that are useful to manipulate splines on the regular
8!>      grid of a pw
9!> \par History
10!>      05.2003 created [fawzi]
11!>      08.2004 removed spline evaluation method using more than 2 read streams
12!>              (pw_compose_stripe_rs3), added linear solver based spline
13!>              inversion [fawzi]
14!> \author Fawzi Mohamed
15! **************************************************************************************************
16MODULE pw_spline_utils
17   USE cp_log_handling,                 ONLY: cp_get_default_logger,&
18                                              cp_logger_type,&
19                                              cp_to_string
20   USE kinds,                           ONLY: dp
21   USE mathconstants,                   ONLY: twopi
22   USE message_passing,                 ONLY: mp_alltoall,&
23                                              mp_comm_compare,&
24                                              mp_sendrecv,&
25                                              mp_sum
26   USE pw_grid_types,                   ONLY: FULLSPACE,&
27                                              PW_MODE_LOCAL
28   USE pw_methods,                      ONLY: pw_axpy,&
29                                              pw_copy,&
30                                              pw_integral_ab,&
31                                              pw_zero
32   USE pw_pool_types,                   ONLY: pw_pool_create_pw,&
33                                              pw_pool_give_back_pw,&
34                                              pw_pool_release,&
35                                              pw_pool_retain,&
36                                              pw_pool_type
37   USE pw_types,                        ONLY: COMPLEXDATA1D,&
38                                              REALDATA3D,&
39                                              REALSPACE,&
40                                              RECIPROCALSPACE,&
41                                              pw_p_type,&
42                                              pw_type
43#include "../base/base_uses.f90"
44
45   IMPLICIT NONE
46   PRIVATE
47
48   LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
49   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'pw_spline_utils'
50
51   INTEGER, PARAMETER, PUBLIC               :: no_precond = 0, &
52                                               precond_spl3_aint = 1, &
53                                               precond_spl3_1 = 2, &
54                                               precond_spl3_aint2 = 3, &
55                                               precond_spl3_2 = 4, &
56                                               precond_spl3_3 = 5
57
58   REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(4) :: nn10_coeffs = &
59                                                     (/125._dp/216._dp, 25._dp/432._dp, 5._dp/864._dp, 1._dp/1728._dp/), &
60                                                     spline3_coeffs = &
61                                                     (/8._dp/(27._dp), 2._dp/(27._dp), 1._dp/(27._dp*2._dp), &
62                                                       1._dp/(27._dp*8._dp)/), &
63                                                     spline2_coeffs = &
64                                                     (/27._dp/(64._dp), 9._dp/(64._dp*2_dp), 3._dp/(64._dp*4._dp), &
65                                                       1._dp/(64._dp*8._dp)/), &
66                                                     nn50_coeffs = &
67                                                     (/15625._dp/17576._dp, 625._dp/35152._dp, 25._dp/70304._dp, &
68                                                       1._dp/140608._dp/), &
69                                                     spl3_aint_coeff = &
70                                                     (/46._dp/27._dp, -2._dp/(27._dp), -1._dp/(27._dp*2._dp), &
71                                                       -1._dp/(27._dp*8._dp)/), &
72                                                     spl3_precond1_coeff = &
73                                                     (/64._dp/3._dp, -8._dp/3._dp, -1._dp/3._dp, -1._dp/24._dp/), &
74                                                     spl3_1d_transf_coeffs = &
75                                                     (/2._dp/3._dp, 23._dp/48._dp, 1._dp/6._dp, 1._dp/48._dp/)
76
77   REAL(KIND=dp), PUBLIC, PARAMETER, DIMENSION(3) :: spline3_deriv_coeffs = &
78                                                     (/2.0_dp/9.0_dp, 1.0_dp/18.0_dp, 1.0_dp/72.0_dp/), &
79                                                     spline2_deriv_coeffs = &
80                                                     (/9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp/), &
81                                                     nn10_deriv_coeffs = &
82                                                     (/25._dp/72._dp, 5._dp/144, 1._dp/288._dp/), &
83                                                     nn50_deriv_coeffs = &
84                                                     (/625._dp/1352._dp, 25._dp/2704._dp, 1._dp/5408._dp/), &
85                                                     spl3_1d_coeffs0 = &
86                                                     (/1._dp/6_dp, 2._dp/3._dp, 1._dp/6._dp/), &
87                                                     spl3_1d_transf_border1 = &
88                                                     (/0.517977704_dp, 0.464044595_dp, 0.17977701e-1_dp/)
89
90   INTEGER, SAVE, PRIVATE :: last_precond_id = 0
91
92   PUBLIC :: pw_spline3_interpolate_values_g, &
93             pw_spline3_deriv_g
94   PUBLIC :: pw_spline_scale_deriv
95   PUBLIC :: pw_spline2_interpolate_values_g, &
96             pw_spline2_deriv_g
97   PUBLIC :: pw_nn_smear_r, pw_nn_deriv_r, &
98             spl3_nopbc, spl3_pbc, spl3_nopbct
99   PUBLIC :: add_fine2coarse, add_coarse2fine
100   PUBLIC :: pw_spline_precond_create, &
101             pw_spline_do_precond, &
102             pw_spline_precond_set_kind, &
103             find_coeffs, &
104             pw_spline_precond_release, &
105             pw_spline_precond_type, &
106             Eval_Interp_Spl3_pbc, &
107             Eval_d_Interp_Spl3_pbc
108
109!***
110
111! **************************************************************************************************
112!> \brief stores information for the preconditioner used to calculate the
113!>      coeffs of splines
114!> \author fawzi
115! **************************************************************************************************
116   TYPE pw_spline_precond_type
117      INTEGER :: ref_count, id_nr, kind
118      REAL(kind=dp), DIMENSION(4) :: coeffs
119      REAL(kind=dp), DIMENSION(3) :: coeffs_1d
120      LOGICAL :: sharpen, normalize, pbc, transpose
121      TYPE(pw_pool_type), POINTER :: pool
122   END TYPE pw_spline_precond_type
123
124CONTAINS
125
126! **************************************************************************************************
127!> \brief calculates the FFT of the coefficents of the quadratic spline that
128!>      interpolates the given values
129!> \param spline_g on entry the FFT of the values to interpolate as cc,
130!>        will contain the FFT of the coefficents of the spline
131!> \par History
132!>      06.2003 created [fawzi]
133!> \author Fawzi Mohamed
134!> \note
135!>      does not work with spherical cutoff
136! **************************************************************************************************
137   SUBROUTINE pw_spline2_interpolate_values_g(spline_g)
138      TYPE(pw_type), POINTER                             :: spline_g
139
140      CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_interpolate_values_g', &
141         routineP = moduleN//':'//routineN
142
143      INTEGER                                            :: handle, i, ii, j, k
144      INTEGER, DIMENSION(2, 3)                           :: gbo
145      INTEGER, DIMENSION(3)                              :: n_tot
146      REAL(KIND=dp)                                      :: c23, coeff
147      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cosIVals, cosJVals, cosKVals
148
149      CALL timeset(routineN, handle)
150
151      n_tot(1:3) = spline_g%pw_grid%npts(1:3)
152      gbo = spline_g%pw_grid%bounds
153
154      CPASSERT(ASSOCIATED(spline_g))
155      CPASSERT(spline_g%in_use == COMPLEXDATA1D)
156      CPASSERT(spline_g%in_space == RECIPROCALSPACE)
157      CPASSERT(.NOT. spline_g%pw_grid%spherical)
158      CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
159
160      ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), cosJVals(gbo(1, 2):gbo(2, 2)), &
161                cosKVals(gbo(1, 3):gbo(2, 3)))
162
163      coeff = twopi/n_tot(1)
164!$OMP     PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
165      DO i = gbo(1, 1), gbo(2, 1)
166         cosIVals(i) = COS(coeff*REAL(i, dp))
167      END DO
168      coeff = twopi/n_tot(2)
169!$OMP     PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
170      DO j = gbo(1, 2), gbo(2, 2)
171         cosJVals(j) = COS(coeff*REAL(j, dp))
172      END DO
173      coeff = twopi/n_tot(3)
174!$OMP     PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
175      DO k = gbo(1, 3), gbo(2, 3)
176         cosKVals(k) = COS(coeff*REAL(k, dp))
177      END DO
178
179!$OMP     PARALLEL DO PRIVATE(i,j,k,ii,coeff,c23) DEFAULT(NONE) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
180      DO ii = 1, SIZE(spline_g%cc)
181         i = spline_g%pw_grid%g_hat(1, ii)
182         j = spline_g%pw_grid%g_hat(2, ii)
183         k = spline_g%pw_grid%g_hat(3, ii)
184
185         c23 = cosJVals(j)*cosKVals(k)
186         coeff = 64.0_dp/(cosIVals(i)*c23 + &
187                          (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*3.0_dp + &
188                          (cosIVals(i) + cosJVals(j) + cosKVals(k))*9.0_dp + &
189                          27.0_dp)
190
191         spline_g%cc(ii) = spline_g%cc(ii)*coeff
192
193      END DO
194      DEALLOCATE (cosIVals, cosJVals, cosKVals)
195
196      CALL timestop(handle)
197   END SUBROUTINE pw_spline2_interpolate_values_g
198
199! **************************************************************************************************
200!> \brief calculates the FFT of the coefficents of the2 cubic spline that
201!>      interpolates the given values
202!> \param spline_g on entry the FFT of the values to interpolate as cc,
203!>        will contain the FFT of the coefficents of the spline
204!> \par History
205!>      06.2003 created [fawzi]
206!> \author Fawzi Mohamed
207!> \note
208!>      does not work with spherical cutoff
209!>      stupid distribution for cos calculation, it should calculate only the
210!>      needed cos, and avoid the mp_sum
211! **************************************************************************************************
212   SUBROUTINE pw_spline3_interpolate_values_g(spline_g)
213      TYPE(pw_type), POINTER                             :: spline_g
214
215      CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_interpolate_values_g', &
216         routineP = moduleN//':'//routineN
217
218      INTEGER                                            :: handle, i, ii, j, k
219      INTEGER, DIMENSION(2, 3)                           :: gbo
220      INTEGER, DIMENSION(3)                              :: n_tot
221      REAL(KIND=dp)                                      :: c23, coeff
222      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cosIVals, cosJVals, cosKVals
223
224      CALL timeset(routineN, handle)
225
226      n_tot(1:3) = spline_g%pw_grid%npts(1:3)
227      gbo = spline_g%pw_grid%bounds
228
229      CPASSERT(ASSOCIATED(spline_g))
230      CPASSERT(spline_g%in_use == COMPLEXDATA1D)
231      CPASSERT(spline_g%in_space == RECIPROCALSPACE)
232      CPASSERT(.NOT. spline_g%pw_grid%spherical)
233      CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
234
235      ALLOCATE (cosIVals(gbo(1, 1):gbo(2, 1)), &
236                cosJVals(gbo(1, 2):gbo(2, 2)), &
237                cosKVals(gbo(1, 3):gbo(2, 3)))
238
239      coeff = twopi/n_tot(1)
240!$OMP     PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(cosIVals,coeff,gbo)
241      DO i = gbo(1, 1), gbo(2, 1)
242         cosIVals(i) = COS(coeff*REAL(i, dp))
243      END DO
244      coeff = twopi/n_tot(2)
245!$OMP     PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(cosJVals,coeff,gbo)
246      DO j = gbo(1, 2), gbo(2, 2)
247         cosJVals(j) = COS(coeff*REAL(j, dp))
248      END DO
249      coeff = twopi/n_tot(3)
250!$OMP     PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(cosKVals,coeff,gbo)
251      DO k = gbo(1, 3), gbo(2, 3)
252         cosKVals(k) = COS(coeff*REAL(k, dp))
253      END DO
254
255!$OMP     PARALLEL DO DEFAULT(NONE) PRIVATE(i,j,k,ii,coeff,c23) SHARED(spline_g,cosIVals,cosJVals,cosKVals)
256      DO ii = 1, SIZE(spline_g%cc)
257         i = spline_g%pw_grid%g_hat(1, ii)
258         j = spline_g%pw_grid%g_hat(2, ii)
259         k = spline_g%pw_grid%g_hat(3, ii)
260         ! no opt
261!FM                coeff=1.0/((cosVal(1)*cosVal(2)*cosVal(3))/27.0_dp+&
262!FM                     (cosVal(1)*cosVal(2)+cosVal(1)*cosVal(3)+&
263!FM                     cosVal(2)*cosVal(3))*2.0_dp/27.0_dp+&
264!FM                     (cosVal(1)+cosVal(2)+cosVal(3))*4.0_dp/27.0_dp+&
265!FM                     8.0_dp/27.0_dp)
266         ! opt
267         c23 = cosJVals(j)*cosKVals(k)
268         coeff = 27.0_dp/(cosIVals(i)*c23 + &
269                          (cosIVals(i)*cosJVals(j) + cosIVals(i)*cosKVals(k) + c23)*2.0_dp + &
270                          (cosIVals(i) + cosJVals(j) + cosKVals(k))*4.0_dp + &
271                          8.0_dp)
272
273         spline_g%cc(ii) = spline_g%cc(ii)*coeff
274
275      END DO
276      DEALLOCATE (cosIVals, cosJVals, cosKVals)
277
278      CALL timestop(handle)
279   END SUBROUTINE pw_spline3_interpolate_values_g
280
281! **************************************************************************************************
282!> \brief rescales the derivatives from gridspacing=1 to the real derivatives
283!> \param deriv_vals_r an array of x,y,z derivatives
284!> \param transpose if true applies the transpose of the map (defaults to
285!>        false)
286!> \param scale a scaling factor (defaults to 1.0)
287!> \par History
288!>      06.2003 created [fawzi]
289!> \author Fawzi Mohamed
290! **************************************************************************************************
291   SUBROUTINE pw_spline_scale_deriv(deriv_vals_r, transpose, scale)
292      TYPE(pw_p_type), DIMENSION(3)                      :: deriv_vals_r
293      LOGICAL, INTENT(in), OPTIONAL                      :: transpose
294      REAL(KIND=dp), INTENT(in), OPTIONAL                :: scale
295
296      CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline_scale_deriv', &
297         routineP = moduleN//':'//routineN
298
299      INTEGER                                            :: handle, i, idir, j, k
300      INTEGER, DIMENSION(2, 3)                           :: bo
301      INTEGER, DIMENSION(3)                              :: n_tot
302      LOGICAL                                            :: diag, my_transpose
303      REAL(KIND=dp)                                      :: dVal1, dVal2, dVal3, my_scale, scalef
304      REAL(KIND=dp), DIMENSION(3, 3)                     :: dh_inv, h_grid
305      REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: ddata, ddata2, ddata3
306
307      CALL timeset(routineN, handle)
308
309      my_transpose = .FALSE.
310      IF (PRESENT(transpose)) my_transpose = transpose
311      my_scale = 1.0_dp
312      IF (PRESENT(scale)) my_scale = scale
313      n_tot(1:3) = deriv_vals_r(1)%pw%pw_grid%npts(1:3)
314      bo = deriv_vals_r(1)%pw%pw_grid%bounds_local
315      dh_inv = deriv_vals_r(1)%pw%pw_grid%dh_inv
316
317      ! map grid to real derivative
318      diag = .TRUE.
319      IF (my_transpose) THEN
320         DO j = 1, 3
321            DO i = 1, 3
322               h_grid(j, i) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
323               IF (i /= j .AND. h_grid(j, i) /= 0.0_dp) diag = .FALSE.
324            END DO
325         END DO
326      ELSE
327         DO j = 1, 3
328            DO i = 1, 3
329               h_grid(i, j) = my_scale*dh_inv(i, j) ! REAL(n_tot(i),dp)*cell_h_inv(i,j)
330               IF (i /= j .AND. h_grid(i, j) /= 0.0_dp) diag = .FALSE.
331            END DO
332         END DO
333      END IF
334
335      IF (diag) THEN
336         DO idir = 1, 3
337            ddata => deriv_vals_r(idir)%pw%cr3d
338            scalef = h_grid(idir, idir)
339            CALL dscal((bo(2, 1) - bo(1, 1) + 1)*(bo(2, 2) - bo(1, 2) + 1)*(bo(2, 3) - bo(1, 3) + 1), &
340                       scalef, ddata, 1)
341         END DO
342      ELSE
343         ddata => deriv_vals_r(1)%pw%cr3d
344         ddata2 => deriv_vals_r(2)%pw%cr3d
345         ddata3 => deriv_vals_r(3)%pw%cr3d
346!$OMP        PARALLEL DO DEFAULT(NONE) PRIVATE(k,j,i,dVal1,dVal2,dVal3) &
347!$OMP                 SHARED(ddata,ddata2,ddata3,h_grid,bo)
348         DO k = bo(1, 3), bo(2, 3)
349            DO j = bo(1, 2), bo(2, 2)
350               DO i = bo(1, 1), bo(2, 1)
351
352                  dVal1 = ddata(i, j, k)
353                  dVal2 = ddata2(i, j, k)
354                  dVal3 = ddata3(i, j, k)
355
356                  ddata(i, j, k) = h_grid(1, 1)*dVal1 + &
357                                   h_grid(2, 1)*dVal2 + h_grid(3, 1)*dVal3
358                  ddata2(i, j, k) = h_grid(1, 2)*dVal1 + &
359                                    h_grid(2, 2)*dVal2 + h_grid(3, 2)*dVal3
360                  ddata3(i, j, k) = h_grid(1, 3)*dVal1 + &
361                                    h_grid(2, 3)*dVal2 + h_grid(3, 3)*dVal3
362
363               END DO
364            END DO
365         END DO
366      END IF
367
368      CALL timestop(handle)
369   END SUBROUTINE pw_spline_scale_deriv
370
371! **************************************************************************************************
372!> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
373!>      derivative of the cubic spline
374!> \param spline_g on entry the FFT of the coefficents of the spline
375!>        will contain the FFT of the derivative
376!> \param idir direction of the derivative
377!> \par History
378!>      06.2003 created [fawzi]
379!> \author Fawzi Mohamed
380!> \note
381!>      the distance between gridpoints is assumed to be 1
382! **************************************************************************************************
383   SUBROUTINE pw_spline3_deriv_g(spline_g, idir)
384      TYPE(pw_type), POINTER                             :: spline_g
385      INTEGER, INTENT(in)                                :: idir
386
387      CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline3_deriv_g', &
388         routineP = moduleN//':'//routineN
389      REAL(KIND=dp), PARAMETER                           :: inv9 = 1.0_dp/9.0_dp
390
391      INTEGER                                            :: handle, i, ii, j, k
392      INTEGER, DIMENSION(2, 3)                           :: bo, gbo
393      INTEGER, DIMENSION(3)                              :: n, n_tot
394      REAL(KIND=dp)                                      :: coeff, tmp
395      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: csIVals, csJVals, csKVals
396
397      CALL timeset(routineN, handle)
398
399      n(1:3) = spline_g%pw_grid%npts_local(1:3)
400      n_tot(1:3) = spline_g%pw_grid%npts(1:3)
401      bo = spline_g%pw_grid%bounds_local
402      gbo = spline_g%pw_grid%bounds
403
404      CPASSERT(ASSOCIATED(spline_g))
405      CPASSERT(spline_g%in_use == COMPLEXDATA1D)
406      CPASSERT(spline_g%in_space == RECIPROCALSPACE)
407      CPASSERT(.NOT. spline_g%pw_grid%spherical)
408      CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
409
410      ALLOCATE (csIVals(gbo(1, 1):gbo(2, 1)), &
411                csJVals(gbo(1, 2):gbo(2, 2)), &
412                csKVals(gbo(1, 3):gbo(2, 3)))
413
414      coeff = twopi/n_tot(1)
415      IF (idir == 1) THEN
416!$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
417         DO i = gbo(1, 1), gbo(2, 1)
418            csIVals(i) = SIN(coeff*REAL(i, dp))
419         END DO
420      ELSE
421!$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(gbo,csIVals,coeff)
422         DO i = gbo(1, 1), gbo(2, 1)
423            csIVals(i) = COS(coeff*REAL(i, dp))
424         END DO
425      END IF
426      coeff = twopi/n_tot(2)
427      IF (idir == 2) THEN
428!$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
429         DO j = gbo(1, 2), gbo(2, 2)
430            csJVals(j) = SIN(coeff*REAL(j, dp))
431         END DO
432      ELSE
433!$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(gbo,csJVals,coeff)
434         DO j = gbo(1, 2), gbo(2, 2)
435            csJVals(j) = COS(coeff*REAL(j, dp))
436         END DO
437      END IF
438      coeff = twopi/n_tot(3)
439      IF (idir == 3) THEN
440!$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
441         DO k = gbo(1, 3), gbo(2, 3)
442            csKVals(k) = SIN(coeff*REAL(k, dp))
443         END DO
444      ELSE
445!$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(gbo,csKVals,coeff)
446         DO k = gbo(1, 3), gbo(2, 3)
447            csKVals(k) = COS(coeff*REAL(k, dp))
448         END DO
449      END IF
450
451      SELECT CASE (idir)
452      CASE (1)
453         ! x deriv
454!$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
455         DO ii = 1, SIZE(spline_g%cc)
456            i = spline_g%pw_grid%g_hat(1, ii)
457            j = spline_g%pw_grid%g_hat(2, ii)
458            k = spline_g%pw_grid%g_hat(3, ii)
459!FM                ! formula
460!FM                coeff=(sinVal(1)*cosVal(2)*cosVal(3))/9.0_dp+&
461!FM                     (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*2.0_dp/9.0_dp+&
462!FM                     sinVal(1)*4.0_dp/9.0_dp
463            tmp = csIVals(i)*csJVals(j)
464            coeff = (tmp*csKVals(k) + &
465                     (tmp + csIVals(i)*csKVals(k))*2.0_dp + &
466                     csIVals(i)*4.0_dp)*inv9
467
468            spline_g%cc(ii) = spline_g%cc(ii)* &
469                              CMPLX(0.0_dp, coeff, dp)
470         END DO
471      CASE (2)
472         ! y deriv
473!$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
474         DO ii = 1, SIZE(spline_g%cc)
475            i = spline_g%pw_grid%g_hat(1, ii)
476            j = spline_g%pw_grid%g_hat(2, ii)
477            k = spline_g%pw_grid%g_hat(3, ii)
478
479            tmp = csIVals(i)*csJVals(j)
480            coeff = (tmp*csKVals(k) + &
481                     (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
482                     csJVals(j)*4.0_dp)*inv9
483
484            spline_g%cc(ii) = spline_g%cc(ii)* &
485                              CMPLX(0.0_dp, coeff, dp)
486         END DO
487      CASE (3)
488         ! z deriv
489!$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
490         DO ii = 1, SIZE(spline_g%cc)
491            i = spline_g%pw_grid%g_hat(1, ii)
492            j = spline_g%pw_grid%g_hat(2, ii)
493            k = spline_g%pw_grid%g_hat(3, ii)
494
495            tmp = csIVals(i)*csKVals(k)
496            coeff = (tmp*csJVals(j) + &
497                     (tmp + csJVals(j)*csKVals(k))*2.0_dp + &
498                     csKVals(k)*4.0_dp)*inv9
499
500            spline_g%cc(ii) = spline_g%cc(ii)* &
501                              CMPLX(0.0_dp, coeff, dp)
502         END DO
503      END SELECT
504
505      DEALLOCATE (csIVals, csJVals, csKVals)
506
507      CALL timestop(handle)
508   END SUBROUTINE pw_spline3_deriv_g
509
510! **************************************************************************************************
511!> \brief calculates the FFT of the values of the x,y,z (idir=1,2,3)
512!>      derivative of the quadratic spline
513!> \param spline_g on entry the FFT of the coefficents of the spline
514!>        will contain the FFT of the derivative
515!> \param idir direction of the derivative
516!> \par History
517!>      06.2003 created [fawzi]
518!> \author Fawzi Mohamed
519!> \note
520!>      the distance between gridpoints is assumed to be 1
521! **************************************************************************************************
522   SUBROUTINE pw_spline2_deriv_g(spline_g, idir)
523      TYPE(pw_type), POINTER                             :: spline_g
524      INTEGER, INTENT(in)                                :: idir
525
526      CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline2_deriv_g', &
527         routineP = moduleN//':'//routineN
528      REAL(KIND=dp), PARAMETER                           :: inv16 = 1.0_dp/16.0_dp
529
530      INTEGER                                            :: handle, i, ii, j, k
531      INTEGER, DIMENSION(2, 3)                           :: bo
532      INTEGER, DIMENSION(3)                              :: n, n_tot
533      REAL(KIND=dp)                                      :: coeff, tmp
534      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: csIVals, csJVals, csKVals
535
536      CALL timeset(routineN, handle)
537
538      n(1:3) = spline_g%pw_grid%npts_local(1:3)
539      n_tot(1:3) = spline_g%pw_grid%npts(1:3)
540      bo = spline_g%pw_grid%bounds
541
542      CPASSERT(ASSOCIATED(spline_g))
543      CPASSERT(spline_g%in_use == COMPLEXDATA1D)
544      CPASSERT(spline_g%in_space == RECIPROCALSPACE)
545      CPASSERT(.NOT. spline_g%pw_grid%spherical)
546      CPASSERT(spline_g%pw_grid%grid_span == FULLSPACE)
547
548      ALLOCATE (csIVals(bo(1, 1):bo(2, 1)), csJVals(bo(1, 2):bo(2, 2)), &
549                csKVals(bo(1, 3):bo(2, 3)))
550
551      coeff = twopi/n_tot(1)
552      IF (idir == 1) THEN
553!$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
554         DO i = bo(1, 1), bo(2, 1)
555            csIVals(i) = SIN(coeff*REAL(i, dp))
556         END DO
557      ELSE
558!$OMP        PARALLEL DO PRIVATE(i) DEFAULT(NONE) SHARED(bo,coeff,csIVals)
559         DO i = bo(1, 1), bo(2, 1)
560            csIVals(i) = COS(coeff*REAL(i, dp))
561         END DO
562      END IF
563      coeff = twopi/n_tot(2)
564      IF (idir == 2) THEN
565!$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
566         DO j = bo(1, 2), bo(2, 2)
567            csJVals(j) = SIN(coeff*REAL(j, dp))
568         END DO
569      ELSE
570!$OMP        PARALLEL DO PRIVATE(j) DEFAULT(NONE) SHARED(bo,coeff,csJVals)
571         DO j = bo(1, 2), bo(2, 2)
572            csJVals(j) = COS(coeff*REAL(j, dp))
573         END DO
574      END IF
575      coeff = twopi/n_tot(3)
576      IF (idir == 3) THEN
577!$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
578         DO k = bo(1, 3), bo(2, 3)
579            csKVals(k) = SIN(coeff*REAL(k, dp))
580         END DO
581      ELSE
582!$OMP        PARALLEL DO PRIVATE(k) DEFAULT(NONE) SHARED(bo,coeff,csKVals)
583         DO k = bo(1, 3), bo(2, 3)
584            csKVals(k) = COS(coeff*REAL(k, dp))
585         END DO
586      END IF
587
588      SELECT CASE (idir)
589      CASE (1)
590         ! x deriv
591!$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) SHARED(spline_g,csIVals,csJVals,csKVals) DEFAULT(NONE)
592         DO ii = 1, SIZE(spline_g%cc)
593            i = spline_g%pw_grid%g_hat(1, ii)
594            j = spline_g%pw_grid%g_hat(2, ii)
595            k = spline_g%pw_grid%g_hat(3, ii)
596!FM                ! formula
597!FM                coeff=(sinVal(1)*cosVal(2)*cosVal(3))/16.0_dp+&
598!FM                     (sinVal(1)*cosVal(2)+sinVal(1)*cosVal(3))*3.0_dp/16.0_dp+&
599!FM                     sinVal(1)*9.0_dp/16.0_dp
600            tmp = csIVals(i)*csJVals(j)
601            coeff = (tmp*csKVals(k) + &
602                     (tmp + csIVals(i)*csKVals(k))*3.0_dp + &
603                     csIVals(i)*9.0_dp)*inv16
604
605            spline_g%cc(ii) = spline_g%cc(ii)* &
606                              CMPLX(0.0_dp, coeff, dp)
607         END DO
608      CASE (2)
609         ! y deriv
610!$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
611         DO ii = 1, SIZE(spline_g%cc)
612            i = spline_g%pw_grid%g_hat(1, ii)
613            j = spline_g%pw_grid%g_hat(2, ii)
614            k = spline_g%pw_grid%g_hat(3, ii)
615
616            tmp = csIVals(i)*csJVals(j)
617            coeff = (tmp*csKVals(k) + &
618                     (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
619                     csJVals(j)*9.0_dp)*inv16
620
621            spline_g%cc(ii) = spline_g%cc(ii)* &
622                              CMPLX(0.0_dp, coeff, dp)
623         END DO
624      CASE (3)
625         ! z deriv
626!$OMP        PARALLEL DO PRIVATE(ii,k,j,i,coeff,tmp) DEFAULT(NONE) SHARED(spline_g,csIVals,csJVals,csKVals)
627         DO ii = 1, SIZE(spline_g%cc)
628            i = spline_g%pw_grid%g_hat(1, ii)
629            j = spline_g%pw_grid%g_hat(2, ii)
630            k = spline_g%pw_grid%g_hat(3, ii)
631
632            tmp = csIVals(i)*csKVals(k)
633            coeff = (tmp*csJVals(j) + &
634                     (tmp + csJVals(j)*csKVals(k))*3.0_dp + &
635                     csKVals(k)*9.0_dp)*inv16
636
637            spline_g%cc(ii) = spline_g%cc(ii)* &
638                              CMPLX(0.0_dp, coeff, dp)
639         END DO
640      END SELECT
641
642      DEALLOCATE (csIVals, csJVals, csKVals)
643
644      CALL timestop(handle)
645   END SUBROUTINE pw_spline2_deriv_g
646
647! **************************************************************************************************
648!> \brief applies a nearest neighbor linear operator to a stripe in x direction:
649!>      out_val(i)=sum(weight(j)*in_val(i+j-1),j=0..2)
650!> \param weights the weights of the linear operator
651!> \param in_val the argument to the operator
652!> \param in_val_first the first argument (needed to calculate out_val(1))
653!> \param in_val_last the last argument (needed to calculate out_val(n_el))
654!> \param out_val the place where the result is accumulated
655!> \param n_el the number of elements in in_v and out_v
656!> \par History
657!>      04.2004 created [fawzi]
658!> \author fawzi
659!> \note
660!>      uses 2 read streams and 1 write stream
661! **************************************************************************************************
662   SUBROUTINE pw_compose_stripe(weights, in_val, in_val_first, in_val_last, &
663                                out_val, n_el)
664      REAL(kind=dp), DIMENSION(0:2), INTENT(in)          :: weights
665      REAL(kind=dp), DIMENSION(*), INTENT(in)            :: in_val
666      REAL(kind=dp), INTENT(in)                          :: in_val_first, in_val_last
667      REAL(kind=dp), DIMENSION(*), INTENT(inout)         :: out_val
668      INTEGER                                            :: n_el
669
670      CHARACTER(len=*), PARAMETER :: routineN = 'pw_compose_stripe', &
671         routineP = moduleN//':'//routineN
672
673      INTEGER                                            :: i
674      REAL(kind=dp)                                      :: v0, v1, v2
675
676!1:n_el), &
677!1:n_el), &
678
679      IF (n_el < 1) RETURN
680      v0 = in_val_first
681      v1 = in_val(1)
682      IF (weights(1) == 0.0_dp) THEN
683         ! optimized version for x deriv
684         DO i = 1, n_el - 3, 3
685            v2 = in_val(i + 1)
686            out_val(i) = out_val(i) + &
687                         weights(0)*v0 + &
688                         weights(2)*v2
689            v0 = in_val(i + 2)
690            out_val(i + 1) = out_val(i + 1) + &
691                             weights(0)*v1 + &
692                             weights(2)*v0
693            v1 = in_val(i + 3)
694            out_val(i + 2) = out_val(i + 2) + &
695                             weights(0)*v2 + &
696                             weights(2)*v1
697         END DO
698      ELSE
699         ! generic version
700         DO i = 1, n_el - 3, 3
701            v2 = in_val(i + 1)
702            out_val(i) = out_val(i) + &
703                         weights(0)*v0 + &
704                         weights(1)*v1 + &
705                         weights(2)*v2
706            v0 = in_val(i + 2)
707            out_val(i + 1) = out_val(i + 1) + &
708                             weights(0)*v1 + &
709                             weights(1)*v2 + &
710                             weights(2)*v0
711            v1 = in_val(i + 3)
712            out_val(i + 2) = out_val(i + 2) + &
713                             weights(0)*v2 + &
714                             weights(1)*v0 + &
715                             weights(2)*v1
716         END DO
717      END IF
718      SELECT CASE (MODULO(n_el - 1, 3))
719      CASE (0)
720         v2 = in_val_last
721         out_val(n_el) = out_val(n_el) + &
722                         weights(0)*v0 + &
723                         weights(1)*v1 + &
724                         weights(2)*v2
725      CASE (1)
726         v2 = in_val(n_el)
727         out_val(n_el - 1) = out_val(n_el - 1) + &
728                             weights(0)*v0 + &
729                             weights(1)*v1 + &
730                             weights(2)*v2
731         v0 = in_val_last
732         out_val(n_el) = out_val(n_el) + &
733                         weights(0)*v1 + &
734                         weights(1)*v2 + &
735                         weights(2)*v0
736      CASE (2)
737         v2 = in_val(n_el - 1)
738         out_val(n_el - 2) = out_val(n_el - 2) + &
739                             weights(0)*v0 + &
740                             weights(1)*v1 + &
741                             weights(2)*v2
742         v0 = in_val(n_el)
743         out_val(n_el - 1) = out_val(n_el - 1) + &
744                             weights(0)*v1 + &
745                             weights(1)*v2 + &
746                             weights(2)*v0
747         v1 = in_val_last
748         out_val(n_el) = out_val(n_el) + &
749                         weights(0)*v2 + &
750                         weights(1)*v0 + &
751                         weights(2)*v1
752      END SELECT
753
754   END SUBROUTINE pw_compose_stripe
755
756! **************************************************************************************************
757!> \brief private routine that computes pw_nn_compose_r (it seems that without
758!>      passing arrays in this way either some compiler do a copyin/out (xlf)
759!>      or by inlining suboptimal code is produced (nag))
760!> \param weights a 3x3x3 array with the linear operator
761!> \param in_val the argument for the linear operator
762!> \param out_val place where the value of the linear oprator should be added
763!> \param pw_in pw to be able to get the needed meta data about in_val and
764!>        out_val
765!> \param bo boundaries of in_val and out_val
766!> \author fawzi
767! **************************************************************************************************
768   SUBROUTINE pw_nn_compose_r_work(weights, in_val, out_val, pw_in, bo)
769      REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2)            :: weights
770      INTEGER, DIMENSION(2, 3)                           :: bo
771      TYPE(pw_type), POINTER                             :: pw_in
772      REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
773         2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(inout)  :: out_val
774      REAL(kind=dp), DIMENSION(bo(1, 1):bo(2, 1), bo(1, &
775         2):bo(2, 2), bo(1, 3):bo(2, 3)), INTENT(in)     :: in_val
776
777      CHARACTER(len=*), PARAMETER :: routineN = 'pw_nn_compose_r_work', &
778         routineP = moduleN//':'//routineN
779
780      INTEGER                                            :: i, j, jw, k, kw, myj, myk
781      INTEGER, DIMENSION(2, 3)                           :: gbo
782      INTEGER, DIMENSION(3)                              :: s
783      LOGICAL                                            :: has_boundary, yderiv, zderiv
784      REAL(kind=dp)                                      :: in_val_f, in_val_l
785      REAL(kind=dp), DIMENSION(:, :), POINTER            :: l_boundary, tmp, u_boundary
786
787      zderiv = ALL(weights(:, :, 1) == 0.0_dp)
788      yderiv = ALL(weights(:, 1, :) == 0.0_dp)
789      bo = pw_in%pw_grid%bounds_local
790      gbo = pw_in%pw_grid%bounds
791      DO i = 1, 3
792         s(i) = bo(2, i) - bo(1, i) + 1
793      END DO
794      IF (ANY(s < 1)) RETURN
795      has_boundary = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
796                         pw_in%pw_grid%bounds(:, 1))
797      IF (has_boundary) THEN
798         ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
799                   u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
800                   tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
801         tmp(:, :) = pw_in%cr3d(bo(2, 1), :, :)
802         CALL mp_sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
803                          gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
804                          l_boundary, pw_in%pw_grid%para%pos_of_x( &
805                          gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
806                          pw_in%pw_grid%para%group)
807         tmp(:, :) = pw_in%cr3d(bo(1, 1), :, :)
808         CALL mp_sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
809                          gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
810                          u_boundary, pw_in%pw_grid%para%pos_of_x( &
811                          gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
812                          pw_in%pw_grid%para%group)
813         DEALLOCATE (tmp)
814      END IF
815
816!$OMP   PARALLEL DO DEFAULT(NONE) PRIVATE(k,kw,myk,j,jw,myj,in_val_f,&
817!$OMP       in_val_l) SHARED(zderiv,yderiv,bo,in_val,out_val,s,l_boundary,&
818!$OMP       u_boundary,weights,has_boundary)
819      DO k = 0, s(3) - 1
820         DO kw = 0, 2
821            myk = bo(1, 3) + MODULO(k + kw - 1, s(3))
822            IF (zderiv .AND. kw == 1) CYCLE
823            DO j = 0, s(2) - 1
824               DO jw = 0, 2
825                  myj = bo(1, 2) + MODULO(j + jw - 1, s(2))
826                  IF (yderiv .AND. jw == 1) CYCLE
827                  IF (has_boundary) THEN
828                     in_val_f = l_boundary(myj, myk)
829                     in_val_l = u_boundary(myj, myk)
830                  ELSE
831                     in_val_f = in_val(bo(2, 1), myj, myk)
832                     in_val_l = in_val(bo(1, 1), myj, myk)
833                  END IF
834                  CALL pw_compose_stripe(weights=weights(:, jw, kw), &
835                                         in_val=in_val(:, myj, myk), &
836                                         in_val_first=in_val_f, in_val_last=in_val_l, &
837                                         out_val=out_val(:, bo(1, 2) + j, bo(1, 3) + k), n_el=s(1))
838               END DO
839            END DO
840         END DO
841      END DO
842      IF (has_boundary) THEN
843         DEALLOCATE (l_boundary, u_boundary)
844      END IF
845   END SUBROUTINE pw_nn_compose_r_work
846
847! **************************************************************************************************
848!> \brief applies a nearest neighbor linear operator to a pw in real space
849!> \param weights a 3x3x3 array with the linear operator
850!> \param pw_in the argument for the linear operator
851!> \param pw_out place where the value of the linear oprator should be added
852!> \author fawzi
853!> \note
854!>      has specialized versions for derivative operator (with central values==0)
855! **************************************************************************************************
856   SUBROUTINE pw_nn_compose_r(weights, pw_in, pw_out)
857      REAL(kind=dp), DIMENSION(0:2, 0:2, 0:2)            :: weights
858      TYPE(pw_type), POINTER                             :: pw_in, pw_out
859
860      CHARACTER(len=*), PARAMETER :: routineN = 'pw_nn_compose_r', &
861         routineP = moduleN//':'//routineN
862
863      INTEGER                                            :: handle
864
865      CALL timeset(routineN, handle)
866      CPASSERT(ASSOCIATED(pw_in))
867      CPASSERT(pw_in%in_space == REALSPACE)
868      CPASSERT(pw_in%in_use == REALDATA3D)
869      CPASSERT(ASSOCIATED(pw_out))
870      CPASSERT(pw_out%in_space == REALSPACE)
871      CPASSERT(pw_out%in_use == REALDATA3D)
872      IF (.NOT. ALL(pw_in%pw_grid%bounds_local(:, 2:3) == pw_in%pw_grid%bounds(:, 2:3))) THEN
873         CPABORT("wrong pw distribution")
874      END IF
875      CALL pw_nn_compose_r_work(weights=weights, in_val=pw_in%cr3d, &
876                                out_val=pw_out%cr3d, pw_in=pw_in, bo=pw_in%pw_grid%bounds_local)
877      CALL timestop(handle)
878   END SUBROUTINE pw_nn_compose_r
879
880! **************************************************************************************************
881!> \brief calculates the values of a nearest neighbor smearing
882!> \param pw_in the argument for the linear operator
883!> \param pw_out place where the smeared values should be added
884!> \param coeffs array with the coefficent of the smearing, ordered with
885!>        the distance from the center: coeffs(1) the coeff of the central
886!>        element, coeffs(2) the coeff of the 6 element with distance 1,
887!>        coeff(3) the coeff of the 12 elements at distance sqrt(2),
888!>        coeff(4) the coeff of the 8 elements at distance sqrt(3).
889!> \author Fawzi Mohamed
890!> \note
891!>      does not normalize the smear to 1.
892!>      with coeff=(/ 8._dp/27._dp, 2._dp/27._dp, 1._dp/54._dp, 1._dp/216._dp /)
893!>      is equivalent to pw_spline3_evaluate_values_g, with
894!>      coeff=(/ 27._dp/64._dp, 9._dp/128._dp, 3._dp/256._dp, 1._dp/512._dp /)
895! **************************************************************************************************
896   SUBROUTINE pw_nn_smear_r(pw_in, pw_out, coeffs)
897      TYPE(pw_type), POINTER                             :: pw_in, pw_out
898      REAL(KIND=dp), DIMENSION(4), INTENT(in)            :: coeffs
899
900      CHARACTER(len=*), PARAMETER :: routineN = 'pw_nn_smear_r', routineP = moduleN//':'//routineN
901
902      INTEGER                                            :: i, j, k
903      REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1)         :: weights
904
905      DO k = -1, 1
906         DO j = -1, 1
907            DO i = -1, 1
908               weights(i, j, k) = coeffs(ABS(i) + ABS(j) + ABS(k) + 1)
909            END DO
910         END DO
911      END DO
912
913      CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
914   END SUBROUTINE pw_nn_smear_r
915
916! **************************************************************************************************
917!> \brief calculates a nearest neighbor central derivative.
918!>      for the x dir:
919!>      pw_out%cr3d(i,j,k)=( pw_in(i+1,j,k)-pw_in(i-1,j,k) )*coeff(1)+
920!>             ( pw_in(i+1,j(+-)1,k)-pw_in(i-1,j(+-)1,k)+
921!>               pw_in(i+1,j,k(+-)1)-pw_in(i-1,j,k(+-)1) )*coeff(2)+
922!>             ( pw_in(i+1,j(+-)1,k(+-)1)-pw_in(i-1,j(+-)1,k(+-)1)+
923!>               pw_in(i+1,j(+-)1,k(-+)1)-pw_in(i-1,j(+-)1,k(-+)1) )*coeff(3)
924!>      periodic boundary conditions are applied
925!> \param pw_in the argument for the linear operator
926!> \param pw_out place where the smeared values should be added
927!> \param coeffs array with the coefficent of the front (positive) plane
928!>        of the central derivative, ordered with
929!>        the distance from the center: coeffs(1) the coeff of the central
930!>        element, coeffs(2) the coeff of the 4 element with distance 1,
931!>        coeff(3) the coeff of the 4 elements at distance sqrt(2)
932!> \param idir ...
933!> \author Fawzi Mohamed
934!> \note
935!>      with coeff=(/ 2.0_dp/9.0_dp,  1.0_dp/18.0_dp, 1.0_dp/72.0_dp /)
936!>      is equivalent to pw_spline3_deriv_r, with
937!>      coeff=(/ 9.0_dp/32.0_dp, 3.0_dp/64.0_dp, 1.0_dp/128.0_dp /)
938!>      to pw_spline2_deriv_r
939!>      coeff=(/ 25._dp/72._dp, 5._dp/144, 1._dp/288._dp /)
940! **************************************************************************************************
941   SUBROUTINE pw_nn_deriv_r(pw_in, pw_out, coeffs, idir)
942      TYPE(pw_type), POINTER                             :: pw_in, pw_out
943      REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: coeffs
944      INTEGER                                            :: idir
945
946      CHARACTER(len=*), PARAMETER :: routineN = 'pw_nn_deriv_r', routineP = moduleN//':'//routineN
947
948      INTEGER                                            :: i, idirVal, j, k
949      REAL(kind=dp), DIMENSION(-1:1, -1:1, -1:1)         :: weights
950
951      DO k = -1, 1
952         DO j = -1, 1
953            DO i = -1, 1
954               SELECT CASE (idir)
955               CASE (1)
956                  idirVal = i
957               CASE (2)
958                  idirVal = j
959               CASE (3)
960                  idirVal = k
961               CASE default
962                  CPABORT("invalid idir ("//TRIM(cp_to_string(idir))//")")
963               END SELECT
964               IF (idirVal == 0) THEN
965                  weights(i, j, k) = 0.0_dp
966               ELSE
967                  weights(i, j, k) = REAL(idirVal, dp)*coeffs(ABS(i) + ABS(j) + ABS(k))
968               END IF
969            END DO
970         END DO
971      END DO
972
973      CALL pw_nn_compose_r(weights=weights, pw_in=pw_in, pw_out=pw_out)
974   END SUBROUTINE pw_nn_deriv_r
975
976! **************************************************************************************************
977!> \brief low level function that adds a coarse grid
978!>      to a fine grid.
979!>      If pbc is true periodic boundary conditions are applied
980!>
981!>      It will add to
982!>
983!>        fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
984!>                    2*coarse_bounds(1,2):2*coarse_bounds(2,2),
985!>                    2*coarse_bounds(1,3):2*coarse_bounds(2,3))
986!>
987!>      using
988!>
989!>        coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
990!>                      coarse_bounds(1,2):coarse_bounds(2,2),
991!>                      coarse_bounds(1,3):coarse_bounds(2,3))
992!>
993!>      composed with the weights obtained by the direct product of the
994!>      1d coefficents weights:
995!>
996!>      for i,j,k in -3..3
997!>         w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
998!>                  weights_1d(abs(k)+1)
999!> \param coarse_coeffs_pw the values of the coefficients
1000!> \param fine_values_pw where to add the values due to the
1001!>        coarse coeffs
1002!> \param weights_1d the weights of the 1d smearing
1003!> \param w_border0 the 1d weight at the border (when pbc is false)
1004!> \param w_border1 the 1d weights for a point one off the border
1005!>        (w_border1(1) is the weight of the coefficent at the border)
1006!>        (used if pbc is false)
1007!> \param pbc if periodic boundary conditions should be applied
1008!> \param safe_computation ...
1009!> \author fawzi
1010!> \note
1011!>      coarse looping is continuos, I did not check if keeping the fine looping
1012!>      contiguos is better.
1013!>      And I ask myself yet again why, why we use x-slice distribution,
1014!>      z-slice distribution would be much better performancewise
1015!>      (and would semplify this code enormously).
1016!>      fine2coarse has much more understandable parallel part (build up of
1017!>      send/rcv sizes,... but worse if you have really a lot of processors,
1018!>      probabily irrelevant because it is not critical) [fawzi].
1019! **************************************************************************************************
1020   SUBROUTINE add_coarse2fine(coarse_coeffs_pw, fine_values_pw, &
1021                              weights_1d, w_border0, w_border1, pbc, safe_computation)
1022      TYPE(pw_type), POINTER                             :: coarse_coeffs_pw, fine_values_pw
1023      REAL(kind=dp), DIMENSION(4), INTENT(in)            :: weights_1d
1024      REAL(kind=dp), INTENT(in)                          :: w_border0
1025      REAL(kind=dp), DIMENSION(3), INTENT(in)            :: w_border1
1026      LOGICAL, INTENT(in)                                :: pbc
1027      LOGICAL, INTENT(in), OPTIONAL                      :: safe_computation
1028
1029      CHARACTER(len=*), PARAMETER :: routineN = 'add_coarse2fine', &
1030         routineP = moduleN//':'//routineN
1031
1032      INTEGER :: coarse_slice_size, f_shift(3), fi, fi_lb, fi_ub, fj, fk, handle, handle2, i, ii, &
1033         ij, ik, ip, j, k, my_lb, my_ub, n_procs, p, p_lb, p_old, p_ub, rcv_tot_size, rest_b, &
1034         s(3), send_tot_size, sf, shift, ss, x, x_att, xx
1035      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: rcv_offset, rcv_size, real_rcv_size, &
1036                                                            send_offset, send_size, sent_size
1037      INTEGER, DIMENSION(2, 3)                           :: coarse_bo, coarse_gbo, fine_bo, &
1038                                                            fine_gbo, my_coarse_bo
1039      INTEGER, DIMENSION(:), POINTER                     :: pos_of_x
1040      LOGICAL                                            :: has_i_lbound, has_i_ubound, is_split, &
1041                                                            safe_calc
1042      REAL(kind=dp)                                      :: v0, v1, v2, v3, wi, wj, wk
1043      REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: rcv_buf, send_buf
1044      REAL(kind=dp), DIMENSION(3)                        :: w_0, ww0
1045      REAL(kind=dp), DIMENSION(4)                        :: w_1, ww1
1046      REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: coarse_coeffs, fine_values
1047
1048      CALL timeset(routineN, handle)
1049!    CALL timeset(routineN//"_pre",handle2)
1050      safe_calc = .FALSE.
1051      IF (PRESENT(safe_computation)) safe_calc = safe_computation
1052      CALL mp_comm_compare(coarse_coeffs_pw%pw_grid%para%group, &
1053                           fine_values_pw%pw_grid%para%group, ii)
1054      IF (ii > 1) THEN
1055         CPABORT("")
1056      END IF
1057      my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
1058      coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
1059      fine_bo = fine_values_pw%pw_grid%bounds_local
1060      fine_gbo = fine_values_pw%pw_grid%bounds
1061      f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
1062      DO j = 2, 3
1063         DO i = 1, 2
1064            coarse_bo(i, j) = FLOOR((fine_bo(i, j) - f_shift(j))/2.)
1065         END DO
1066      END DO
1067      IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
1068         coarse_bo(1, 1) = FLOOR((fine_bo(1, 1) - 2 - f_shift(1))/2.)
1069         coarse_bo(2, 1) = FLOOR((fine_bo(2, 1) + 3 - f_shift(1))/2.)
1070      ELSE
1071         coarse_bo(1, 1) = coarse_gbo(2, 1)
1072         coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
1073      END IF
1074      is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
1075      IF (.NOT. is_split .OR. .NOT. pbc) THEN
1076         coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
1077         coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
1078      END IF
1079      has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
1080      has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
1081
1082      IF (pbc) THEN
1083         CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1084         CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + 1 + f_shift))
1085      ELSE
1086         CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
1087         CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1088      END IF
1089
1090      coarse_coeffs => coarse_coeffs_pw%cr3d
1091      DO i = 1, 3
1092         s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
1093      END DO
1094!       CALL timestop(handle2)
1095      ! *** parallel case
1096      IF (is_split) THEN
1097         CALL timeset(routineN//"_comm", handle2)
1098         coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
1099                             (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
1100         n_procs = coarse_coeffs_pw%pw_grid%para%group_size
1101         ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
1102                   sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
1103                   rcv_offset(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
1104
1105         ! ** rcv size count
1106
1107         pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
1108         p_old = pos_of_x(coarse_gbo(1, 1) &
1109                          + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
1110         rcv_size = 0
1111         DO x = coarse_bo(1, 1), coarse_bo(2, 1)
1112            p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1113            rcv_size(p) = rcv_size(p) + coarse_slice_size
1114         END DO
1115
1116         ! ** send size count
1117
1118         pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
1119         sf = fine_gbo(2, 1) - fine_gbo(1, 1) + 1
1120         fi_lb = 2*my_coarse_bo(1, 1) - 3 + f_shift(1)
1121         fi_ub = 2*my_coarse_bo(2, 1) + 3 + f_shift(1)
1122         IF (.NOT. pbc) THEN
1123            fi_lb = MAX(fi_lb, fine_gbo(1, 1))
1124            fi_ub = MIN(fi_ub, fine_gbo(2, 1))
1125         ELSE
1126            fi_ub = MIN(fi_ub, fi_lb + sf - 1)
1127         END IF
1128         p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
1129         p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
1130         send_size = 0
1131         DO x = fi_lb, fi_ub
1132            p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
1133            IF (p /= p_old) THEN
1134               p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2.)
1135
1136               send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
1137                                                      - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
1138
1139               IF (pbc) THEN
1140                  DO xx = p_lb, coarse_gbo(1, 1) - 1
1141                     x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1142                     IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1143                        send_size(p_old) = send_size(p_old) + coarse_slice_size
1144                     END IF
1145                  END DO
1146                  DO xx = coarse_gbo(2, 1) + 1, p_ub
1147                     x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1148                     IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1149                        send_size(p_old) = send_size(p_old) + coarse_slice_size
1150                     END IF
1151                  END DO
1152               END IF
1153
1154               p_old = p
1155               p_lb = FLOOR((x - 2 - f_shift(1))/2.)
1156            END IF
1157         END DO
1158         p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
1159
1160         send_size(p_old) = send_size(p_old) + (MIN(p_ub, my_coarse_bo(2, 1)) &
1161                                                - MAX(p_lb, my_coarse_bo(1, 1)) + 1)*coarse_slice_size
1162
1163         IF (pbc) THEN
1164            DO xx = p_lb, coarse_gbo(1, 1) - 1
1165               x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1166               IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1167                  send_size(p_old) = send_size(p_old) + coarse_slice_size
1168               END IF
1169            END DO
1170            DO xx = coarse_gbo(2, 1) + 1, p_ub
1171               x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1172               IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1173                  send_size(p_old) = send_size(p_old) + coarse_slice_size
1174               END IF
1175            END DO
1176         END IF
1177         ! ** offsets & alloc send-rcv
1178
1179         send_tot_size = 0
1180         DO ip = 0, n_procs - 1
1181            send_offset(ip) = send_tot_size
1182            send_tot_size = send_tot_size + send_size(ip)
1183         END DO
1184         ALLOCATE (send_buf(0:send_tot_size - 1))
1185
1186         rcv_tot_size = 0
1187         DO ip = 0, n_procs - 1
1188            rcv_offset(ip) = rcv_tot_size
1189            rcv_tot_size = rcv_tot_size + rcv_size(ip)
1190         END DO
1191         IF (.NOT. rcv_tot_size == (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) THEN
1192            CPABORT("Error calculating rcv_tot_size ")
1193         END IF
1194         ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
1195
1196         ! ** fill send buffer
1197
1198         p_old = pos_of_x(fine_gbo(1, 1) + MODULO(fi_lb - fine_gbo(1, 1), sf))
1199         p_lb = FLOOR((fi_lb - 2 - f_shift(1))/2.)
1200         sent_size(:) = send_offset
1201         ss = my_coarse_bo(2, 1) - my_coarse_bo(1, 1) + 1
1202         DO x = fi_lb, fi_ub
1203            p = pos_of_x(fine_gbo(1, 1) + MODULO(x - fine_gbo(1, 1), sf))
1204            IF (p /= p_old) THEN
1205               shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
1206                       FLOOR((x - 1 - f_shift(1))/2._dp)
1207               p_ub = FLOOR((x - 1 + 3 - f_shift(1))/2._dp)
1208
1209               IF (pbc) THEN
1210                  DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
1211                     x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), sf)
1212                     IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1213                        CALL dcopy(coarse_slice_size, &
1214                                   coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1215                                                 my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
1216                        sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1217                     END IF
1218                  END DO
1219               END IF
1220
1221               ii = sent_size(p_old)
1222               DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1223                  DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1224                     DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
1225                        send_buf(ii) = coarse_coeffs(i, j, k)
1226                        ii = ii + 1
1227                     END DO
1228                  END DO
1229               END DO
1230               sent_size(p_old) = ii
1231
1232               IF (pbc) THEN
1233                  DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
1234                     x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1235                     IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1236                        CALL dcopy(coarse_slice_size, &
1237                                   coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1238                                                 my_coarse_bo(1, 3)), ss, &
1239                                   send_buf(sent_size(p_old)), 1)
1240                        sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1241                     END IF
1242                  END DO
1243               END IF
1244
1245               p_old = p
1246               p_lb = FLOOR((x - 2 - f_shift(1))/2.)
1247            END IF
1248         END DO
1249         shift = FLOOR((fine_gbo(1, 1) + MODULO(x - 1 - fine_gbo(1, 1), sf) - f_shift(1))/2._dp) - &
1250                 FLOOR((x - 1 - f_shift(1))/2._dp)
1251         p_ub = FLOOR((fi_ub + 3 - f_shift(1))/2.)
1252
1253         IF (pbc) THEN
1254            DO xx = p_lb + shift, coarse_gbo(1, 1) - 1
1255               x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1256               IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1257                  CALL dcopy(coarse_slice_size, &
1258                             coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1259                                           my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
1260                  sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1261               END IF
1262            END DO
1263         END IF
1264
1265         ii = sent_size(p_old)
1266         DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1267            DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1268               DO i = MAX(p_lb + shift, my_coarse_bo(1, 1)), MIN(p_ub + shift, my_coarse_bo(2, 1))
1269                  send_buf(ii) = coarse_coeffs(i, j, k)
1270                  ii = ii + 1
1271               END DO
1272            END DO
1273         END DO
1274         sent_size(p_old) = ii
1275
1276         IF (pbc) THEN
1277            DO xx = coarse_gbo(2, 1) + 1, p_ub + shift
1278               x_att = coarse_gbo(1, 1) + MODULO(xx - coarse_gbo(1, 1), s(1))
1279               IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
1280                  CALL dcopy(coarse_slice_size, &
1281                             coarse_coeffs(x_att, my_coarse_bo(1, 2), &
1282                                           my_coarse_bo(1, 3)), ss, send_buf(sent_size(p_old)), 1)
1283                  sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1284               END IF
1285            END DO
1286         END IF
1287
1288         CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
1289         CPASSERT(sent_size(n_procs - 1) == send_tot_size)
1290         ! test send/rcv sizes
1291         CALL mp_alltoall(send_size, real_rcv_size, 1, coarse_coeffs_pw%pw_grid%para%group)
1292         CPASSERT(ALL(real_rcv_size == rcv_size))
1293         ! all2all
1294         CALL mp_alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
1295                          rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset, &
1296                          group=coarse_coeffs_pw%pw_grid%para%group)
1297
1298         ! ** reorder rcv buffer
1299         ! (actually reordering should be needed only with pbc)
1300
1301         ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
1302                                 coarse_bo(1, 2):coarse_bo(2, 2), &
1303                                 coarse_bo(1, 3):coarse_bo(2, 3)))
1304
1305         my_lb = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
1306         my_ub = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
1307         pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
1308         sent_size(:) = rcv_offset
1309         ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
1310         DO x = my_ub + 1, coarse_bo(2, 1)
1311            p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1312            CALL dcopy(coarse_slice_size, &
1313                       rcv_buf(sent_size(p_old)), 1, &
1314                       coarse_coeffs(x, coarse_bo(1, 2), &
1315                                     coarse_bo(1, 3)), ss)
1316            sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1317         END DO
1318         p_old = pos_of_x(coarse_gbo(1, 1) &
1319                          + MODULO(my_lb - coarse_gbo(1, 1), s(1)))
1320         p_lb = my_lb
1321         DO x = my_lb, my_ub
1322            p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1323            IF (p /= p_old) THEN
1324               p_ub = x - 1
1325
1326               ii = sent_size(p_old)
1327               DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1328                  DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1329                     DO i = p_lb, p_ub
1330                        coarse_coeffs(i, j, k) = rcv_buf(ii)
1331                        ii = ii + 1
1332                     END DO
1333                  END DO
1334               END DO
1335               sent_size(p_old) = ii
1336
1337               p_lb = x
1338               p_old = p
1339            END IF
1340            rcv_size(p) = rcv_size(p) + coarse_slice_size
1341         END DO
1342         p_ub = my_ub
1343         ii = sent_size(p_old)
1344         DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1345            DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1346               DO i = p_lb, p_ub
1347                  coarse_coeffs(i, j, k) = rcv_buf(ii)
1348                  ii = ii + 1
1349               END DO
1350            END DO
1351         END DO
1352         sent_size(p_old) = ii
1353         DO x = coarse_bo(1, 1), my_lb - 1
1354            p_old = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
1355            CALL dcopy(coarse_slice_size, &
1356                       rcv_buf(sent_size(p_old)), 1, &
1357                       coarse_coeffs(x, coarse_bo(1, 2), &
1358                                     coarse_bo(1, 3)), ss)
1359            sent_size(p_old) = sent_size(p_old) + coarse_slice_size
1360         END DO
1361
1362         CPASSERT(ALL(sent_size(0:n_procs - 2) == rcv_offset(1:)))
1363         CPASSERT(sent_size(n_procs - 1) == rcv_tot_size)
1364
1365         ! dealloc
1366         DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
1367         DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
1368         CALL timestop(handle2)
1369
1370      END IF
1371      fine_values => fine_values_pw%cr3d
1372      w_0 = (/weights_1d(3), weights_1d(1), weights_1d(3)/)
1373      w_1 = (/weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)/)
1374
1375      DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1376         DO ik = -3, 3
1377            IF (pbc) THEN
1378               wk = weights_1d(ABS(ik) + 1)
1379               fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
1380            ELSE
1381               fk = 2*k + ik + f_shift(3)
1382               IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
1383                  IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
1384                  IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
1385                     IF (ik /= 0) CYCLE
1386                     wk = w_border0
1387                  ELSE IF (fk == 2*coarse_bo(1, 3) + 1 + f_shift(3)) THEN
1388                     SELECT CASE (ik)
1389                     CASE (1)
1390                        wk = w_border1(1)
1391                     CASE (-1)
1392                        wk = w_border1(2)
1393                     CASE (-3)
1394                        wk = w_border1(3)
1395                     CASE default
1396                        CPABORT("")
1397                        CYCLE
1398                     END SELECT
1399                  ELSE
1400                     SELECT CASE (ik)
1401                     CASE (3)
1402                        wk = w_border1(3)
1403                     CASE (1)
1404                        wk = w_border1(2)
1405                     CASE (-1)
1406                        wk = w_border1(1)
1407                     CASE default
1408                        CPABORT("")
1409                        CYCLE
1410                     END SELECT
1411                  END IF
1412               ELSE
1413                  wk = weights_1d(ABS(ik) + 1)
1414               END IF
1415            END IF
1416            DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1417               DO ij = -3, 3
1418                  IF (pbc) THEN
1419                     wj = weights_1d(ABS(ij) + 1)*wk
1420                     fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), 2*s(2))
1421                  ELSE
1422                     fj = 2*j + ij + f_shift(2)
1423                     IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
1424                        IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
1425                        IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
1426                           IF (ij /= 0) CYCLE
1427                           wj = w_border0*wk
1428                        ELSE IF (fj == 2*coarse_bo(1, 2) + 1 + f_shift(2)) THEN
1429                           SELECT CASE (ij)
1430                           CASE (1)
1431                              wj = w_border1(1)*wk
1432                           CASE (-1)
1433                              wj = w_border1(2)*wk
1434                           CASE (-3)
1435                              wj = w_border1(3)*wk
1436                           CASE default
1437                              CYCLE
1438                           END SELECT
1439                        ELSE
1440                           SELECT CASE (ij)
1441                           CASE (-1)
1442                              wj = w_border1(1)*wk
1443                           CASE (1)
1444                              wj = w_border1(2)*wk
1445                           CASE (3)
1446                              wj = w_border1(3)*wk
1447                           CASE default
1448                              CYCLE
1449                           END SELECT
1450                        END IF
1451                     ELSE
1452                        wj = weights_1d(ABS(ij) + 1)*wk
1453                     END IF
1454                  END IF
1455
1456                  IF (fine_bo(2, 1) - fine_bo(1, 1) < 7 .OR. safe_calc) THEN
1457!                      CALL timeset(routineN//"_safe",handle2)
1458                     DO i = coarse_bo(1, 1), coarse_bo(2, 1)
1459                        DO ii = -3, 3
1460                           IF (pbc .AND. .NOT. is_split) THEN
1461                              wi = weights_1d(ABS(ii) + 1)*wj
1462                              fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
1463                           ELSE
1464                              fi = 2*i + ii + f_shift(1)
1465                              IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
1466                              IF (.NOT. pbc .AND. (fi <= fine_gbo(1, 1) + 1 .OR. &
1467                                                   fi >= fine_gbo(2, 1) - 1)) THEN
1468                                 IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
1469                                    IF (ii /= 0) CYCLE
1470                                    wi = w_border0*wj
1471                                 ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
1472                                    SELECT CASE (ii)
1473                                    CASE (1)
1474                                       wi = w_border1(1)*wj
1475                                    CASE (-1)
1476                                       wi = w_border1(2)*wj
1477                                    CASE (-3)
1478                                       wi = w_border1(3)*wj
1479                                    CASE default
1480                                       CYCLE
1481                                    END SELECT
1482                                 ELSE
1483                                    SELECT CASE (ii)
1484                                    CASE (-1)
1485                                       wi = w_border1(1)*wj
1486                                    CASE (1)
1487                                       wi = w_border1(2)*wj
1488                                    CASE (3)
1489                                       wi = w_border1(3)*wj
1490                                    CASE default
1491                                       CYCLE
1492                                    END SELECT
1493                                 END IF
1494                              ELSE
1495                                 wi = weights_1d(ABS(ii) + 1)*wj
1496                              END IF
1497                           END IF
1498                           fine_values(fi, fj, fk) = &
1499                              fine_values(fi, fj, fk) + &
1500                              wi*coarse_coeffs(i, j, k)
1501                        END DO
1502                     END DO
1503!                      CALL timestop(handle2)
1504                  ELSE
1505!                      CALL timeset(routineN//"_core1",handle2)
1506                     ww0 = wj*w_0
1507                     ww1 = wj*w_1
1508                     IF (pbc .AND. .NOT. is_split) THEN
1509                        v3 = coarse_coeffs(coarse_bo(2, 1), j, k)
1510                        i = coarse_bo(1, 1)
1511                        fi = 2*i + f_shift(1)
1512                        v0 = coarse_coeffs(i, j, k)
1513                        v1 = coarse_coeffs(i + 1, j, k)
1514                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1515                                                  ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
1516                        v2 = coarse_coeffs(i + 2, j, k)
1517                        fi = fi + 1
1518                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1519                                                  ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1520                     ELSE IF (.NOT. has_i_lbound) THEN
1521                        i = coarse_bo(1, 1)
1522                        fi = 2*i + f_shift(1)
1523                        v0 = coarse_coeffs(i, j, k)
1524                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1525                                                  w_border0*wj*v0
1526                        v1 = coarse_coeffs(i + 1, j, k)
1527                        v2 = coarse_coeffs(i + 2, j, k)
1528                        fi = fi + 1
1529                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1530                                                  wj*(w_border1(1)*v0 + w_border1(2)*v1 + &
1531                                                      w_border1(3)*v2)
1532                     ELSE
1533                        i = coarse_bo(1, 1)
1534                        v0 = coarse_coeffs(i, j, k)
1535                        v1 = coarse_coeffs(i + 1, j, k)
1536                        v2 = coarse_coeffs(i + 2, j, k)
1537                        fi = 2*i + f_shift(1) + 1
1538                        IF (.NOT. (fi + 1 == fine_bo(1, 1) .OR. &
1539                                   fi + 2 == fine_bo(1, 1))) THEN
1540                           CALL cp_abort(__LOCATION__, &
1541                                         "unexpected start index "// &
1542                                         TRIM(cp_to_string(coarse_bo(1, 1)))//" "// &
1543                                         TRIM(cp_to_string(fi)))
1544                        END IF
1545                     END IF
1546                     fi = fi + 1
1547                     IF (fi >= fine_bo(1, 1)) THEN
1548                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1549                                                  ww0(1)*v0 + ww0(2)*v1 + &
1550                                                  ww0(3)*v2
1551                     ELSE
1552                        CPASSERT(fi + 1 == fine_bo(1, 1))
1553                     END IF
1554!                      CALL timestop(handle2)
1555!                      CALL timeset(routineN//"_core",handle2)
1556                     DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - 3, 4
1557                        v3 = coarse_coeffs(i, j, k)
1558                        fi = fi + 1
1559                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1560                                                  (ww1(1)*v0 + ww1(2)*v1 + &
1561                                                   ww1(3)*v2 + ww1(4)*v3)
1562                        fi = fi + 1
1563                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1564                                                  (ww0(1)*v1 + ww0(2)*v2 + &
1565                                                   ww0(3)*v3)
1566                        v0 = coarse_coeffs(i + 1, j, k)
1567                        fi = fi + 1
1568                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1569                                                  (ww1(4)*v0 + ww1(1)*v1 + &
1570                                                   ww1(2)*v2 + ww1(3)*v3)
1571                        fi = fi + 1
1572                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1573                                                  (ww0(1)*v2 + ww0(2)*v3 + &
1574                                                   ww0(3)*v0)
1575                        v1 = coarse_coeffs(i + 2, j, k)
1576                        fi = fi + 1
1577                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1578                                                  (ww1(3)*v0 + ww1(4)*v1 + &
1579                                                   ww1(1)*v2 + ww1(2)*v3)
1580                        fi = fi + 1
1581                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1582                                                  (ww0(1)*v3 + ww0(2)*v0 + &
1583                                                   ww0(3)*v1)
1584                        v2 = coarse_coeffs(i + 3, j, k)
1585                        fi = fi + 1
1586                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1587                                                  (ww1(2)*v0 + ww1(3)*v1 + &
1588                                                   ww1(4)*v2 + ww1(1)*v3)
1589                        fi = fi + 1
1590                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1591                                                  (ww0(1)*v0 + ww0(2)*v1 + &
1592                                                   ww0(3)*v2)
1593                     END DO
1594!                      CALL timestop(handle2)
1595!                      CALL timeset(routineN//"_clean",handle2)
1596                     rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - coarse_bo(1, 1) - 3 + 1, 4)
1597                     IF (rest_b > 0) THEN
1598                        i = FLOOR((fine_bo(2, 1) - f_shift(1))/2.) - rest_b + 1
1599                        v3 = coarse_coeffs(i, j, k)
1600                        fi = fi + 1
1601                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1602                                                  (ww1(1)*v0 + ww1(2)*v1 + &
1603                                                   ww1(3)*v2 + ww1(4)*v3)
1604                        fi = fi + 1
1605                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1606                                                  (ww0(1)*v1 + ww0(2)*v2 + &
1607                                                   ww0(3)*v3)
1608                        IF (rest_b > 1) THEN
1609                           v0 = coarse_coeffs(i + 1, j, k)
1610                           fi = fi + 1
1611                           fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1612                                                     (ww1(4)*v0 + ww1(1)*v1 + &
1613                                                      ww1(2)*v2 + ww1(3)*v3)
1614                           fi = fi + 1
1615                           fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1616                                                     (ww0(1)*v2 + ww0(2)*v3 + &
1617                                                      ww0(3)*v0)
1618                           IF (rest_b > 2) THEN
1619                              v1 = coarse_coeffs(i + 2, j, k)
1620                              fi = fi + 1
1621                              fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1622                                                        (ww1(3)*v0 + ww1(4)*v1 + &
1623                                                         ww1(1)*v2 + ww1(2)*v3)
1624                              fi = fi + 1
1625                              fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1626                                                        (ww0(1)*v3 + ww0(2)*v0 + &
1627                                                         ww0(3)*v1)
1628                              IF (pbc .AND. .NOT. is_split) THEN
1629                                 v2 = coarse_coeffs(coarse_bo(1, 1), j, k)
1630                                 fi = fi + 1
1631                                 fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1632                                                           ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1633                                 fi = fi + 1
1634                                 fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1635                                                           ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
1636                                 v3 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1637                                 fi = fi + 1
1638                                 fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1639                                                           ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1640                              ELSE IF (has_i_ubound) THEN
1641                                 v2 = coarse_coeffs(i + 3, j, k)
1642                                 fi = fi + 1
1643                                 fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1644                                                           ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1645                                 fi = fi + 1
1646                                 fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1647                                                           ww0(1)*v0 + ww0(2)*v1 + ww0(3)*v2
1648                                 IF (fi + 1 == fine_bo(2, 1)) THEN
1649                                    v3 = coarse_coeffs(i + 4, j, k)
1650                                    fi = fi + 1
1651                                    fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1652                                                              ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1653                                 END IF
1654                              ELSE
1655                                 fi = fi + 1
1656                                 fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1657                                                           wj*(w_border1(3)*v3 + w_border1(2)*v0 + &
1658                                                               w_border1(1)*v1)
1659                                 fi = fi + 1
1660                                 fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1661                                                           w_border0*wj*v1
1662                              END IF
1663                           ELSE IF (pbc .AND. .NOT. is_split) THEN
1664                              v1 = coarse_coeffs(coarse_bo(1, 1), j, k)
1665                              fi = fi + 1
1666                              fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1667                                                        ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1668                              fi = fi + 1
1669                              fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1670                                                        ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
1671                              v2 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1672                              fi = fi + 1
1673                              fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1674                                                        ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1675                           ELSE IF (has_i_ubound) THEN
1676                              v1 = coarse_coeffs(i + 2, j, k)
1677                              fi = fi + 1
1678                              fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1679                                                        ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1680                              fi = fi + 1
1681                              fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1682                                                        ww0(1)*v3 + ww0(2)*v0 + ww0(3)*v1
1683                              IF (fi + 1 == fine_bo(2, 1)) THEN
1684                                 v2 = coarse_coeffs(i + 3, j, k)
1685                                 fi = fi + 1
1686                                 fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1687                                                           ww1(1)*v3 + ww1(2)*v0 + ww1(3)*v1 + ww1(4)*v2
1688                              END IF
1689                           ELSE
1690                              fi = fi + 1
1691                              fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1692                                                        wj*(w_border1(3)*v2 + w_border1(2)*v3 + &
1693                                                            w_border1(1)*v0)
1694                              fi = fi + 1
1695                              fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1696                                                        w_border0*wj*v0
1697                           END IF
1698                        ELSE IF (pbc .AND. .NOT. is_split) THEN
1699                           v0 = coarse_coeffs(coarse_bo(1, 1), j, k)
1700                           fi = fi + 1
1701                           fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1702                                                     ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1703                           fi = fi + 1
1704                           fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1705                                                     ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
1706                           v1 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1707                           fi = fi + 1
1708                           fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1709                                                     ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1710                        ELSE IF (has_i_ubound) THEN
1711                           v0 = coarse_coeffs(i + 1, j, k)
1712                           fi = fi + 1
1713                           fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1714                                                     ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1715                           fi = fi + 1
1716                           fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1717                                                     ww0(1)*v2 + ww0(2)*v3 + ww0(3)*v0
1718                           IF (fi + 1 == fine_bo(2, 1)) THEN
1719                              v1 = coarse_coeffs(i + 2, j, k)
1720                              fi = fi + 1
1721                              fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1722                                                        ww1(1)*v2 + ww1(2)*v3 + ww1(3)*v0 + ww1(4)*v1
1723                           END IF
1724                        ELSE
1725                           fi = fi + 1
1726                           fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1727                                                     wj*(w_border1(3)*v1 + w_border1(2)*v2 + &
1728                                                         w_border1(1)*v3)
1729                           fi = fi + 1
1730                           fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1731                                                     w_border0*wj*v3
1732                        END IF
1733                     ELSE IF (pbc .AND. .NOT. is_split) THEN
1734                        v3 = coarse_coeffs(coarse_bo(1, 1), j, k)
1735                        fi = fi + 1
1736                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1737                                                  ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1738                        fi = fi + 1
1739                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1740                                                  ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
1741                        v0 = coarse_coeffs(coarse_bo(1, 1) + 1, j, k)
1742                        fi = fi + 1
1743                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1744                                                  ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1745                     ELSE IF (has_i_ubound) THEN
1746                        v3 = coarse_coeffs(i, j, k)
1747                        fi = fi + 1
1748                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1749                                                  ww1(1)*v0 + ww1(2)*v1 + ww1(3)*v2 + ww1(4)*v3
1750                        fi = fi + 1
1751                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1752                                                  ww0(1)*v1 + ww0(2)*v2 + ww0(3)*v3
1753                        IF (fi + 1 == fine_bo(2, 1)) THEN
1754                           v0 = coarse_coeffs(i + 1, j, k)
1755                           fi = fi + 1
1756                           fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1757                                                     ww1(1)*v1 + ww1(2)*v2 + ww1(3)*v3 + ww1(4)*v0
1758                        END IF
1759                     ELSE
1760                        fi = fi + 1
1761                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1762                                                  wj*(w_border1(3)*v0 + w_border1(2)*v1 + &
1763                                                      w_border1(1)*v2)
1764                        fi = fi + 1
1765                        fine_values(fi, fj, fk) = fine_values(fi, fj, fk) + &
1766                                                  w_border0*wj*v2
1767                     END IF
1768                     CPASSERT(fi == fine_bo(2, 1))
1769                  END IF
1770!                   CALL timestop(handle2)
1771               END DO
1772            END DO
1773         END DO
1774      END DO
1775
1776      IF (is_split) THEN
1777         DEALLOCATE (coarse_coeffs)
1778      END IF
1779      CALL timestop(handle)
1780   END SUBROUTINE add_coarse2fine
1781
1782! **************************************************************************************************
1783!> \brief low level function that adds a coarse grid (without boundary)
1784!>      to a fine grid.
1785!>
1786!>      It will add to
1787!>
1788!>        coarse_coeffs(coarse_bounds(1,1):coarse_bounds(2,1),
1789!>                      coarse_bounds(1,2):coarse_bounds(2,2),
1790!>                      coarse_bounds(1,3):coarse_bounds(2,3))
1791!>
1792!>      using
1793!>
1794!>        fine_values(2*coarse_bounds(1,1):2*coarse_bounds(2,1),
1795!>                    2*coarse_bounds(1,2):2*coarse_bounds(2,2),
1796!>                    2*coarse_bounds(1,3):2*coarse_bounds(2,3))
1797!>
1798!>      composed with the weights obtained by the direct product of the
1799!>      1d coefficents weights:
1800!>
1801!>      for i,j,k in -3..3
1802!>         w(i,j,k)=weights_1d(abs(i)+1)*weights_1d(abs(j)+1)*
1803!>                  weights_1d(abs(k)+1)
1804!> \param fine_values_pw 3d array where to add the values due to the
1805!>        coarse coeffs
1806!> \param coarse_coeffs_pw 3d array with boundary of size 1 with the values of the
1807!>        coefficients
1808!> \param weights_1d the weights of the 1d smearing
1809!> \param w_border0 the 1d weight at the border
1810!> \param w_border1 the 1d weights for a point one off the border
1811!>        (w_border1(1) is the weight of the coefficent at the border)
1812!> \param pbc ...
1813!> \param safe_computation ...
1814!> \author fawzi
1815!> \note
1816!>      see coarse2fine for some relevant notes
1817! **************************************************************************************************
1818   SUBROUTINE add_fine2coarse(fine_values_pw, coarse_coeffs_pw, &
1819                              weights_1d, w_border0, w_border1, pbc, safe_computation)
1820      TYPE(pw_type), POINTER                             :: fine_values_pw, coarse_coeffs_pw
1821      REAL(kind=dp), DIMENSION(4), INTENT(in)            :: weights_1d
1822      REAL(kind=dp), INTENT(in)                          :: w_border0
1823      REAL(kind=dp), DIMENSION(3), INTENT(in)            :: w_border1
1824      LOGICAL, INTENT(in)                                :: pbc
1825      LOGICAL, INTENT(in), OPTIONAL                      :: safe_computation
1826
1827      CHARACTER(len=*), PARAMETER :: routineN = 'add_fine2coarse', &
1828         routineP = moduleN//':'//routineN
1829
1830      INTEGER :: coarse_slice_size, f_shift(3), fi, fj, fk, handle, handle2, i, ii, ij, ik, ip, j, &
1831         k, n_procs, p, p_old, rcv_tot_size, rest_b, s(3), send_tot_size, ss, x, x_att
1832      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: pp_lb, pp_ub, rcv_offset, rcv_size, &
1833                                                            real_rcv_size, send_offset, send_size, &
1834                                                            sent_size
1835      INTEGER, DIMENSION(2, 3)                           :: coarse_bo, coarse_gbo, fine_bo, &
1836                                                            fine_gbo, my_coarse_bo
1837      INTEGER, DIMENSION(:), POINTER                     :: pos_of_x
1838      LOGICAL                                            :: has_i_lbound, has_i_ubound, is_split, &
1839                                                            local_data, safe_calc
1840      REAL(kind=dp)                                      :: vv0, vv1, vv2, vv3, vv4, vv5, vv6, vv7, &
1841                                                            wi, wj, wk
1842      REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: rcv_buf, send_buf
1843      REAL(kind=dp), DIMENSION(3)                        :: w_0, ww0
1844      REAL(kind=dp), DIMENSION(4)                        :: w_1, ww1
1845      REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: coarse_coeffs, fine_values
1846
1847      CALL timeset(routineN, handle)
1848
1849      safe_calc = .FALSE.
1850      IF (PRESENT(safe_computation)) safe_calc = safe_computation
1851
1852      my_coarse_bo = coarse_coeffs_pw%pw_grid%bounds_local
1853      coarse_gbo = coarse_coeffs_pw%pw_grid%bounds
1854      fine_bo = fine_values_pw%pw_grid%bounds_local
1855      fine_gbo = fine_values_pw%pw_grid%bounds
1856      f_shift = fine_gbo(1, :) - 2*coarse_gbo(1, :)
1857      is_split = ANY(coarse_gbo(:, 1) /= my_coarse_bo(:, 1))
1858      coarse_bo = my_coarse_bo
1859      IF (fine_bo(1, 1) <= fine_bo(2, 1)) THEN
1860         coarse_bo(1, 1) = FLOOR(REAL(fine_bo(1, 1) - f_shift(1), dp)/2._dp) - 1
1861         coarse_bo(2, 1) = FLOOR(REAL(fine_bo(2, 1) + 1 - f_shift(1), dp)/2._dp) + 1
1862      ELSE
1863         coarse_bo(1, 1) = coarse_gbo(2, 1)
1864         coarse_bo(2, 1) = coarse_gbo(2, 1) - 1
1865      END IF
1866      IF (.NOT. is_split .OR. .NOT. pbc) THEN
1867         coarse_bo(1, 1) = MAX(coarse_gbo(1, 1), coarse_bo(1, 1))
1868         coarse_bo(2, 1) = MIN(coarse_gbo(2, 1), coarse_bo(2, 1))
1869      END IF
1870      has_i_ubound = (fine_gbo(2, 1) /= fine_bo(2, 1)) .OR. pbc .AND. is_split
1871      has_i_lbound = (fine_gbo(1, 1) /= fine_bo(1, 1)) .OR. pbc .AND. is_split
1872
1873      IF (pbc) THEN
1874         CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1875         CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift + 1))
1876      ELSE
1877         CPASSERT(ALL(fine_gbo(2, :) == 2*coarse_gbo(2, :) + f_shift))
1878         CPASSERT(ALL(fine_gbo(1, :) == 2*coarse_gbo(1, :) + f_shift))
1879      END IF
1880      CPASSERT(coarse_gbo(2, 1) - coarse_gbo(1, 2) > 1)
1881      local_data = is_split ! ANY(coarse_bo/=my_coarse_bo)
1882      IF (local_data) THEN
1883         ALLOCATE (coarse_coeffs(coarse_bo(1, 1):coarse_bo(2, 1), &
1884                                 coarse_bo(1, 2):coarse_bo(2, 2), &
1885                                 coarse_bo(1, 3):coarse_bo(2, 3)))
1886         coarse_coeffs = 0._dp
1887      ELSE
1888         coarse_coeffs => coarse_coeffs_pw%cr3d
1889      END IF
1890
1891      fine_values => fine_values_pw%cr3d
1892      w_0 = (/weights_1d(3), weights_1d(1), weights_1d(3)/)
1893      w_1 = (/weights_1d(4), weights_1d(2), weights_1d(2), weights_1d(4)/)
1894
1895      DO i = 1, 3
1896         s(i) = coarse_gbo(2, i) - coarse_gbo(1, i) + 1
1897      END DO
1898      IF (ANY(s < 1)) RETURN
1899
1900      DO k = coarse_bo(1, 3), coarse_bo(2, 3)
1901         DO ik = -3, 3
1902            IF (pbc) THEN
1903               wk = weights_1d(ABS(ik) + 1)
1904               fk = fine_gbo(1, 3) + MODULO(2*k + ik - fine_gbo(1, 3) + f_shift(3), 2*s(3))
1905            ELSE
1906               fk = 2*k + ik + f_shift(3)
1907               IF (fk <= fine_bo(1, 3) + 1 .OR. fk >= fine_bo(2, 3) - 1) THEN
1908                  IF (fk < fine_bo(1, 3) .OR. fk > fine_bo(2, 3)) CYCLE
1909                  IF (fk == fine_bo(1, 3) .OR. fk == fine_bo(2, 3)) THEN
1910                     IF (ik /= 0) CYCLE
1911                     wk = w_border0
1912                  ELSE IF (fk == fine_bo(1, 3) + 1) THEN
1913                     SELECT CASE (ik)
1914                     CASE (1)
1915                        wk = w_border1(1)
1916                     CASE (-1)
1917                        wk = w_border1(2)
1918                     CASE (-3)
1919                        wk = w_border1(3)
1920                     CASE default
1921                        CPABORT("")
1922                        CYCLE
1923                     END SELECT
1924                  ELSE
1925                     SELECT CASE (ik)
1926                     CASE (3)
1927                        wk = w_border1(3)
1928                     CASE (1)
1929                        wk = w_border1(2)
1930                     CASE (-1)
1931                        wk = w_border1(1)
1932                     CASE default
1933                        CPABORT("")
1934                        CYCLE
1935                     END SELECT
1936                  END IF
1937               ELSE
1938                  wk = weights_1d(ABS(ik) + 1)
1939               END IF
1940            END IF
1941            DO j = coarse_bo(1, 2), coarse_bo(2, 2)
1942               DO ij = -3, 3
1943                  IF (pbc) THEN
1944                     fj = fine_gbo(1, 2) + MODULO(2*j + ij - fine_gbo(1, 2) + f_shift(2), &
1945                                                  2*s(2))
1946                     wj = weights_1d(ABS(ij) + 1)*wk
1947                  ELSE
1948                     fj = 2*j + ij + f_shift(2)
1949                     IF (fj <= fine_bo(1, 2) + 1 .OR. fj >= fine_bo(2, 2) - 1) THEN
1950                        IF (fj < fine_bo(1, 2) .OR. fj > fine_bo(2, 2)) CYCLE
1951                        IF (fj == fine_bo(1, 2) .OR. fj == fine_bo(2, 2)) THEN
1952                           IF (ij /= 0) CYCLE
1953                           wj = w_border0*wk
1954                        ELSE IF (fj == fine_bo(1, 2) + 1) THEN
1955                           SELECT CASE (ij)
1956                           CASE (1)
1957                              wj = w_border1(1)*wk
1958                           CASE (-1)
1959                              wj = w_border1(2)*wk
1960                           CASE (-3)
1961                              wj = w_border1(3)*wk
1962                           CASE default
1963                              CPABORT("")
1964                              CYCLE
1965                           END SELECT
1966                        ELSE
1967                           SELECT CASE (ij)
1968                           CASE (-1)
1969                              wj = w_border1(1)*wk
1970                           CASE (1)
1971                              wj = w_border1(2)*wk
1972                           CASE (3)
1973                              wj = w_border1(3)*wk
1974                           CASE default
1975                              CPABORT("")
1976                              CYCLE
1977                           END SELECT
1978                        END IF
1979                     ELSE
1980                        wj = weights_1d(ABS(ij) + 1)*wk
1981                     END IF
1982                  END IF
1983
1984                  IF (coarse_bo(2, 1) - coarse_bo(1, 1) < 7 .OR. safe_calc) THEN
1985                     DO i = coarse_bo(1, 1), coarse_bo(2, 1)
1986                        DO ii = -3, 3
1987                           IF (pbc .AND. .NOT. is_split) THEN
1988                              wi = weights_1d(ABS(ii) + 1)*wj
1989                              fi = fine_gbo(1, 1) + MODULO(2*i + ii - fine_gbo(1, 1) + f_shift(1), 2*s(1))
1990                           ELSE
1991                              fi = 2*i + ii + f_shift(1)
1992                              IF (fi < fine_bo(1, 1) .OR. fi > fine_bo(2, 1)) CYCLE
1993                              IF (((.NOT. pbc) .AND. fi <= fine_gbo(1, 1) + 1) .OR. &
1994                                  ((.NOT. pbc) .AND. fi >= fine_gbo(2, 1) - 1)) THEN
1995                                 IF (fi == fine_gbo(1, 1) .OR. fi == fine_gbo(2, 1)) THEN
1996                                    IF (ii /= 0) CYCLE
1997                                    wi = w_border0*wj
1998                                 ELSE IF (fi == fine_gbo(1, 1) + 1) THEN
1999                                    SELECT CASE (ii)
2000                                    CASE (1)
2001                                       wi = w_border1(1)*wj
2002                                    CASE (-1)
2003                                       wi = w_border1(2)*wj
2004                                    CASE (-3)
2005                                       wi = w_border1(3)*wj
2006                                    CASE default
2007                                       CYCLE
2008                                    END SELECT
2009                                 ELSE
2010                                    SELECT CASE (ii)
2011                                    CASE (-1)
2012                                       wi = w_border1(1)*wj
2013                                    CASE (1)
2014                                       wi = w_border1(2)*wj
2015                                    CASE (3)
2016                                       wi = w_border1(3)*wj
2017                                    CASE default
2018                                       CYCLE
2019                                    END SELECT
2020                                 END IF
2021                              ELSE
2022                                 wi = weights_1d(ABS(ii) + 1)*wj
2023                              END IF
2024                           END IF
2025                           coarse_coeffs(i, j, k) = &
2026                              coarse_coeffs(i, j, k) + &
2027                              wi*fine_values(fi, fj, fk)
2028                        END DO
2029                     END DO
2030                  ELSE
2031                     ww0 = wj*w_0
2032                     ww1 = wj*w_1
2033                     IF (pbc .AND. .NOT. is_split) THEN
2034                        i = coarse_bo(1, 1) - 1
2035                        vv2 = fine_values(fine_bo(2, 1) - 2, fj, fk)
2036                        vv3 = fine_values(fine_bo(2, 1) - 1, fj, fk)
2037                        vv4 = fine_values(fine_bo(2, 1), fj, fk)
2038                        fi = fine_bo(1, 1)
2039                        vv5 = fine_values(fi, fj, fk)
2040                        fi = fi + 1
2041                        vv6 = fine_values(fi, fj, fk)
2042                        fi = fi + 1
2043                        vv7 = fine_values(fi, fj, fk)
2044                        coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2045                                                     + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
2046                        coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2047                                                     + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
2048                        coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
2049                                                     + ww1(4)*vv6 + ww0(3)*vv7
2050                     ELSE IF (has_i_lbound) THEN
2051                        i = coarse_bo(1, 1)
2052                        fi = fine_bo(1, 1) - 1
2053                        IF (i + 1 == FLOOR((fine_bo(1, 1) + 1 - f_shift(1))/2._dp)) THEN
2054                           fi = fi + 1
2055                           vv0 = fine_values(fi, fj, fk)
2056                           coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
2057                                                    vv0*ww0(3)
2058                           coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
2059                                                        vv0*ww0(2)
2060                           coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
2061                                                        vv0*ww0(1)
2062                        END IF
2063                     ELSE
2064                        i = coarse_bo(1, 1)
2065                        fi = 2*i + f_shift(1)
2066                        vv0 = fine_values(fi, fj, fk)
2067                        fi = fi + 1
2068                        vv1 = fine_values(fi, fj, fk)
2069                        fi = fi + 1
2070                        vv2 = fine_values(fi, fj, fk)
2071                        coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) + &
2072                                                 (vv0*w_border0 + vv1*w_border1(1))*wj + vv2*ww0(1)
2073                        coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) + &
2074                                                     wj*w_border1(2)*vv1 + ww0(2)*vv2
2075                        coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) + &
2076                                                     wj*w_border1(3)*vv1 + ww0(3)*vv2
2077                     END IF
2078                     DO i = coarse_bo(1, 1) + 3, FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3, 4
2079                        fi = fi + 1
2080                        vv0 = fine_values(fi, fj, fk)
2081                        fi = fi + 1
2082                        vv1 = fine_values(fi, fj, fk)
2083                        fi = fi + 1
2084                        vv2 = fine_values(fi, fj, fk)
2085                        fi = fi + 1
2086                        vv3 = fine_values(fi, fj, fk)
2087                        fi = fi + 1
2088                        vv4 = fine_values(fi, fj, fk)
2089                        fi = fi + 1
2090                        vv5 = fine_values(fi, fj, fk)
2091                        fi = fi + 1
2092                        vv6 = fine_values(fi, fj, fk)
2093                        fi = fi + 1
2094                        vv7 = fine_values(fi, fj, fk)
2095                        coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2096                                                     + ww1(1)*vv0
2097                        coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2098                                                     + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
2099                        coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2100                                                     + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2101                        coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2102                                          + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2103                        coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2104                                                     + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7
2105                        coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2106                                                     + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7
2107                        coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
2108                                                     + ww1(4)*vv6 + ww0(3)*vv7
2109                     END DO
2110                     IF (.NOT. FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) >= 4) THEN
2111                        CPABORT("FLOOR((fine_bo(2,1)-f_shift(1))/2._dp)-coarse_bo(1,1)>=4")
2112                     END IF
2113                     rest_b = MODULO(FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - coarse_bo(1, 1) - 6, 4)
2114                     i = FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp) - 3 - rest_b + 4
2115                     CPASSERT(fi == (i - 2)*2 + f_shift(1))
2116                     IF (rest_b > 0) THEN
2117                        fi = fi + 1
2118                        vv0 = fine_values(fi, fj, fk)
2119                        fi = fi + 1
2120                        vv1 = fine_values(fi, fj, fk)
2121                        coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2122                                                     + ww1(1)*vv0
2123                        coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2124                                                     + ww1(2)*vv0 + ww0(1)*vv1
2125                        coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2126                                                     + ww1(3)*vv0 + ww0(2)*vv1
2127                        coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2128                                                 + ww1(4)*vv0 + ww0(3)*vv1
2129                        IF (rest_b > 1) THEN
2130                           fi = fi + 1
2131                           vv2 = fine_values(fi, fj, fk)
2132                           fi = fi + 1
2133                           vv3 = fine_values(fi, fj, fk)
2134                           coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2135                                                        + ww1(1)*vv2
2136                           coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2137                                                        + ww1(2)*vv2 + ww0(1)*vv3
2138                           coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2139                                                    + ww1(3)*vv2 + ww0(2)*vv3
2140                           coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2141                                                        + ww1(4)*vv2 + ww0(3)*vv3
2142                           IF (rest_b > 2) THEN
2143                              fi = fi + 1
2144                              vv4 = fine_values(fi, fj, fk)
2145                              fi = fi + 1
2146                              vv5 = fine_values(fi, fj, fk)
2147                              fi = fi + 1
2148                              vv6 = fine_values(fi, fj, fk)
2149                              fi = fi + 1
2150                              vv7 = fine_values(fi, fj, fk)
2151                              coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2152                                                           + ww1(1)*vv4
2153                              IF (has_i_ubound) THEN
2154                                 IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2155                                    fi = fi + 1
2156                                    vv0 = fine_values(fi, fj, fk)
2157                                    coarse_coeffs(i + 4, j, k) = coarse_coeffs(i + 4, j, k) &
2158                                                                 + vv0*ww1(4)
2159                                 ELSE
2160                                    vv0 = 0._dp
2161                                 END IF
2162                                 coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2163                                                          + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2164                                 coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2165                                                              + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
2166                                 coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2167                                                              + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2)
2168                                 coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
2169                                                              + ww1(4)*vv6 + ww0(3)*vv7 + vv0*ww1(3)
2170                              ELSEIF (pbc .AND. .NOT. is_split) THEN
2171                                 fi = fi + 1
2172                                 vv0 = fine_values(fi, fj, fk)
2173                                 vv1 = fine_values(fine_bo(1, 1), fj, fk)
2174                                 vv2 = fine_values(fine_bo(1, 1) + 1, fj, fk)
2175                                 coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2176                                                          + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2177                                 coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2178                                                              + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 + ww0(1)*vv7 + vv0*ww1(1)
2179                                 coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2180                                                              + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6 + ww0(2)*vv7 + vv0*ww1(2) &
2181                                                              + vv1*ww0(1) + vv2*ww1(1)
2182                              ELSE
2183                                 coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2184                                                          + ww1(2)*vv4 + ww0(1)*vv5 + wj*w_border1(3)*vv6
2185                                 coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2186                                                              + ww1(3)*vv4 + ww0(2)*vv5 + wj*w_border1(2)*vv6
2187                                 coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2188                                                              + ww1(4)*vv4 + ww0(3)*vv5 + wj*w_border1(1)*vv6 + w_border0*wj*vv7
2189                              END IF
2190                           ELSE
2191                              fi = fi + 1
2192                              vv4 = fine_values(fi, fj, fk)
2193                              fi = fi + 1
2194                              vv5 = fine_values(fi, fj, fk)
2195                              IF (has_i_ubound) THEN
2196                                 IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2197                                    fi = fi + 1
2198                                    vv6 = fine_values(fi, fj, fk)
2199                                    coarse_coeffs(i + 3, j, k) = coarse_coeffs(i + 3, j, k) &
2200                                                                 + ww1(4)*vv6
2201                                 ELSE
2202                                    vv6 = 0._dp
2203                                 END IF
2204                                 coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2205                                                              + ww1(1)*vv4
2206                                 coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2207                                                          + ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2208                                 coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2209                                                              + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6
2210                                 coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2211                                                              + ww1(4)*vv4 + ww0(3)*vv5 + ww1(3)*vv6
2212                              ELSEIF (pbc .AND. .NOT. is_split) THEN
2213                                 fi = fi + 1
2214                                 vv6 = fine_values(fi, fj, fk)
2215                                 vv7 = fine_values(fine_bo(1, 1), fj, fk)
2216                                 vv0 = fine_values(fine_bo(1, 1) + 1, fj, fk)
2217                                 coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2218                                                              + ww1(1)*vv4
2219                                 coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2220                                                          + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2 + ww0(2)*vv3 + &
2221                                                          ww1(2)*vv4 + ww0(1)*vv5 + ww1(1)*vv6
2222                                 coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2223                                                              + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4 + ww0(2)*vv5 + ww1(2)*vv6 &
2224                                                              + ww0(1)*vv7 + ww1(1)*vv0
2225                              ELSE
2226                                 coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2227                                                              + wj*w_border1(3)*vv4
2228                                 coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2229                                                          + wj*w_border1(2)*vv4
2230                                 coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2231                                                              + wj*(w_border1(1)*vv4 + w_border0*vv5)
2232                              END IF
2233                           END IF
2234                        ELSE
2235                           fi = fi + 1
2236                           vv2 = fine_values(fi, fj, fk)
2237                           fi = fi + 1
2238                           vv3 = fine_values(fi, fj, fk)
2239                           IF (has_i_ubound) THEN
2240                              IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2241                                 fi = fi + 1
2242                                 vv4 = fine_values(fi, fj, fk)
2243                                 coarse_coeffs(i + 2, j, k) = coarse_coeffs(i + 2, j, k) &
2244                                                              + ww1(4)*vv4
2245                              ELSE
2246                                 vv4 = 0._dp
2247                              END IF
2248                              coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2249                                                           + ww1(1)*vv2
2250                              coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2251                                                           + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2252                              coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2253                                                       + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4
2254                              coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2255                                                           + ww1(4)*vv2 + ww0(3)*vv3 + ww1(3)*vv4
2256                           ELSEIF (pbc .AND. .NOT. is_split) THEN
2257                              fi = fi + 1
2258                              vv4 = fine_values(fi, fj, fk)
2259                              vv5 = fine_values(fine_bo(1, 1), fj, fk)
2260                              vv6 = fine_values(fine_bo(1, 1) + 1, fj, fk)
2261                              coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2262                                                           + ww1(1)*vv2
2263                              coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2264                                                           + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2265                              coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2266                                                       + ww1(3)*vv2 + ww0(2)*vv3 + ww1(2)*vv4 + vv5*ww0(1) + ww1(1)*vv6
2267                           ELSE
2268                              coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2269                                                           + wj*w_border1(3)*vv2
2270                              coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2271                                                           + wj*w_border1(2)*vv2
2272                              coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2273                                                       + wj*(w_border1(1)*vv2 + w_border0*vv3)
2274                           END IF
2275                        END IF
2276                     ELSE
2277                        fi = fi + 1
2278                        vv0 = fine_values(fi, fj, fk)
2279                        fi = fi + 1
2280                        vv1 = fine_values(fi, fj, fk)
2281                        IF (has_i_ubound) THEN
2282                           IF (coarse_bo(2, 1) - 2 == FLOOR((fine_bo(2, 1) - f_shift(1))/2._dp)) THEN
2283                              fi = fi + 1
2284                              vv2 = fine_values(fi, fj, fk)
2285                              coarse_coeffs(i + 1, j, k) = coarse_coeffs(i + 1, j, k) &
2286                                                           + ww1(4)*vv2
2287                           ELSE
2288                              vv2 = 0._dp
2289                           END IF
2290                           coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2291                                                        + ww1(1)*vv0
2292                           coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2293                                                        + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
2294                           coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2295                                                        + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2
2296                           coarse_coeffs(i, j, k) = coarse_coeffs(i, j, k) &
2297                                                    + ww1(4)*vv0 + ww0(3)*vv1 + ww1(3)*vv2
2298                        ELSEIF (pbc .AND. .NOT. is_split) THEN
2299                           fi = fi + 1
2300                           vv2 = fine_values(fi, fj, fk)
2301                           vv3 = fine_values(fine_bo(1, 1), fk, fk)
2302                           vv4 = fine_values(fine_bo(1, 1) + 1, fk, fk)
2303                           coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2304                                                        + ww1(1)*vv0
2305                           coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2306                                                        + ww1(2)*vv0 + ww0(1)*vv1 + ww1(1)*vv2
2307                           coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2308                                                        + ww1(3)*vv0 + ww0(2)*vv1 + ww1(2)*vv2 + ww0(1)*vv3 + ww1(1)*vv4
2309                        ELSE
2310                           coarse_coeffs(i - 3, j, k) = coarse_coeffs(i - 3, j, k) &
2311                                                        + wj*w_border1(3)*vv0
2312                           coarse_coeffs(i - 2, j, k) = coarse_coeffs(i - 2, j, k) &
2313                                                        + wj*w_border1(2)*vv0
2314                           coarse_coeffs(i - 1, j, k) = coarse_coeffs(i - 1, j, k) &
2315                                                        + wj*(w_border1(1)*vv0 + w_border0*vv1)
2316                        END IF
2317                     END IF
2318                     CPASSERT(fi == fine_bo(2, 1))
2319                  END IF
2320               END DO
2321            END DO
2322         END DO
2323      END DO
2324
2325      ! *** parallel case
2326      IF (is_split) THEN
2327         CALL timeset(routineN//"_comm", handle2)
2328         coarse_slice_size = (coarse_bo(2, 2) - coarse_bo(1, 2) + 1)* &
2329                             (coarse_bo(2, 3) - coarse_bo(1, 3) + 1)
2330         n_procs = coarse_coeffs_pw%pw_grid%para%group_size
2331         ALLOCATE (send_size(0:n_procs - 1), send_offset(0:n_procs - 1), &
2332                   sent_size(0:n_procs - 1), rcv_size(0:n_procs - 1), &
2333                   rcv_offset(0:n_procs - 1), pp_lb(0:n_procs - 1), &
2334                   pp_ub(0:n_procs - 1), real_rcv_size(0:n_procs - 1))
2335
2336         ! ** send size count
2337
2338         pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
2339         send_size = 0
2340         DO x = coarse_bo(1, 1), coarse_bo(2, 1)
2341            p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
2342            send_size(p) = send_size(p) + coarse_slice_size
2343         END DO
2344
2345         ! ** rcv size count
2346
2347         pos_of_x => fine_values_pw%pw_grid%para%pos_of_x
2348         p_old = pos_of_x(fine_gbo(1, 1))
2349         pp_lb = fine_gbo(2, 1)
2350         pp_ub = fine_gbo(2, 1) - 1
2351         pp_lb(p_old) = fine_gbo(1, 1)
2352         DO x = fine_gbo(1, 1), fine_gbo(2, 1)
2353            p = pos_of_x(x)
2354            IF (p /= p_old) THEN
2355               pp_ub(p_old) = x - 1
2356               pp_lb(p) = x
2357               p_old = p
2358            END IF
2359         END DO
2360         pp_ub(p_old) = fine_gbo(2, 1)
2361
2362         DO ip = 0, n_procs - 1
2363            IF (pp_lb(ip) <= pp_ub(ip)) THEN
2364               pp_lb(ip) = FLOOR(REAL(pp_lb(ip) - f_shift(1), dp)/2._dp) - 1
2365               pp_ub(ip) = FLOOR(REAL(pp_ub(ip) + 1 - f_shift(1), dp)/2._dp) + 1
2366            ELSE
2367               pp_lb(ip) = coarse_gbo(2, 1)
2368               pp_ub(ip) = coarse_gbo(2, 1) - 1
2369            END IF
2370            IF (.NOT. is_split .OR. .NOT. pbc) THEN
2371               pp_lb(ip) = MAX(pp_lb(ip), coarse_gbo(1, 1))
2372               pp_ub(ip) = MIN(pp_ub(ip), coarse_gbo(2, 1))
2373            END IF
2374         END DO
2375
2376         rcv_size = 0
2377         DO ip = 0, n_procs - 1
2378            DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
2379               x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2380               IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2381                  rcv_size(ip) = rcv_size(ip) + coarse_slice_size
2382               END IF
2383            END DO
2384            rcv_size(ip) = rcv_size(ip) + coarse_slice_size* &
2385                           MAX(0, &
2386                               MIN(pp_ub(ip), my_coarse_bo(2, 1)) - MAX(pp_lb(ip), my_coarse_bo(1, 1)) + 1)
2387            DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
2388               x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2389               IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2390                  rcv_size(ip) = rcv_size(ip) + coarse_slice_size
2391               END IF
2392            END DO
2393         END DO
2394
2395         ! ** offsets & alloc send-rcv
2396
2397         send_tot_size = 0
2398         DO ip = 0, n_procs - 1
2399            send_offset(ip) = send_tot_size
2400            send_tot_size = send_tot_size + send_size(ip)
2401         END DO
2402         IF (send_tot_size /= (coarse_bo(2, 1) - coarse_bo(1, 1) + 1)*coarse_slice_size) &
2403            CPABORT("Error calculating send_tot_size")
2404         ALLOCATE (send_buf(0:send_tot_size - 1))
2405
2406         rcv_tot_size = 0
2407         DO ip = 0, n_procs - 1
2408            rcv_offset(ip) = rcv_tot_size
2409            rcv_tot_size = rcv_tot_size + rcv_size(ip)
2410         END DO
2411         ALLOCATE (rcv_buf(0:rcv_tot_size - 1))
2412
2413         ! ** fill send buffer
2414
2415         pos_of_x => coarse_coeffs_pw%pw_grid%para%pos_of_x
2416         p_old = pos_of_x(coarse_gbo(1, 1) &
2417                          + MODULO(coarse_bo(1, 1) - coarse_gbo(1, 1), s(1)))
2418         sent_size(:) = send_offset
2419         ss = coarse_bo(2, 1) - coarse_bo(1, 1) + 1
2420         DO x = coarse_bo(1, 1), coarse_bo(2, 1)
2421            p = pos_of_x(coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1)))
2422            CALL dcopy(coarse_slice_size, &
2423                       coarse_coeffs(x, coarse_bo(1, 2), &
2424                                     coarse_bo(1, 3)), ss, send_buf(sent_size(p)), 1)
2425            sent_size(p) = sent_size(p) + coarse_slice_size
2426         END DO
2427
2428         IF (ANY(sent_size(0:n_procs - 2) /= send_offset(1:n_procs - 1))) &
2429            CPABORT("error 1 filling send buffer")
2430         IF (sent_size(n_procs - 1) /= send_tot_size) &
2431            CPABORT("error 2 filling send buffer")
2432
2433         IF (local_data) THEN
2434            DEALLOCATE (coarse_coeffs)
2435         ELSE
2436            NULLIFY (coarse_coeffs)
2437         END IF
2438
2439         CPASSERT(ALL(sent_size(:n_procs - 2) == send_offset(1:)))
2440         CPASSERT(sent_size(n_procs - 1) == send_tot_size)
2441         ! test send/rcv sizes
2442         CALL mp_alltoall(send_size, real_rcv_size, 1, coarse_coeffs_pw%pw_grid%para%group)
2443
2444         CPASSERT(ALL(real_rcv_size == rcv_size))
2445         ! all2all
2446         CALL mp_alltoall(sb=send_buf, scount=send_size, sdispl=send_offset, &
2447                          rb=rcv_buf, rcount=rcv_size, rdispl=rcv_offset, &
2448                          group=coarse_coeffs_pw%pw_grid%para%group)
2449
2450         ! ** sum & reorder rcv buffer
2451
2452         sent_size(:) = rcv_offset
2453         DO ip = 0, n_procs - 1
2454
2455            DO x = pp_lb(ip), coarse_gbo(1, 1) - 1
2456               x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2457               IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2458                  ii = sent_size(ip)
2459                  DO k = coarse_bo(1, 3), coarse_bo(2, 3)
2460                     DO j = coarse_bo(1, 2), coarse_bo(2, 2)
2461                        coarse_coeffs_pw%cr3d(x_att, j, k) = coarse_coeffs_pw%cr3d(x_att, j, k) + rcv_buf(ii)
2462                        ii = ii + 1
2463                     END DO
2464                  END DO
2465                  sent_size(ip) = ii
2466               END IF
2467            END DO
2468
2469            ii = sent_size(ip)
2470            DO x_att = MAX(pp_lb(ip), my_coarse_bo(1, 1)), MIN(pp_ub(ip), my_coarse_bo(2, 1))
2471               DO k = coarse_bo(1, 3), coarse_bo(2, 3)
2472                  DO j = coarse_bo(1, 2), coarse_bo(2, 2)
2473                     coarse_coeffs_pw%cr3d(x_att, j, k) = coarse_coeffs_pw%cr3d(x_att, j, k) + rcv_buf(ii)
2474                     ii = ii + 1
2475                  END DO
2476               END DO
2477            END DO
2478            sent_size(ip) = ii
2479
2480            DO x = coarse_gbo(2, 1) + 1, pp_ub(ip)
2481               x_att = coarse_gbo(1, 1) + MODULO(x - coarse_gbo(1, 1), s(1))
2482               IF (x_att >= my_coarse_bo(1, 1) .AND. x_att <= my_coarse_bo(2, 1)) THEN
2483                  ii = sent_size(ip)
2484                  DO k = coarse_bo(1, 3), coarse_bo(2, 3)
2485                     DO j = coarse_bo(1, 2), coarse_bo(2, 2)
2486                        coarse_coeffs_pw%cr3d(x_att, j, k) = coarse_coeffs_pw%cr3d(x_att, j, k) + rcv_buf(ii)
2487                        ii = ii + 1
2488                     END DO
2489                  END DO
2490                  sent_size(ip) = ii
2491               END IF
2492            END DO
2493
2494         END DO
2495
2496         IF (ANY(sent_size(0:n_procs - 2) /= rcv_offset(1:n_procs - 1))) &
2497            CPABORT("error 1 handling the rcv buffer")
2498         IF (sent_size(n_procs - 1) /= rcv_tot_size) &
2499            CPABORT("error 2 handling the rcv buffer")
2500
2501         ! dealloc
2502         DEALLOCATE (send_size, send_offset, rcv_size, rcv_offset)
2503         DEALLOCATE (send_buf, rcv_buf, real_rcv_size)
2504         DEALLOCATE (pp_ub, pp_lb)
2505         CALL timestop(handle2)
2506      ELSE
2507         CPASSERT(.NOT. local_data)
2508      END IF
2509
2510      CALL timestop(handle)
2511   END SUBROUTINE add_fine2coarse
2512
2513! **************************************************************************************************
2514!> \brief ...
2515!> \param preconditioner the preconditioner to create
2516!> \param precond_kind the kind of preconditioner to use
2517!> \param pool a pool with grids of the same type as the elements to
2518!>        precondition
2519!> \param pbc if periodic boundary conditions should be applied
2520!> \param transpose ...
2521!> \author fawzi
2522! **************************************************************************************************
2523   SUBROUTINE pw_spline_precond_create(preconditioner, precond_kind, &
2524                                       pool, pbc, transpose)
2525      TYPE(pw_spline_precond_type), POINTER              :: preconditioner
2526      INTEGER, INTENT(in)                                :: precond_kind
2527      TYPE(pw_pool_type), POINTER                        :: pool
2528      LOGICAL, INTENT(in)                                :: pbc, transpose
2529
2530      CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline_precond_create', &
2531         routineP = moduleN//':'//routineN
2532
2533      ALLOCATE (preconditioner)
2534      last_precond_id = last_precond_id + 1
2535      preconditioner%id_nr = last_precond_id
2536      preconditioner%ref_count = 1
2537      preconditioner%kind = no_precond
2538      preconditioner%pool => pool
2539      preconditioner%pbc = pbc
2540      preconditioner%transpose = transpose
2541      CALL pw_pool_retain(pool)
2542      CALL pw_spline_precond_set_kind(preconditioner, precond_kind)
2543   END SUBROUTINE pw_spline_precond_create
2544
2545! **************************************************************************************************
2546!> \brief switches the types of precoditioner to use
2547!> \param preconditioner the preconditioner to be changed
2548!> \param precond_kind the new kind of preconditioner to use
2549!> \param pbc ...
2550!> \param transpose ...
2551!> \author fawzi
2552! **************************************************************************************************
2553   SUBROUTINE pw_spline_precond_set_kind(preconditioner, precond_kind, pbc, &
2554                                         transpose)
2555      TYPE(pw_spline_precond_type), POINTER              :: preconditioner
2556      INTEGER, INTENT(in)                                :: precond_kind
2557      LOGICAL, INTENT(in), OPTIONAL                      :: pbc, transpose
2558
2559      CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline_precond_set_kind', &
2560         routineP = moduleN//':'//routineN
2561
2562      LOGICAL                                            :: do_3d_coeff
2563      REAL(kind=dp)                                      :: s
2564
2565      CPASSERT(ASSOCIATED(preconditioner))
2566      CPASSERT(preconditioner%ref_count > 0)
2567      IF (PRESENT(transpose)) preconditioner%transpose = transpose
2568      do_3d_coeff = .FALSE.
2569      preconditioner%kind = precond_kind
2570      IF (PRESENT(pbc)) preconditioner%pbc = pbc
2571      SELECT CASE (precond_kind)
2572      CASE (no_precond)
2573      CASE (precond_spl3_aint2)
2574         preconditioner%coeffs_1d = (/-1.66_dp*0.25_dp, 1.66_dp, -1.66_dp*0.25_dp/)
2575         preconditioner%sharpen = .FALSE.
2576         preconditioner%normalize = .FALSE.
2577         do_3d_coeff = .TRUE.
2578      CASE (precond_spl3_3)
2579         preconditioner%coeffs_1d(1) = -0.25_dp*1.6_dp
2580         preconditioner%coeffs_1d(2) = 1.6_dp
2581         preconditioner%coeffs_1d(3) = -0.25_dp*1.6_dp
2582         preconditioner%sharpen = .FALSE.
2583         preconditioner%normalize = .FALSE.
2584         do_3d_coeff = .TRUE.
2585      CASE (precond_spl3_2)
2586         preconditioner%coeffs_1d(1) = -0.26_dp*1.76_dp
2587         preconditioner%coeffs_1d(2) = 1.76_dp
2588         preconditioner%coeffs_1d(3) = -0.26_dp*1.76_dp
2589         preconditioner%sharpen = .FALSE.
2590         preconditioner%normalize = .FALSE.
2591         do_3d_coeff = .TRUE.
2592      CASE (precond_spl3_aint)
2593         preconditioner%coeffs_1d = spl3_1d_coeffs0
2594         preconditioner%sharpen = .TRUE.
2595         preconditioner%normalize = .TRUE.
2596         do_3d_coeff = .TRUE.
2597      CASE (precond_spl3_1)
2598         preconditioner%coeffs_1d(1) = 0.5_dp/3._dp**(1._dp/3._dp)
2599         preconditioner%coeffs_1d(2) = 4._dp/3._dp**(1._dp/3._dp)
2600         preconditioner%coeffs_1d(3) = 0.5_dp/3._dp**(1._dp/3._dp)
2601         preconditioner%sharpen = .TRUE.
2602         preconditioner%normalize = .FALSE.
2603         do_3d_coeff = .TRUE.
2604      CASE default
2605         CPABORT("")
2606      END SELECT
2607      IF (do_3d_coeff) THEN
2608         s = 1._dp
2609         IF (preconditioner%sharpen) s = -1._dp
2610         preconditioner%coeffs(1) = &
2611            s*preconditioner%coeffs_1d(2)* &
2612            preconditioner%coeffs_1d(2)* &
2613            preconditioner%coeffs_1d(2)
2614         preconditioner%coeffs(2) = &
2615            s*preconditioner%coeffs_1d(1)* &
2616            preconditioner%coeffs_1d(2)* &
2617            preconditioner%coeffs_1d(2)
2618         preconditioner%coeffs(3) = &
2619            s*preconditioner%coeffs_1d(1)* &
2620            preconditioner%coeffs_1d(1)* &
2621            preconditioner%coeffs_1d(2)
2622         preconditioner%coeffs(4) = &
2623            s*preconditioner%coeffs_1d(1)* &
2624            preconditioner%coeffs_1d(1)* &
2625            preconditioner%coeffs_1d(1)
2626         IF (preconditioner%sharpen) THEN
2627            IF (preconditioner%normalize) THEN
2628               preconditioner%coeffs(1) = 2._dp + &
2629                                          preconditioner%coeffs(1)
2630            ELSE
2631               preconditioner%coeffs(1) = -preconditioner%coeffs(1)
2632            END IF
2633         END IF
2634      END IF
2635   END SUBROUTINE pw_spline_precond_set_kind
2636
2637! **************************************************************************************************
2638!> \brief retains the preconditioner
2639!> \param preconditioner the preconditioner to retain
2640!> \author fawzi
2641! **************************************************************************************************
2642   SUBROUTINE pw_spline_precond_retain(preconditioner)
2643      TYPE(pw_spline_precond_type), POINTER              :: preconditioner
2644
2645      CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline_precond_retain', &
2646         routineP = moduleN//':'//routineN
2647
2648      CPASSERT(ASSOCIATED(preconditioner))
2649      CPASSERT(preconditioner%ref_count > 1)
2650      preconditioner%ref_count = preconditioner%ref_count + 1
2651   END SUBROUTINE pw_spline_precond_retain
2652
2653! **************************************************************************************************
2654!> \brief releases the preconditioner
2655!> \param preconditioner the preconditioner to release
2656!> \author fawzi
2657! **************************************************************************************************
2658   SUBROUTINE pw_spline_precond_release(preconditioner)
2659      TYPE(pw_spline_precond_type), POINTER              :: preconditioner
2660
2661      CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline_precond_release', &
2662         routineP = moduleN//':'//routineN
2663
2664      IF (ASSOCIATED(preconditioner)) THEN
2665         CPASSERT(preconditioner%ref_count > 0)
2666         preconditioner%ref_count = preconditioner%ref_count - 1
2667         IF (preconditioner%ref_count == 0) THEN
2668            CALL pw_pool_release(preconditioner%pool)
2669            DEALLOCATE (preconditioner)
2670         END IF
2671      END IF
2672   END SUBROUTINE pw_spline_precond_release
2673
2674! **************************************************************************************************
2675!> \brief applies the preconditioner to the system of equations to find the
2676!>      coefficents of the spline
2677!> \param preconditioner the preconditioner to apply
2678!> \param in_v the grid on which the preconditioner should be applied
2679!> \param out_v place to store the preconditioner applied on v_out
2680!> \author fawzi
2681! **************************************************************************************************
2682   SUBROUTINE pw_spline_do_precond(preconditioner, in_v, out_v)
2683      TYPE(pw_spline_precond_type), POINTER              :: preconditioner
2684      TYPE(pw_type), POINTER                             :: in_v, out_v
2685
2686      CHARACTER(len=*), PARAMETER :: routineN = 'pw_spline_do_precond', &
2687         routineP = moduleN//':'//routineN
2688
2689      CPASSERT(ASSOCIATED(preconditioner))
2690      CPASSERT(preconditioner%ref_count > 0)
2691      SELECT CASE (preconditioner%kind)
2692      CASE (no_precond)
2693         CALL pw_copy(in_v, out_v)
2694      CASE (precond_spl3_aint, precond_spl3_1)
2695         CALL pw_zero(out_v)
2696         IF (preconditioner%pbc) THEN
2697            CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
2698                               coeffs=preconditioner%coeffs)
2699         ELSE
2700            CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
2701                                        pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
2702                                        normalize=preconditioner%normalize, &
2703                                        transpose=preconditioner%transpose)
2704         END IF
2705      CASE (precond_spl3_3, precond_spl3_2, precond_spl3_aint2)
2706         CALL pw_zero(out_v)
2707         IF (preconditioner%pbc) THEN
2708            CALL pw_nn_smear_r(pw_in=in_v, pw_out=out_v, &
2709                               coeffs=preconditioner%coeffs)
2710         ELSE
2711            CALL pw_nn_compose_r_no_pbc(weights_1d=preconditioner%coeffs_1d, &
2712                                        pw_in=in_v, pw_out=out_v, sharpen=preconditioner%sharpen, &
2713                                        normalize=preconditioner%normalize, smooth_boundary=.TRUE., &
2714                                        transpose=preconditioner%transpose)
2715         END IF
2716      CASE default
2717         CPABORT("")
2718      END SELECT
2719   END SUBROUTINE pw_spline_do_precond
2720
2721! **************************************************************************************************
2722!> \brief solves iteratively (CG) a systmes of linear equations
2723!>           linOp(coeffs)=values
2724!>      (for example those needed to find the coefficents of a spline)
2725!>      Returns true if the it succeded to acheive the requested accuracy
2726!> \param values the right hand side of the system
2727!> \param coeffs will contain the solution of the system (and on entry
2728!>        it contains the starting point)
2729!> \param linOp the linear operator to be inverted
2730!> \param preconditioner the preconditioner to apply
2731!> \param pool a pool of grids (for the temporary objects)
2732!> \param eps_r the requested precision on the residual
2733!> \param eps_x the requested precision on the solution
2734!> \param max_iter maximum number of iteration allowed
2735!> \return ...
2736!> \author fawzi
2737! **************************************************************************************************
2738   FUNCTION find_coeffs(values, coeffs, linOp, preconditioner, pool, &
2739                        eps_r, eps_x, max_iter) RESULT(res)
2740      TYPE(pw_type), POINTER                             :: values, coeffs
2741      INTERFACE
2742! **************************************************************************************************
2743         SUBROUTINE linOp(pw_in, pw_out)
2744            USE pw_types, ONLY: pw_type
2745            TYPE(pw_type), POINTER :: pw_in, pw_out
2746         END SUBROUTINE linOp
2747      END INTERFACE
2748      TYPE(pw_spline_precond_type), POINTER              :: preconditioner
2749      TYPE(pw_pool_type), POINTER                        :: pool
2750      REAL(kind=dp), INTENT(in)                          :: eps_r, eps_x
2751      INTEGER, INTENT(in)                                :: max_iter
2752      LOGICAL                                            :: res
2753
2754      CHARACTER(len=*), PARAMETER :: routineN = 'find_coeffs', routineP = moduleN//':'//routineN
2755
2756      INTEGER                                            :: i, iiter, iter, j, k
2757      INTEGER, DIMENSION(2, 3)                           :: bo
2758      LOGICAL                                            :: last
2759      REAL(kind=dp)                                      :: alpha, beta, eps_r_att, eps_x_att, r_z, &
2760                                                            r_z_new
2761      TYPE(cp_logger_type), POINTER                      :: logger
2762      TYPE(pw_type), POINTER                             :: Ap, p, r, z
2763
2764      last = .FALSE.
2765
2766      res = .FALSE.
2767      logger => cp_get_default_logger()
2768      CALL pw_pool_create_pw(pool, r, use_data=REALDATA3D, in_space=REALSPACE)
2769      CALL pw_pool_create_pw(pool, z, use_data=REALDATA3D, in_space=REALSPACE)
2770      CALL pw_pool_create_pw(pool, p, use_data=REALDATA3D, in_space=REALSPACE)
2771      CALL pw_pool_create_pw(pool, Ap, use_data=REALDATA3D, in_space=REALSPACE)
2772
2773      !CALL cp_add_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
2774      ext_do: DO iiter = 1, max_iter, 10
2775         CALL pw_zero(r)
2776         CALL linOp(pw_in=coeffs, pw_out=r)
2777         r%cr3d = -r%cr3d
2778         CALL pw_axpy(values, r)
2779         CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
2780         CALL pw_copy(z, p)
2781         r_z = pw_integral_ab(r, z)
2782
2783         DO iter = iiter, MIN(iiter + 9, max_iter)
2784            eps_r_att = SQRT(pw_integral_ab(r, r))
2785            IF (eps_r_att == 0._dp) THEN
2786               eps_x_att = 0._dp
2787               last = .TRUE.
2788            ELSE
2789               CALL pw_zero(Ap)
2790               CALL linOp(pw_in=p, pw_out=Ap)
2791               alpha = r_z/pw_integral_ab(Ap, p)
2792
2793               CALL pw_axpy(p, coeffs, alpha=alpha)
2794
2795               eps_x_att = alpha*SQRT(pw_integral_ab(p, p)) ! try to spare if unneded?
2796               IF (eps_r_att < eps_r .AND. eps_x_att < eps_x) last = .TRUE.
2797            END IF
2798            !CALL cp_iterate(logger%iter_info,last=last)
2799            IF (last) THEN
2800               res = .TRUE.
2801               EXIT ext_do
2802            END IF
2803
2804            CALL pw_axpy(Ap, r, alpha=-alpha)
2805
2806            CALL pw_spline_do_precond(preconditioner, in_v=r, out_v=z)
2807
2808            r_z_new = pw_integral_ab(r, z)
2809            beta = r_z_new/r_z
2810            r_z = r_z_new
2811
2812            bo = p%pw_grid%bounds_local
2813            DO k = bo(1, 3), bo(2, 3)
2814               DO j = bo(1, 2), bo(2, 2)
2815                  DO i = bo(1, 1), bo(2, 1)
2816                     p%cr3d(i, j, k) = z%cr3d(i, j, k) + beta*p%cr3d(i, j, k)
2817                  END DO
2818               END DO
2819            END DO
2820
2821         END DO
2822      END DO ext_do
2823      !CALL cp_rm_iter_level(logger%iter_info,level_name="SPLINE_FIND_COEFFS")
2824
2825      CALL pw_pool_give_back_pw(pool, r)
2826      CALL pw_pool_give_back_pw(pool, z)
2827      CALL pw_pool_give_back_pw(pool, p)
2828      CALL pw_pool_give_back_pw(pool, Ap)
2829   END FUNCTION find_coeffs
2830
2831! **************************************************************************************************
2832!> \brief adds to pw_out pw_in composed with the weights
2833!>      pw_out%cr3d(i,j,k)=pw_out%cr3d(i,j,k)+sum(pw_in%cr3d(i+l,j+m,k+n)*
2834!>         weights_1d(abs(l)+1)*weights_1d(abs(m)+1)*weights_1d(abs(n)+1),
2835!>         l=-1..1,m=-1..1,n=-1..1)
2836!> \param weights_1d ...
2837!> \param pw_in ...
2838!> \param pw_out ...
2839!> \param sharpen ...
2840!> \param normalize ...
2841!> \param transpose ...
2842!> \param smooth_boundary ...
2843!> \author fawzi
2844! **************************************************************************************************
2845   SUBROUTINE pw_nn_compose_r_no_pbc(weights_1d, pw_in, pw_out, &
2846                                     sharpen, normalize, transpose, smooth_boundary)
2847      REAL(kind=dp), DIMENSION(-1:1)                     :: weights_1d
2848      TYPE(pw_type), POINTER                             :: pw_in, pw_out
2849      LOGICAL, INTENT(in), OPTIONAL                      :: sharpen, normalize, transpose, &
2850                                                            smooth_boundary
2851
2852      CHARACTER(len=*), PARAMETER :: routineN = 'pw_nn_compose_r_no_pbc', &
2853         routineP = moduleN//':'//routineN
2854
2855      INTEGER                                            :: first_index, i, j, jw, k, kw, &
2856                                                            last_index, myj, myk, n_els
2857      INTEGER, DIMENSION(2, 3)                           :: bo, gbo
2858      INTEGER, DIMENSION(3)                              :: s
2859      LOGICAL                                            :: has_l_boundary, has_u_boundary, &
2860                                                            is_split, my_normalize, my_sharpen, &
2861                                                            my_smooth_boundary, my_transpose
2862      REAL(kind=dp)                                      :: in_val_f, in_val_l, in_val_tmp, w_j, w_k
2863      REAL(kind=dp), DIMENSION(-1:1)                     :: w
2864      REAL(kind=dp), DIMENSION(:, :), POINTER            :: l_boundary, tmp, u_boundary
2865      REAL(kind=dp), DIMENSION(:, :, :), POINTER         :: in_val, out_val
2866
2867      bo = pw_in%pw_grid%bounds_local
2868      gbo = pw_in%pw_grid%bounds
2869      in_val => pw_in%cr3d
2870      out_val => pw_out%cr3d
2871      my_sharpen = .FALSE.
2872      IF (PRESENT(sharpen)) my_sharpen = sharpen
2873      my_normalize = .FALSE.
2874      IF (PRESENT(normalize)) my_normalize = normalize
2875      my_transpose = .FALSE.
2876      IF (PRESENT(transpose)) my_transpose = transpose
2877      my_smooth_boundary = .FALSE.
2878      IF (PRESENT(smooth_boundary)) my_smooth_boundary = smooth_boundary
2879      CPASSERT(.NOT. my_normalize .OR. my_sharpen)
2880      CPASSERT(.NOT. my_smooth_boundary .OR. .NOT. my_sharpen)
2881      DO i = 1, 3
2882         s(i) = bo(2, i) - bo(1, i) + 1
2883      END DO
2884      IF (ANY(s < 1)) RETURN
2885      is_split = ANY(pw_in%pw_grid%bounds_local(:, 1) /= &
2886                     pw_in%pw_grid%bounds(:, 1))
2887      has_l_boundary = (gbo(1, 1) == bo(1, 1))
2888      has_u_boundary = (gbo(2, 1) == bo(2, 1))
2889      IF (is_split) THEN
2890         ALLOCATE (l_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
2891                   u_boundary(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)), &
2892                   tmp(bo(1, 2):bo(2, 2), bo(1, 3):bo(2, 3)))
2893         tmp(:, :) = pw_in%cr3d(bo(2, 1), :, :)
2894         CALL mp_sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
2895                          gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
2896                          l_boundary, pw_in%pw_grid%para%pos_of_x( &
2897                          gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
2898                          pw_in%pw_grid%para%group)
2899         tmp(:, :) = pw_in%cr3d(bo(1, 1), :, :)
2900         CALL mp_sendrecv(tmp, pw_in%pw_grid%para%pos_of_x( &
2901                          gbo(1, 1) + MODULO(bo(1, 1) - 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
2902                          u_boundary, pw_in%pw_grid%para%pos_of_x( &
2903                          gbo(1, 1) + MODULO(bo(2, 1) + 1 - gbo(1, 1), gbo(2, 1) - gbo(1, 1) + 1)), &
2904                          pw_in%pw_grid%para%group)
2905         DEALLOCATE (tmp)
2906      END IF
2907
2908      n_els = s(1)
2909      IF (has_l_boundary) THEN
2910         n_els = n_els - 1
2911         first_index = bo(1, 1) + 1
2912      ELSE
2913         first_index = bo(1, 1)
2914      END IF
2915      IF (has_u_boundary) THEN
2916         n_els = n_els - 1
2917         last_index = bo(2, 1) - 1
2918      ELSE
2919         last_index = bo(2, 1)
2920      END IF
2921!$OMP PARALLEL DO DEFAULT(NONE) &
2922!$OMP PRIVATE(k, kw, myk, j, jw, myj, in_val_f, in_val_l, w_k, w_j, in_val_tmp, w) &
2923!$OMP SHARED(bo, in_val, out_val, s, l_boundary, u_boundary, weights_1d, is_split, &
2924!$OMP        my_transpose, gbo, my_smooth_boundary, has_l_boundary, has_u_boundary, &
2925!$OMP        my_sharpen, last_index, first_index, my_normalize, n_els)
2926      DO k = bo(1, 3), bo(2, 3)
2927         DO kw = -1, 1
2928            myk = k + kw
2929            IF (my_transpose) THEN
2930               IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
2931                  IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
2932                     IF (myk < gbo(2, 3) .AND. myk > gbo(1, 3)) THEN
2933                        w_k = weights_1d(kw)
2934                        IF (my_smooth_boundary) THEN
2935                           w_k = weights_1d(kw)/weights_1d(0)
2936                        END IF
2937                     ELSE IF (kw == 0) THEN
2938                        w_k = 1._dp
2939                     ELSE
2940                        CYCLE
2941                     END IF
2942                  ELSE
2943                     IF (myk == gbo(2, 3) .OR. myk == gbo(1, 3)) CYCLE
2944                     w_k = weights_1d(kw)
2945                  END IF
2946               ELSE
2947                  w_k = weights_1d(kw)
2948               END IF
2949            ELSE
2950               IF (k >= gbo(2, 3) - 1 .OR. k <= gbo(1, 3) + 1) THEN
2951                  IF (k == gbo(2, 3) .OR. k == gbo(1, 3)) THEN
2952                     IF (kw /= 0) CYCLE
2953                     w_k = 1._dp
2954                  ELSE
2955                     IF (my_smooth_boundary .AND. ((k == gbo(1, 3) + 1 .AND. myk == gbo(1, 3)) .OR. &
2956                                                   (k == gbo(2, 3) - 1 .AND. myk == gbo(2, 3)))) THEN
2957                        w_k = weights_1d(kw)/weights_1d(0)
2958                     ELSE
2959                        w_k = weights_1d(kw)
2960                     END IF
2961                  END IF
2962               ELSE
2963                  w_k = weights_1d(kw)
2964               END IF
2965            END IF
2966            DO j = bo(1, 2), bo(2, 2)
2967               DO jw = -1, 1
2968                  myj = j + jw
2969                  IF (j < gbo(2, 2) - 1 .AND. j > gbo(1, 2) + 1) THEN
2970                     w_j = w_k*weights_1d(jw)
2971                  ELSE
2972                     IF (my_transpose) THEN
2973                        IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
2974                           IF (myj < gbo(2, 2) .AND. myj > gbo(1, 2)) THEN
2975                              w_j = weights_1d(jw)*w_k
2976                              IF (my_smooth_boundary) THEN
2977                                 w_j = weights_1d(jw)/weights_1d(0)*w_k
2978                              END IF
2979                           ELSE IF (jw == 0) THEN
2980                              w_j = w_k
2981                           ELSE
2982                              CYCLE
2983                           END IF
2984                        ELSE
2985                           IF (myj == gbo(2, 2) .OR. myj == gbo(1, 2)) CYCLE
2986                           w_j = w_k*weights_1d(jw)
2987                        END IF
2988                     ELSE
2989                        IF (j == gbo(2, 2) .OR. j == gbo(1, 2)) THEN
2990                           IF (jw /= 0) CYCLE
2991                           w_j = w_k
2992                        ELSE IF (my_smooth_boundary .AND. ((j == gbo(1, 2) + 1 .AND. myj == gbo(1, 2)) .OR. &
2993                                                           (j == gbo(2, 2) - 1 .AND. myj == gbo(2, 2)))) THEN
2994                           w_j = w_k*weights_1d(jw)/weights_1d(0)
2995                        ELSE
2996                           w_j = w_k*weights_1d(jw)
2997                        END IF
2998                     END IF
2999                  END IF
3000
3001                  IF (has_l_boundary) THEN
3002                     IF (my_transpose) THEN
3003                        IF (s(1) == 1) THEN
3004                           CPASSERT(.NOT. has_u_boundary)
3005                           in_val_tmp = u_boundary(myj, myk)
3006                        ELSE
3007                           in_val_tmp = in_val(bo(1, 1) + 1, myj, myk)
3008                        END IF
3009                        IF (my_sharpen) THEN
3010                           IF (kw == 0 .AND. jw == 0) THEN
3011                              IF (my_normalize) THEN
3012                                 out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
3013                                                           (2.0_dp - w_j)*in_val(bo(1, 1), myj, myk) - &
3014                                                           in_val_tmp*weights_1d(1)*w_j
3015                              ELSE
3016                                 out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
3017                                                           in_val(bo(1, 1), myj, myk)*w_j - &
3018                                                           in_val_tmp*weights_1d(1)*w_j
3019                              END IF
3020                           ELSE
3021                              out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
3022                                                        in_val(bo(1, 1), myj, myk)*w_j - &
3023                                                        in_val_tmp*weights_1d(1)*w_j
3024                           END IF
3025                        ELSE IF (my_smooth_boundary) THEN
3026                           out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
3027                                                     w_j*(in_val(bo(1, 1), myj, myk) + &
3028                                                          in_val_tmp*weights_1d(1)/weights_1d(0))
3029                        ELSE
3030                           out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
3031                                                     w_j*(in_val(bo(1, 1), myj, myk) + &
3032                                                          in_val_tmp*weights_1d(1))
3033                        END IF
3034                        in_val_f = 0.0_dp
3035                     ELSE
3036                        in_val_f = in_val(bo(1, 1), myj, myk)
3037                        IF (my_sharpen) THEN
3038                           IF (kw == 0 .AND. jw == 0) THEN
3039                              IF (my_normalize) THEN
3040                                 out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
3041                                                           (2.0_dp - w_j)*in_val_f
3042                              ELSE
3043                                 out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
3044                                                           in_val_f*w_j
3045                              END IF
3046                           ELSE
3047                              out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) - &
3048                                                        in_val_f*w_j
3049                           END IF
3050                        ELSE
3051                           out_val(bo(1, 1), j, k) = out_val(bo(1, 1), j, k) + &
3052                                                     in_val_f*w_j
3053                        END IF
3054                     END IF
3055                  ELSE
3056                     in_val_f = l_boundary(myj, myk)
3057                  END IF
3058                  IF (has_u_boundary) THEN
3059                     IF (my_transpose) THEN
3060                        in_val_l = in_val(bo(2, 1), myj, myk)
3061                        IF (s(1) == 1) THEN
3062                           CPASSERT(.NOT. has_l_boundary)
3063                           in_val_tmp = l_boundary(myj, myk)
3064                        ELSE
3065                           in_val_tmp = in_val(bo(2, 1) - 1, myj, myk)
3066                        END IF
3067                        IF (my_sharpen) THEN
3068                           IF (kw == 0 .AND. jw == 0) THEN
3069                              IF (my_normalize) THEN
3070                                 out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3071                                                           in_val_l*(2._dp - w_j) - &
3072                                                           in_val_tmp*weights_1d(1)*w_j
3073                              ELSE
3074                                 out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3075                                                           in_val_l*w_j - &
3076                                                           in_val_tmp*weights_1d(1)*w_j
3077                              END IF
3078                           ELSE
3079                              out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
3080                                                        w_j*in_val_l - &
3081                                                        in_val_tmp*weights_1d(1)*w_j
3082                           END IF
3083                        ELSE IF (my_smooth_boundary) THEN
3084                           out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3085                                                     w_j*(in_val_l + in_val_tmp*weights_1d(1)/weights_1d(0))
3086                        ELSE
3087                           out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3088                                                     w_j*(in_val_l + in_val_tmp*weights_1d(1))
3089                        END IF
3090                        in_val_l = 0._dp
3091                     ELSE
3092                        in_val_l = in_val(bo(2, 1), myj, myk)
3093                        IF (my_sharpen) THEN
3094                           IF (kw == 0 .AND. jw == 0) THEN
3095                              IF (my_normalize) THEN
3096                                 out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3097                                                           in_val_l*(2._dp - w_j)
3098                              ELSE
3099                                 out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3100                                                           in_val_l*w_j
3101                              END IF
3102                           ELSE
3103                              out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) - &
3104                                                        w_j*in_val_l
3105                           END IF
3106                        ELSE
3107                           out_val(bo(2, 1), j, k) = out_val(bo(2, 1), j, k) + &
3108                                                     w_j*in_val_l
3109                        END IF
3110                     END IF
3111                  ELSE
3112                     in_val_l = u_boundary(myj, myk)
3113                  END IF
3114                  IF (last_index >= first_index) THEN
3115                     IF (my_transpose) THEN
3116                        IF (bo(1, 1) - 1 == gbo(1, 1)) THEN
3117                           in_val_f = 0._dp
3118                        ELSE IF (bo(2, 1) + 1 == gbo(2, 1)) THEN
3119                           in_val_l = 0._dp
3120                        END IF
3121                     END IF
3122                     IF (my_sharpen) THEN
3123                        w = -weights_1d*w_j
3124                        IF (kw == 0 .AND. jw == 0) THEN
3125                           IF (my_normalize) THEN
3126                              w(0) = w(0) + 2._dp
3127                           ELSE
3128                              w(0) = -w(0)
3129                           END IF
3130                        END IF
3131                     ELSE
3132                        w = weights_1d*w_j
3133                     END IF
3134                     IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
3135                        IF (gbo(1, 1) + 1 >= bo(1, 1) .AND. &
3136                            gbo(1, 1) + 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
3137                           IF (gbo(1, 1) >= bo(1, 1)) THEN
3138                              out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
3139                                                             in_val(gbo(1, 1), myj, myk)*w_j*weights_1d(-1)* &
3140                                                             (1._dp/weights_1d(0) - 1._dp)
3141                           ELSE
3142                              out_val(gbo(1, 1) + 1, j, k) = out_val(gbo(1, 1) + 1, j, k) + &
3143                                                             l_boundary(myj, myk)*w_j*weights_1d(-1)* &
3144                                                             (1._dp/weights_1d(0) - 1._dp)
3145                           END IF
3146                        END IF
3147                     END IF
3148                     CALL pw_compose_stripe(weights=w, &
3149                                            in_val=in_val(first_index:last_index, myj, myk), &
3150                                            in_val_first=in_val_f, in_val_last=in_val_l, &
3151                                            out_val=out_val(first_index:last_index, j, k), &
3152                                            n_el=n_els)
3153!FM                   call pw_compose_stripe2(weights=w,&
3154!FM                        in_val=in_val,&
3155!FM                        in_val_first=in_val_f,in_val_last=in_val_l,&
3156!FM                        out_val=out_val,&
3157!FM                        first_val=first_index,last_val=last_index,&
3158!FM                        myj=myj,myk=myk,j=j,k=k)
3159                     IF (my_smooth_boundary .AND. (.NOT. my_transpose)) THEN
3160                        IF (gbo(2, 1) - 1 >= bo(1, 1) .AND. &
3161                            gbo(2, 1) - 1 <= bo(2, 1) .AND. gbo(2, 1) - gbo(1, 1) > 2) THEN
3162                           IF (gbo(2, 1) <= bo(2, 1)) THEN
3163                              out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
3164                                                             in_val(gbo(2, 1), myj, myk)*w_j*weights_1d(1)* &
3165                                                             (1._dp/weights_1d(0) - 1._dp)
3166                           ELSE
3167                              out_val(gbo(2, 1) - 1, j, k) = out_val(gbo(2, 1) - 1, j, k) + &
3168                                                             u_boundary(myj, myk)*w_j*weights_1d(1)* &
3169                                                             (1._dp/weights_1d(0) - 1._dp)
3170                           END IF
3171                        END IF
3172                     END IF
3173
3174                  END IF
3175               END DO
3176            END DO
3177         END DO
3178      END DO
3179
3180      IF (is_split) THEN
3181         DEALLOCATE (l_boundary, u_boundary)
3182      END IF
3183   END SUBROUTINE pw_nn_compose_r_no_pbc
3184
3185! **************************************************************************************************
3186!> \brief ...
3187!> \param pw_in ...
3188!> \param pw_out ...
3189! **************************************************************************************************
3190   SUBROUTINE spl3_nopbc(pw_in, pw_out)
3191      TYPE(pw_type), POINTER                             :: pw_in, pw_out
3192
3193      CALL pw_zero(pw_out)
3194      CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
3195                                  pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE.)
3196   END SUBROUTINE spl3_nopbc
3197
3198! **************************************************************************************************
3199!> \brief ...
3200!> \param pw_in ...
3201!> \param pw_out ...
3202! **************************************************************************************************
3203   SUBROUTINE spl3_nopbct(pw_in, pw_out)
3204      TYPE(pw_type), POINTER                             :: pw_in, pw_out
3205
3206      CALL pw_zero(pw_out)
3207      CALL pw_nn_compose_r_no_pbc(weights_1d=spl3_1d_coeffs0, pw_in=pw_in, &
3208                                  pw_out=pw_out, sharpen=.FALSE., normalize=.FALSE., transpose=.TRUE.)
3209   END SUBROUTINE spl3_nopbct
3210
3211! **************************************************************************************************
3212!> \brief ...
3213!> \param pw_in ...
3214!> \param pw_out ...
3215! **************************************************************************************************
3216   SUBROUTINE spl3_pbc(pw_in, pw_out)
3217      TYPE(pw_type), POINTER                             :: pw_in, pw_out
3218
3219      CALL pw_zero(pw_out)
3220      CALL pw_nn_smear_r(pw_in, pw_out, coeffs=spline3_coeffs)
3221   END SUBROUTINE spl3_pbc
3222
3223! **************************************************************************************************
3224!> \brief Evaluates the PBC interpolated Spline (pw) function on the generic
3225!>      input vector (vec)
3226!> \param vec ...
3227!> \param pw ...
3228!> \return ...
3229!> \par History
3230!>      12.2007 Adapted for use with distributed grids [rdeclerck]
3231!> \author Teodoro Laino 12/2005 [tlaino]
3232!> \note
3233!>      Requires the Spline coefficients to be computed with PBC
3234! **************************************************************************************************
3235   FUNCTION Eval_Interp_Spl3_pbc(vec, pw) RESULT(val)
3236      REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: vec
3237      TYPE(pw_type), POINTER                             :: pw
3238      REAL(KIND=dp)                                      :: val
3239
3240      CHARACTER(len=*), PARAMETER :: routineN = 'Eval_Interp_Spl3_pbc', &
3241         routineP = moduleN//':'//routineN
3242
3243      INTEGER                                            :: i, ivec(3), j, k, npts(3)
3244      INTEGER, DIMENSION(2, 3)                           :: bo, bo_l
3245      INTEGER, DIMENSION(4)                              :: ii, ij, ik
3246      LOGICAL                                            :: my_mpsum
3247      REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr2, dr3, e1, e2, e3, &
3248         f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, s1, s2, s3, s4, &
3249         t1, t2, t3, t4, u1, u2, u3, v1, v2, v3, v4, xd1, xd2, xd3
3250      REAL(KIND=dp), DIMENSION(4, 4, 4)                  :: box
3251      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: grid
3252
3253      NULLIFY (grid)
3254      my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
3255      npts = pw%pw_grid%npts
3256      ivec = FLOOR(vec/pw%pw_grid%dr)
3257      dr1 = pw%pw_grid%dr(1)
3258      dr2 = pw%pw_grid%dr(2)
3259      dr3 = pw%pw_grid%dr(3)
3260
3261      xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
3262      xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
3263      xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
3264      grid => pw%cr3d(:, :, :)
3265      bo = pw%pw_grid%bounds
3266      bo_l = pw%pw_grid%bounds_local
3267
3268      ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
3269      ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
3270      ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
3271      ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
3272
3273      ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
3274      ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
3275      ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
3276      ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
3277
3278      ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
3279      ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
3280      ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
3281      ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
3282
3283      DO k = 1, 4
3284         DO j = 1, 4
3285            DO i = 1, 4
3286               IF ( &
3287                  ii(i) >= bo_l(1, 1) .AND. &
3288                  ii(i) <= bo_l(2, 1) .AND. &
3289                  ij(j) >= bo_l(1, 2) .AND. &
3290                  ij(j) <= bo_l(2, 2) .AND. &
3291                  ik(k) >= bo_l(1, 3) .AND. &
3292                  ik(k) <= bo_l(2, 3) &
3293                  ) THEN
3294                  box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
3295                                      ij(j) + 1 - bo_l(1, 2), &
3296                                      ik(k) + 1 - bo_l(1, 3))
3297               ELSE
3298                  box(i, j, k) = 0.0_dp
3299               END IF
3300            END DO
3301         END DO
3302      END DO
3303
3304      a1 = 3.0_dp + xd1
3305      a2 = a1*a1
3306      a3 = a2*a1
3307      b1 = 2.0_dp + xd1
3308      b2 = b1*b1
3309      b3 = b2*b1
3310      c1 = 1.0_dp + xd1
3311      c2 = c1*c1
3312      c3 = c2*c1
3313      d1 = xd1
3314      d2 = d1*d1
3315      d3 = d2*d1
3316      e1 = 3.0_dp + xd2
3317      e2 = e1*e1
3318      e3 = e2*e1
3319      f1 = 2.0_dp + xd2
3320      f2 = f1*f1
3321      f3 = f2*f1
3322      g1 = 1.0_dp + xd2
3323      g2 = g1*g1
3324      g3 = g2*g1
3325      h1 = xd2
3326      h2 = h1*h1
3327      h3 = h2*h1
3328      p1 = 3.0_dp + xd3
3329      p2 = p1*p1
3330      p3 = p2*p1
3331      q1 = 2.0_dp + xd3
3332      q2 = q1*q1
3333      q3 = q2*q1
3334      r1 = 1.0_dp + xd3
3335      r2 = r1*r1
3336      r3 = r2*r1
3337      u1 = xd3
3338      u2 = u1*u1
3339      u3 = u2*u1
3340
3341      t1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
3342      t2 = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
3343      t3 = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
3344      t4 = 1.0_dp/6.0_dp*d3
3345      s1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
3346      s2 = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
3347      s3 = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
3348      s4 = 1.0_dp/6.0_dp*h3
3349      v1 = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
3350      v2 = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
3351      v3 = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
3352      v4 = 1.0_dp/6.0_dp*u3
3353
3354      val = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3355             (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3356             (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3357             (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3358            ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3359             (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3360             (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3361             (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3362            ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3363             (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3364             (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3365             (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3366            ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3367             (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3368             (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3369             (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3370
3371      IF (my_mpsum) CALL mp_sum(val, pw%pw_grid%para%group)
3372
3373   END FUNCTION Eval_Interp_Spl3_pbc
3374
3375! **************************************************************************************************
3376!> \brief Evaluates the derivatives of the PBC interpolated Spline (pw)
3377!>      function on the generic input vector (vec)
3378!> \param vec ...
3379!> \param pw ...
3380!> \return ...
3381!> \par History
3382!>      12.2007 Adapted for use with distributed grids [rdeclerck]
3383!> \author Teodoro Laino 12/2005 [tlaino]
3384!> \note
3385!>      Requires the Spline coefficients to be computed with PBC
3386! **************************************************************************************************
3387   FUNCTION Eval_d_Interp_Spl3_pbc(vec, pw) RESULT(val)
3388      REAL(KIND=dp), DIMENSION(3), INTENT(in)            :: vec
3389      TYPE(pw_type), POINTER                             :: pw
3390      REAL(KIND=dp)                                      :: val(3)
3391
3392      CHARACTER(len=*), PARAMETER :: routineN = 'Eval_d_Interp_Spl3_pbc', &
3393         routineP = moduleN//':'//routineN
3394
3395      INTEGER                                            :: i, ivec(3), j, k, npts(3)
3396      INTEGER, DIMENSION(2, 3)                           :: bo, bo_l
3397      INTEGER, DIMENSION(4)                              :: ii, ij, ik
3398      LOGICAL                                            :: my_mpsum
3399      REAL(KIND=dp) :: a1, a2, a3, b1, b2, b3, c1, c2, c3, d1, d2, d3, dr1, dr1i, dr2, dr2i, dr3, &
3400         dr3i, e1, e2, e3, f1, f2, f3, g1, g2, g3, h1, h2, h3, p1, p2, p3, q1, q2, q3, r1, r2, r3, &
3401         s1, s1d, s1o, s2, s2d, s2o, s3, s3d, s3o, s4, s4d, s4o, t1, t1d, t1o, t2, t2d, t2o, t3, &
3402         t3d, t3o, t4, t4d, t4o, u1, u2, u3, v1, v1d, v1o, v2, v2d, v2o, v3, v3d, v3o, v4, v4d, &
3403         v4o, xd1, xd2, xd3
3404      REAL(KIND=dp), DIMENSION(4, 4, 4)                  :: box
3405      REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: grid
3406
3407      NULLIFY (grid)
3408      my_mpsum = (pw%pw_grid%para%mode /= PW_MODE_LOCAL)
3409      npts = pw%pw_grid%npts
3410      ivec = FLOOR(vec/pw%pw_grid%dr)
3411      dr1 = pw%pw_grid%dr(1)
3412      dr2 = pw%pw_grid%dr(2)
3413      dr3 = pw%pw_grid%dr(3)
3414      dr1i = 1.0_dp/dr1
3415      dr2i = 1.0_dp/dr2
3416      dr3i = 1.0_dp/dr3
3417      xd1 = (vec(1)/dr1) - REAL(ivec(1), kind=dp)
3418      xd2 = (vec(2)/dr2) - REAL(ivec(2), kind=dp)
3419      xd3 = (vec(3)/dr3) - REAL(ivec(3), kind=dp)
3420      grid => pw%cr3d(:, :, :)
3421      bo = pw%pw_grid%bounds
3422      bo_l = pw%pw_grid%bounds_local
3423
3424      ik(1) = MODULO(ivec(3) - 1, npts(3)) + bo(1, 3)
3425      ik(2) = MODULO(ivec(3), npts(3)) + bo(1, 3)
3426      ik(3) = MODULO(ivec(3) + 1, npts(3)) + bo(1, 3)
3427      ik(4) = MODULO(ivec(3) + 2, npts(3)) + bo(1, 3)
3428
3429      ij(1) = MODULO(ivec(2) - 1, npts(2)) + bo(1, 2)
3430      ij(2) = MODULO(ivec(2), npts(2)) + bo(1, 2)
3431      ij(3) = MODULO(ivec(2) + 1, npts(2)) + bo(1, 2)
3432      ij(4) = MODULO(ivec(2) + 2, npts(2)) + bo(1, 2)
3433
3434      ii(1) = MODULO(ivec(1) - 1, npts(1)) + bo(1, 1)
3435      ii(2) = MODULO(ivec(1), npts(1)) + bo(1, 1)
3436      ii(3) = MODULO(ivec(1) + 1, npts(1)) + bo(1, 1)
3437      ii(4) = MODULO(ivec(1) + 2, npts(1)) + bo(1, 1)
3438
3439      DO k = 1, 4
3440         DO j = 1, 4
3441            DO i = 1, 4
3442               IF ( &
3443                  ii(i) >= bo_l(1, 1) .AND. &
3444                  ii(i) <= bo_l(2, 1) .AND. &
3445                  ij(j) >= bo_l(1, 2) .AND. &
3446                  ij(j) <= bo_l(2, 2) .AND. &
3447                  ik(k) >= bo_l(1, 3) .AND. &
3448                  ik(k) <= bo_l(2, 3) &
3449                  ) THEN
3450                  box(i, j, k) = grid(ii(i) + 1 - bo_l(1, 1), &
3451                                      ij(j) + 1 - bo_l(1, 2), &
3452                                      ik(k) + 1 - bo_l(1, 3))
3453               ELSE
3454                  box(i, j, k) = 0.0_dp
3455               END IF
3456            END DO
3457         END DO
3458      END DO
3459
3460      a1 = 3.0_dp + xd1
3461      a2 = a1*a1
3462      a3 = a2*a1
3463      b1 = 2.0_dp + xd1
3464      b2 = b1*b1
3465      b3 = b2*b1
3466      c1 = 1.0_dp + xd1
3467      c2 = c1*c1
3468      c3 = c2*c1
3469      d1 = xd1
3470      d2 = d1*d1
3471      d3 = d2*d1
3472      e1 = 3.0_dp + xd2
3473      e2 = e1*e1
3474      e3 = e2*e1
3475      f1 = 2.0_dp + xd2
3476      f2 = f1*f1
3477      f3 = f2*f1
3478      g1 = 1.0_dp + xd2
3479      g2 = g1*g1
3480      g3 = g2*g1
3481      h1 = xd2
3482      h2 = h1*h1
3483      h3 = h2*h1
3484      p1 = 3.0_dp + xd3
3485      p2 = p1*p1
3486      p3 = p2*p1
3487      q1 = 2.0_dp + xd3
3488      q2 = q1*q1
3489      q3 = q2*q1
3490      r1 = 1.0_dp + xd3
3491      r2 = r1*r1
3492      r3 = r2*r1
3493      u1 = xd3
3494      u2 = u1*u1
3495      u3 = u2*u1
3496
3497      t1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*a1 + 12.0_dp*a2 - a3)
3498      t2o = -22.0_dp/3.0_dp + 10.0_dp*b1 - 4.0_dp*b2 + 0.5_dp*b3
3499      t3o = 2.0_dp/3.0_dp - 2.0_dp*c1 + 2.0_dp*c2 - 0.5_dp*c3
3500      t4o = 1.0_dp/6.0_dp*d3
3501      s1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*e1 + 12.0_dp*e2 - e3)
3502      s2o = -22.0_dp/3.0_dp + 10.0_dp*f1 - 4.0_dp*f2 + 0.5_dp*f3
3503      s3o = 2.0_dp/3.0_dp - 2.0_dp*g1 + 2.0_dp*g2 - 0.5_dp*g3
3504      s4o = 1.0_dp/6.0_dp*h3
3505      v1o = 1.0_dp/6.0_dp*(64.0_dp - 48.0_dp*p1 + 12.0_dp*p2 - p3)
3506      v2o = -22.0_dp/3.0_dp + 10.0_dp*q1 - 4.0_dp*q2 + 0.5_dp*q3
3507      v3o = 2.0_dp/3.0_dp - 2.0_dp*r1 + 2.0_dp*r2 - 0.5_dp*r3
3508      v4o = 1.0_dp/6.0_dp*u3
3509
3510      t1d = -8.0_dp + 4.0_dp*a1 - 0.5_dp*a2
3511      t2d = 10.0_dp - 8.0_dp*b1 + 1.5_dp*b2
3512      t3d = -2.0_dp + 4.0_dp*c1 - 1.5_dp*c2
3513      t4d = 0.5_dp*d2
3514      s1d = -8.0_dp + 4.0_dp*e1 - 0.5_dp*e2
3515      s2d = 10.0_dp - 8.0_dp*f1 + 1.5_dp*f2
3516      s3d = -2.0_dp + 4.0_dp*g1 - 1.5_dp*g2
3517      s4d = 0.5_dp*h2
3518      v1d = -8.0_dp + 4.0_dp*p1 - 0.5_dp*p2
3519      v2d = 10.0_dp - 8.0_dp*q1 + 1.5_dp*q2
3520      v3d = -2.0_dp + 4.0_dp*r1 - 1.5_dp*r2
3521      v4d = 0.5_dp*u2
3522
3523      t1 = t1d*dr1i
3524      t2 = t2d*dr1i
3525      t3 = t3d*dr1i
3526      t4 = t4d*dr1i
3527      s1 = s1o
3528      s2 = s2o
3529      s3 = s3o
3530      s4 = s4o
3531      v1 = v1o
3532      v2 = v2o
3533      v3 = v3o
3534      v4 = v4o
3535      val(1) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3536                (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3537                (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3538                (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3539               ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3540                (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3541                (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3542                (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3543               ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3544                (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3545                (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3546                (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3547               ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3548                (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3549                (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3550                (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3551
3552      t1 = t1o
3553      t2 = t2o
3554      t3 = t3o
3555      t4 = t4o
3556      s1 = s1d*dr2i
3557      s2 = s2d*dr2i
3558      s3 = s3d*dr2i
3559      s4 = s4d*dr2i
3560      v1 = v1o
3561      v2 = v2o
3562      v3 = v3o
3563      v4 = v4o
3564      val(2) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3565                (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3566                (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3567                (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3568               ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3569                (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3570                (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3571                (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3572               ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3573                (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3574                (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3575                (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3576               ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3577                (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3578                (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3579                (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3580
3581      t1 = t1o
3582      t2 = t2o
3583      t3 = t3o
3584      t4 = t4o
3585      s1 = s1o
3586      s2 = s2o
3587      s3 = s3o
3588      s4 = s4o
3589      v1 = v1d*dr3i
3590      v2 = v2d*dr3i
3591      v3 = v3d*dr3i
3592      v4 = v4d*dr3i
3593      val(3) = ((box(1, 1, 1)*t1 + box(2, 1, 1)*t2 + box(3, 1, 1)*t3 + box(4, 1, 1)*t4)*s1 + &
3594                (box(1, 2, 1)*t1 + box(2, 2, 1)*t2 + box(3, 2, 1)*t3 + box(4, 2, 1)*t4)*s2 + &
3595                (box(1, 3, 1)*t1 + box(2, 3, 1)*t2 + box(3, 3, 1)*t3 + box(4, 3, 1)*t4)*s3 + &
3596                (box(1, 4, 1)*t1 + box(2, 4, 1)*t2 + box(3, 4, 1)*t3 + box(4, 4, 1)*t4)*s4)*v1 + &
3597               ((box(1, 1, 2)*t1 + box(2, 1, 2)*t2 + box(3, 1, 2)*t3 + box(4, 1, 2)*t4)*s1 + &
3598                (box(1, 2, 2)*t1 + box(2, 2, 2)*t2 + box(3, 2, 2)*t3 + box(4, 2, 2)*t4)*s2 + &
3599                (box(1, 3, 2)*t1 + box(2, 3, 2)*t2 + box(3, 3, 2)*t3 + box(4, 3, 2)*t4)*s3 + &
3600                (box(1, 4, 2)*t1 + box(2, 4, 2)*t2 + box(3, 4, 2)*t3 + box(4, 4, 2)*t4)*s4)*v2 + &
3601               ((box(1, 1, 3)*t1 + box(2, 1, 3)*t2 + box(3, 1, 3)*t3 + box(4, 1, 3)*t4)*s1 + &
3602                (box(1, 2, 3)*t1 + box(2, 2, 3)*t2 + box(3, 2, 3)*t3 + box(4, 2, 3)*t4)*s2 + &
3603                (box(1, 3, 3)*t1 + box(2, 3, 3)*t2 + box(3, 3, 3)*t3 + box(4, 3, 3)*t4)*s3 + &
3604                (box(1, 4, 3)*t1 + box(2, 4, 3)*t2 + box(3, 4, 3)*t3 + box(4, 4, 3)*t4)*s4)*v3 + &
3605               ((box(1, 1, 4)*t1 + box(2, 1, 4)*t2 + box(3, 1, 4)*t3 + box(4, 1, 4)*t4)*s1 + &
3606                (box(1, 2, 4)*t1 + box(2, 2, 4)*t2 + box(3, 2, 4)*t3 + box(4, 2, 4)*t4)*s2 + &
3607                (box(1, 3, 4)*t1 + box(2, 3, 4)*t2 + box(3, 3, 4)*t3 + box(4, 3, 4)*t4)*s3 + &
3608                (box(1, 4, 4)*t1 + box(2, 4, 4)*t2 + box(3, 4, 4)*t3 + box(4, 4, 4)*t4)*s4)*v4
3609
3610      IF (my_mpsum) CALL mp_sum(val, pw%pw_grid%para%group)
3611
3612   END FUNCTION Eval_d_Interp_Spl3_pbc
3613
3614END MODULE pw_spline_utils
3615