1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Setting up the Spline coefficients used to Interpolate the G-Term
8!>      in Ewald sums
9!> \par History
10!>      12.2005 created [tlaino]
11!> \author Teodoro Laino
12! **************************************************************************************************
13MODULE ewald_spline_util
14   USE cell_types,                      ONLY: cell_create,&
15                                              cell_release,&
16                                              cell_type
17   USE cp_log_handling,                 ONLY: cp_get_default_logger,&
18                                              cp_logger_type
19   USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
20                                              cp_print_key_unit_nr
21   USE cp_para_types,                   ONLY: cp_para_env_type
22   USE input_section_types,             ONLY: section_vals_get_subs_vals,&
23                                              section_vals_type,&
24                                              section_vals_val_get
25   USE kinds,                           ONLY: dp
26   USE message_passing,                 ONLY: mp_sum
27   USE pw_grid_types,                   ONLY: HALFSPACE,&
28                                              pw_grid_type
29   USE pw_grids,                        ONLY: pw_grid_create,&
30                                              pw_grid_setup
31   USE pw_methods,                      ONLY: pw_zero
32   USE pw_pool_types,                   ONLY: pw_pool_create,&
33                                              pw_pool_create_pw,&
34                                              pw_pool_give_back_pw,&
35                                              pw_pool_type
36   USE pw_spline_utils,                 ONLY: &
37        Eval_Interp_Spl3_pbc, Eval_d_Interp_Spl3_pbc, find_coeffs, pw_spline_do_precond, &
38        pw_spline_precond_create, pw_spline_precond_release, pw_spline_precond_set_kind, &
39        pw_spline_precond_type, spl3_pbc
40   USE pw_types,                        ONLY: REALDATA3D,&
41                                              REALSPACE,&
42                                              pw_type
43!NB parallelization
44#include "./base/base_uses.f90"
45
46   IMPLICIT NONE
47   PRIVATE
48
49   LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .TRUE.
50   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'ewald_spline_util'
51   PUBLIC :: Setup_Ewald_Spline
52
53CONTAINS
54
55! **************************************************************************************************
56!> \brief Setup of the G-space Ewald Term Spline Coefficients
57!> \param pw_grid ...
58!> \param pw_pool ...
59!> \param coeff ...
60!> \param LG ...
61!> \param gx ...
62!> \param gy ...
63!> \param gz ...
64!> \param hmat ...
65!> \param npts ...
66!> \param param_section ...
67!> \param tag ...
68!> \param print_section ...
69!> \param para_env ...
70!> \par History
71!>      12.2005 created [tlaino]
72!> \author Teodoro Laino
73! **************************************************************************************************
74   SUBROUTINE Setup_Ewald_Spline(pw_grid, pw_pool, coeff, LG, gx, gy, gz, hmat, npts, &
75                                 param_section, tag, print_section, para_env)
76      TYPE(pw_grid_type), POINTER                        :: pw_grid
77      TYPE(pw_pool_type), POINTER                        :: pw_pool
78      TYPE(pw_type), POINTER                             :: coeff
79      REAL(KIND=dp), DIMENSION(:), POINTER               :: LG, gx, gy, gz
80      REAL(KIND=dp), INTENT(IN)                          :: hmat(3, 3)
81      INTEGER, INTENT(IN)                                :: npts(3)
82      TYPE(section_vals_type), POINTER                   :: param_section
83      CHARACTER(LEN=*), INTENT(IN)                       :: tag
84      TYPE(section_vals_type), POINTER                   :: print_section
85      TYPE(cp_para_env_type), POINTER                    :: para_env
86
87      CHARACTER(len=*), PARAMETER :: routineN = 'Setup_Ewald_Spline', &
88         routineP = moduleN//':'//routineN
89
90      INTEGER                                            :: bo(2, 3), iounit
91      TYPE(cell_type), POINTER                           :: cell
92      TYPE(cp_logger_type), POINTER                      :: logger
93      TYPE(pw_type), POINTER                             :: pw
94
95!
96! Setting Up Fit Procedure
97!
98
99      CPASSERT(.NOT. ASSOCIATED(pw_grid))
100      CPASSERT(.NOT. ASSOCIATED(pw_pool))
101      CPASSERT(.NOT. ASSOCIATED(coeff))
102      NULLIFY (cell, pw)
103
104      CALL cell_create(cell, hmat=hmat, periodic=(/1, 1, 1/))
105      CALL pw_grid_create(pw_grid, para_env%group, local=.TRUE.)
106      logger => cp_get_default_logger()
107      iounit = cp_print_key_unit_nr(logger, print_section, "", &
108                                    extension=".Log")
109      bo(1, 1:3) = 0
110      bo(2, 1:3) = npts(1:3) - 1
111      CALL pw_grid_setup(cell%hmat, pw_grid, grid_span=HALFSPACE, bounds=bo, iounit=iounit)
112
113      CALL cp_print_key_finished_output(iounit, logger, print_section, &
114                                        "")
115      ! pw_pool initialized
116      CALL pw_pool_create(pw_pool, pw_grid=pw_grid)
117      CALL pw_pool_create_pw(pw_pool, pw, use_data=REALDATA3D, in_space=REALSPACE)
118      CALL pw_pool_create_pw(pw_pool, coeff, use_data=REALDATA3D, in_space=REALSPACE)
119      ! Evaluate function on grid
120      CALL eval_pw_TabLR(pw, pw_pool, coeff, Lg, gx, gy, gz, hmat_mm=hmat, &
121                         param_section=param_section, tag=tag)
122      CALL pw_pool_give_back_pw(pw_pool, pw)
123      CALL cell_release(cell)
124
125   END SUBROUTINE Setup_Ewald_Spline
126
127! **************************************************************************************************
128!> \brief Evaluates the function G-Term in reciprocal space on the grid
129!>      and find the coefficients of the Splines
130!> \param grid ...
131!> \param pw_pool ...
132!> \param TabLR ...
133!> \param Lg ...
134!> \param gx ...
135!> \param gy ...
136!> \param gz ...
137!> \param hmat_mm ...
138!> \param param_section ...
139!> \param tag ...
140!> \par History
141!>      12.2005 created [tlaino]
142!> \author Teodoro Laino
143! **************************************************************************************************
144   SUBROUTINE eval_pw_TabLR(grid, pw_pool, TabLR, Lg, gx, gy, gz, hmat_mm, &
145                            param_section, tag)
146      TYPE(pw_type), POINTER                             :: grid
147      TYPE(pw_pool_type), POINTER                        :: pw_pool
148      TYPE(pw_type), POINTER                             :: TabLR
149      REAL(KIND=dp), DIMENSION(:), POINTER               :: Lg, gx, gy, gz
150      REAL(KIND=dp), DIMENSION(3, 3)                     :: hmat_mm
151      TYPE(section_vals_type), POINTER                   :: param_section
152      CHARACTER(LEN=*), INTENT(IN)                       :: tag
153
154      CHARACTER(len=*), PARAMETER :: routineN = 'eval_pw_TabLR', routineP = moduleN//':'//routineN
155
156      INTEGER :: act_nx, act_ny, aint_precond, handle, i, iii, is, j, js, k, ks, Lg_loc_max, &
157         Lg_loc_min, max_iter, my_i, my_j, my_k, n1, n2, n3, n_extra, NLg_loc, nxlim, nylim, &
158         nzlim, precond_kind
159      INTEGER, DIMENSION(2, 3)                           :: gbo
160      LOGICAL                                            :: success
161      REAL(KIND=dp)                                      :: dr1, dr2, dr3, eps_r, eps_x, xs1, xs2, &
162                                                            xs3
163      REAL(KIND=dp), ALLOCATABLE                         :: cos_gx(:, :), cos_gy(:, :), &
164                                                            cos_gz(:, :), lhs(:, :), rhs(:, :), &
165                                                            sin_gx(:, :), sin_gy(:, :), &
166                                                            sin_gz(:, :)
167      TYPE(pw_spline_precond_type), POINTER              :: precond
168      TYPE(section_vals_type), POINTER                   :: interp_section
169
170!NB pull expensive Cos() out of inner looop
171!NB temporaries for holding stuff so that dgemm can be used
172
173      EXTERNAL :: DGEMM
174
175      CALL timeset(routineN, handle)
176      n1 = grid%pw_grid%npts(1)
177      n2 = grid%pw_grid%npts(2)
178      n3 = grid%pw_grid%npts(3)
179      dr1 = grid%pw_grid%dr(1)
180      dr2 = grid%pw_grid%dr(2)
181      dr3 = grid%pw_grid%dr(3)
182      gbo = grid%pw_grid%bounds
183      nxlim = FLOOR(REAL(n1, KIND=dp)/2.0_dp)
184      nylim = FLOOR(REAL(n2, KIND=dp)/2.0_dp)
185      nzlim = FLOOR(REAL(n3, KIND=dp)/2.0_dp)
186      is = 0
187      js = 0
188      ks = 0
189      IF (2*nxlim /= n1) is = 1
190      IF (2*nylim /= n2) js = 1
191      IF (2*nzlim /= n3) ks = 1
192      CALL pw_zero(grid)
193
194      ! Used the full symmetry to reduce the evaluation to 1/64th
195      !NB parallelization
196      iii = 0
197      !NB allocate temporaries for Cos refactoring
198      ALLOCATE (cos_gx(SIZE(Lg), gbo(1, 1):gbo(2, 1)))
199      ALLOCATE (sin_gx(SIZE(Lg), gbo(1, 1):gbo(2, 1)))
200      ALLOCATE (cos_gy(SIZE(Lg), gbo(1, 2):gbo(2, 2)))
201      ALLOCATE (sin_gy(SIZE(Lg), gbo(1, 2):gbo(2, 2)))
202      ALLOCATE (cos_gz(SIZE(Lg), gbo(1, 3):gbo(2, 3)))
203      ALLOCATE (sin_gz(SIZE(Lg), gbo(1, 3):gbo(2, 3)))
204      !NB precalculate Cos(gx*xs1) etc for Cos refactoring
205      DO k = gbo(1, 3), gbo(2, 3)
206         my_k = k - gbo(1, 3)
207         xs3 = REAL(my_k, dp)*dr3
208         IF (k > nzlim) CYCLE
209         cos_gz(1:SIZE(Lg), k) = COS(gz(1:SIZE(Lg))*xs3)
210         sin_gz(1:SIZE(Lg), k) = SIN(gz(1:SIZE(Lg))*xs3)
211      END DO ! k
212      xs2 = 0.0_dp
213      DO j = gbo(1, 2), gbo(2, 2)
214         IF (j > nylim) CYCLE
215         cos_gy(1:SIZE(Lg), j) = COS(gy(1:SIZE(Lg))*xs2)
216         sin_gy(1:SIZE(Lg), j) = SIN(gy(1:SIZE(Lg))*xs2)
217         xs2 = xs2 + dr2
218      END DO ! j
219      xs1 = 0.0_dp
220      DO i = gbo(1, 1), gbo(2, 1)
221         IF (i > nxlim) CYCLE
222         cos_gx(1:SIZE(Lg), i) = COS(gx(1:SIZE(Lg))*xs1)
223         sin_gx(1:SIZE(Lg), i) = SIN(gx(1:SIZE(Lg))*xs1)
224         xs1 = xs1 + dr1
225      END DO ! i
226
227      !NB use DGEMM to compute sum over kg for each i, j, k
228      ! number of elements per node, round down
229      NLg_loc = SIZE(Lg)/grid%pw_grid%para%group_size
230      ! number of extra elements not yet accounted for
231      n_extra = MOD(SIZE(Lg), grid%pw_grid%para%group_size)
232      ! first n_extra nodes get NLg_loc+1, remaining get NLg_loc
233      IF (grid%pw_grid%para%my_pos < n_extra) THEN
234         Lg_loc_min = (NLg_loc + 1)*grid%pw_grid%para%my_pos + 1
235         Lg_loc_max = Lg_loc_min + (NLg_loc + 1) - 1
236      ELSE
237         Lg_loc_min = (NLg_loc + 1)*n_extra + NLg_loc*(grid%pw_grid%para%my_pos - n_extra) + 1
238         Lg_loc_max = Lg_loc_min + NLg_loc - 1
239      END IF
240      ! shouldn't be necessary
241      Lg_loc_max = MIN(SIZE(Lg), Lg_loc_max)
242      NLg_loc = Lg_loc_max - Lg_loc_min + 1
243
244      IF (NLg_loc > 0) THEN ! some work for this node
245
246         act_nx = MIN(gbo(2, 1), nxlim) - gbo(1, 1) + 1
247         act_ny = MIN(gbo(2, 2), nylim) - gbo(1, 2) + 1
248         !NB temporaries for DGEMM use
249         ALLOCATE (lhs(act_nx, NLg_loc))
250         ALLOCATE (rhs(act_ny, NLg_loc))
251
252         ! do cos(gx) cos(gy+gz) term
253         DO i = gbo(1, 1), gbo(2, 1)
254            IF (i > nxlim) CYCLE
255            lhs(i - gbo(1, 1) + 1, 1:NLg_loc) = lg(Lg_loc_min:Lg_loc_max)*cos_gx(Lg_loc_min:Lg_loc_max, i)
256         END DO
257         DO k = gbo(1, 3), gbo(2, 3)
258            IF (k > nzlim) CYCLE
259            DO j = gbo(1, 2), gbo(2, 2)
260               IF (j > nylim) CYCLE
261               rhs(j - gbo(1, 2) + 1, 1:NLg_loc) = cos_gy(Lg_loc_min:Lg_loc_max, j)*cos_gz(Lg_loc_min:Lg_loc_max, k) - &
262                                                   sin_gy(Lg_loc_min:Lg_loc_max, j)*sin_gz(Lg_loc_min:Lg_loc_max, k)
263            END DO
264            CALL DGEMM('N', 'T', act_nx, act_ny, NLg_loc, 1.0D0, lhs(1, 1), act_nx, rhs(1, 1), act_ny, 0.0D0, &
265                       grid%cr3d(gbo(1, 1), gbo(1, 2), k), SIZE(grid%cr3d, 1))
266         END DO
267
268         ! do sin(gx) sin(gy+gz) term
269         DO i = gbo(1, 1), gbo(2, 1)
270            IF (i > nxlim) CYCLE
271            lhs(i - gbo(1, 1) + 1, 1:NLg_loc) = -lg(Lg_loc_min:Lg_loc_max)*sin_gx(Lg_loc_min:Lg_loc_max, i)
272         END DO
273         DO k = gbo(1, 3), gbo(2, 3)
274            IF (k > nzlim) CYCLE
275            DO j = gbo(1, 2), gbo(2, 2)
276               IF (j > nylim) CYCLE
277               rhs(j - gbo(1, 2) + 1, 1:NLg_loc) = cos_gy(Lg_loc_min:Lg_loc_max, j)*sin_gz(Lg_loc_min:Lg_loc_max, k) + &
278                                                   sin_gy(Lg_loc_min:Lg_loc_max, j)*cos_gz(Lg_loc_min:Lg_loc_max, k)
279            END DO
280            CALL DGEMM('N', 'T', act_nx, act_ny, NLg_loc, 1.0D0, lhs(1, 1), act_nx, rhs(1, 1), act_ny, 1.0D0, &
281                       grid%cr3d(gbo(1, 1), gbo(1, 2), k), SIZE(grid%cr3d, 1))
282         END DO
283
284         !NB deallocate temporaries for DGEMM use
285         DEALLOCATE (lhs)
286         DEALLOCATE (rhs)
287         !NB deallocate temporaries for Cos refactoring
288         DEALLOCATE (cos_gx)
289         DEALLOCATE (sin_gx)
290         DEALLOCATE (cos_gy)
291         DEALLOCATE (sin_gy)
292         DEALLOCATE (cos_gz)
293         DEALLOCATE (sin_gz)
294         !NB parallelization
295      ELSE ! no work for this node, just zero contribution
296         grid%cr3d(gbo(1, 1):nxlim, gbo(1, 2):nylim, gbo(1, 3):nzlim) = 0.0_dp
297      ENDIF ! NLg_loc > 0
298
299      CALL mp_sum(grid%cr3d(gbo(1, 1):nxlim, gbo(1, 2):nylim, gbo(1, 3):nzlim), grid%pw_grid%para%group)
300
301      Fake_LoopOnGrid: DO k = gbo(1, 3), gbo(2, 3)
302         my_k = k
303         IF (k > nzlim) my_k = nzlim - ABS(nzlim - k) + ks
304         DO j = gbo(1, 2), gbo(2, 2)
305            my_j = j
306            IF (j > nylim) my_j = nylim - ABS(nylim - j) + js
307            DO i = gbo(1, 1), gbo(2, 1)
308               my_i = i
309               IF (i > nxlim) my_i = nxlim - ABS(nxlim - i) + is
310               grid%cr3d(i, j, k) = grid%cr3d(my_i, my_j, my_k)
311            END DO
312         END DO
313      END DO Fake_LoopOnGrid
314      !
315      ! Solve for spline coefficients
316      !
317      interp_section => section_vals_get_subs_vals(param_section, "INTERPOLATOR")
318      CALL section_vals_val_get(interp_section, "aint_precond", i_val=aint_precond)
319      CALL section_vals_val_get(interp_section, "precond", i_val=precond_kind)
320      CALL section_vals_val_get(interp_section, "max_iter", i_val=max_iter)
321      CALL section_vals_val_get(interp_section, "eps_r", r_val=eps_r)
322      CALL section_vals_val_get(interp_section, "eps_x", r_val=eps_x)
323      !
324      ! Solve for spline coefficients
325      !
326      CALL pw_spline_precond_create(precond, precond_kind=aint_precond, &
327                                    pool=pw_pool, pbc=.TRUE., transpose=.FALSE.)
328      CALL pw_spline_do_precond(precond, grid, TabLR)
329      CALL pw_spline_precond_set_kind(precond, precond_kind)
330      success = find_coeffs(values=grid, coeffs=TabLR, &
331                            linOp=spl3_pbc, preconditioner=precond, pool=pw_pool, &
332                            eps_r=eps_r, eps_x=eps_x, &
333                            max_iter=max_iter)
334      CPASSERT(success)
335      CALL pw_spline_precond_release(precond)
336      !
337      ! Check for the interpolation Spline
338      !
339      CALL check_spline_interp_TabLR(hmat_mm, Lg, gx, gy, gz, TabLR, param_section, &
340                                     tag)
341      CALL timestop(handle)
342   END SUBROUTINE eval_pw_TabLR
343
344! **************************************************************************************************
345!> \brief Routine to check the accuracy for the Spline Interpolation
346!> \param hmat_mm ...
347!> \param Lg ...
348!> \param gx ...
349!> \param gy ...
350!> \param gz ...
351!> \param TabLR ...
352!> \param param_section ...
353!> \param tag ...
354!> \par History
355!>      12.2005 created [tlaino]
356!> \author Teodoro Laino
357! **************************************************************************************************
358   SUBROUTINE check_spline_interp_TabLR(hmat_mm, Lg, gx, gy, gz, TabLR, &
359                                        param_section, tag)
360      REAL(KIND=dp), DIMENSION(3, 3)                     :: hmat_mm
361      REAL(KIND=dp), DIMENSION(:), POINTER               :: Lg, gx, gy, gz
362      TYPE(pw_type), POINTER                             :: TabLR
363      TYPE(section_vals_type), POINTER                   :: param_section
364      CHARACTER(LEN=*), INTENT(IN)                       :: tag
365
366      CHARACTER(len=*), PARAMETER :: routineN = 'check_spline_interp_TabLR', &
367         routineP = moduleN//':'//routineN
368
369      INTEGER                                            :: handle, i, iw, kg, npoints
370      REAL(KIND=dp)                                      :: dn(3), dr1, dr2, dr3, dxTerm, dyTerm, &
371                                                            dzTerm, errd, errf, Fterm, maxerrord, &
372                                                            maxerrorf, Na, Nn, Term, tmp1, tmp2, &
373                                                            vec(3), xs1, xs2, xs3
374      TYPE(cp_logger_type), POINTER                      :: logger
375
376      NULLIFY (logger)
377      logger => cp_get_default_logger()
378      iw = cp_print_key_unit_nr(logger, param_section, "check_spline", &
379                                extension="."//TRIM(tag)//"Log")
380      CALL timeset(routineN, handle)
381      IF (iw > 0) THEN
382         npoints = 100
383         errf = 0.0_dp
384         maxerrorf = 0.0_dp
385         errd = 0.0_dp
386         maxerrord = 0.0_dp
387         dr1 = hmat_mm(1, 1)/REAL(npoints, KIND=dp)
388         dr2 = hmat_mm(2, 2)/REAL(npoints, KIND=dp)
389         dr3 = hmat_mm(3, 3)/REAL(npoints, KIND=dp)
390         xs1 = 0.0_dp
391         xs2 = 0.0_dp
392         xs3 = 0.0_dp
393         WRITE (iw, '(A,T5,A15,4X,A17,T50,4X,A,5X,A,T80,A,T85,A15,4X,A17,T130,4X,A,5X,A)') &
394            "#", "Analytical Term", "Interpolated Term", "Error", "MaxError", &
395            "*", " Analyt Deriv  ", "Interp Deriv Mod ", "Error", "MaxError"
396         DO i = 1, npoints + 1
397            Term = 0.0_dp
398            dxTerm = 0.0_dp
399            dyTerm = 0.0_dp
400            dzTerm = 0.0_dp
401            ! Sum over k vectors
402            DO kg = 1, SIZE(Lg)
403               vec = (/REAL(gx(kg), KIND=dp), REAL(gy(kg), KIND=dp), REAL(gz(kg), KIND=dp)/)
404               Term = Term + lg(kg)*COS(vec(1)*xs1 + vec(2)*xs2 + vec(3)*xs3)
405               dxTerm = dxTerm - lg(kg)*SIN(vec(1)*xs1 + vec(2)*xs2 + vec(3)*xs3)*vec(1)
406               dyTerm = dyTerm - lg(kg)*SIN(vec(1)*xs1 + vec(2)*xs2 + vec(3)*xs3)*vec(2)
407               dzTerm = dzTerm - lg(kg)*SIN(vec(1)*xs1 + vec(2)*xs2 + vec(3)*xs3)*vec(3)
408            END DO
409            Na = SQRT(dxTerm*dxTerm + dyTerm*dyTerm + dzTerm*dzTerm)
410            dn = Eval_d_Interp_Spl3_pbc((/xs1, xs2, xs3/), TabLR)
411            Nn = SQRT(DOT_PRODUCT(dn, dn))
412            Fterm = Eval_Interp_Spl3_pbc((/xs1, xs2, xs3/), TabLR)
413            tmp1 = ABS(Term - Fterm)
414            tmp2 = SQRT(DOT_PRODUCT(dn - (/dxTerm, dyTerm, dzTerm/), dn - (/dxTerm, dyTerm, dzTerm/)))
415            errf = errf + tmp1
416            maxerrorf = MAX(maxerrorf, tmp1)
417            errd = errd + tmp2
418            maxerrord = MAX(maxerrord, tmp2)
419            WRITE (iw, '(T5,F15.10,5X,F15.10,T50,2F12.9,T80,A,T85,F15.10,5X,F15.10,T130,2F12.9)') &
420               Term, Fterm, tmp1, maxerrorf, "*", Na, Nn, tmp2, maxerrord
421            xs1 = xs1 + dr1
422            xs2 = xs2 + dr2
423            xs3 = xs3 + dr3
424         END DO
425         WRITE (iw, '(A,T5,A,T50,F12.9,T130,F12.9)') "#", "Averages", errf/REAL(npoints, kind=dp), &
426            errd/REAL(npoints, kind=dp)
427      END IF
428      CALL timestop(handle)
429      CALL cp_print_key_finished_output(iw, logger, param_section, "check_spline")
430
431   END SUBROUTINE check_spline_interp_TabLR
432
433END MODULE ewald_spline_util
434
435