1!! Copyright (C) 2013 Umberto De Giovannini
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17!!
18
19#include "global.h"
20
21
22!> The includes for the PNFFT
23module pnfft_params_oct_m
24  use,intrinsic :: iso_c_binding
25  use pfft_params_oct_m
26  implicit none
27#ifdef HAVE_PNFFT
28  include "pnfft.f03"
29#endif
30end module pnfft_params_oct_m
31
32
33!> The low level module to work with the PNFFT library.
34!! http://www-user.tu-chemnitz.de/~mpip/software.php?lang=en
35module pnfft_oct_m
36  use global_oct_m
37  use io_oct_m
38  use loct_math_oct_m
39  use math_oct_m
40  use messages_oct_m
41  use mpi_oct_m
42  use namespace_oct_m
43  use parser_oct_m
44  use pfft_oct_m
45  use pnfft_params_oct_m
46  use profiling_oct_m
47
48  implicit none
49
50  private
51
52  public ::               &
53    pnfft_t,              &
54    pnfft_write_info,     &
55    pnfft_init_plan,      &
56    pnfft_copy_params,    &
57    pnfft_init_procmesh,  &
58    pnfft_end,            &
59    pnfft_set_sp_nodes,   &
60    pnfft_guru_options,   &
61    zpnfft_forward,       &
62    zpnfft_backward,      &
63    dpnfft_forward,       &
64    dpnfft_backward
65
66
67  type pnfft_t
68    private
69
70! Parameters
71    integer                       :: np(3)           !> Processes
72    integer(C_INTPTR_T)           :: N_local(3)     !> Number of Fourier coefficients
73    integer(C_INTPTR_T)           :: N(3)            !> Number of Fourier coefficients local
74    integer(C_INTPTR_T)           :: Nos(3)          !> FFT grid size
75    integer(C_INTPTR_T), public   :: M(3)
76    integer                       :: M_istart(3)
77    integer(C_INTPTR_T)           :: local_M         !> Local number of nodes per process
78    integer                       :: mm              !> Real space cut-off
79    FLOAT,               public   :: sigma           !> oversampling factor
80    integer                       :: flags           !> PNFFT initialization options
81    logical,             public   :: set_defaults = .false. !> set default values from the code
82
83    FLOAT,               public   :: norm       !> Normalization
84
85
86    integer               :: comm
87! Data
88    type(C_PTR)           :: plan            !> pnfft plan
89
90    complex(C_DOUBLE_COMPLEX), pointer :: f_lin(:) => NULL()
91    complex(C_DOUBLE_COMPLEX), pointer :: f(:,:,:) => NULL()
92    complex(C_DOUBLE_COMPLEX), pointer :: f_hat(:,:,:) => NULL()
93    real(C_DOUBLE),            pointer :: x_lin(:,:) => NULL()
94    real(C_DOUBLE),            pointer :: x(:,:,:,:) => NULL()
95
96    real(C_DOUBLE)        :: lower_border(3) !> contains the real-space nodes local borders
97    real(C_DOUBLE)        :: upper_border(3) !> parallelization
98    FLOAT                 :: lo_global(3)
99    FLOAT                 :: up_global(3)
100
101  end type pnfft_t
102
103contains
104
105  ! ---------------------------------------------------------
106  subroutine pnfft_guru_options(pnfft, namespace)
107    type(pnfft_t),     intent(inout) :: pnfft
108    type(namespace_t), intent(in)    :: namespace
109
110    PUSH_SUB(pnfft_guru_options)
111
112
113    !%Variable PNFFTCutoff
114    !%Type integer
115    !%Default 6
116    !%Section Mesh::FFTs
117    !%Description
118    !% Cut-off parameter of the window function.
119    !%End
120    call parse_variable(namespace, 'PNFFTCutoff', 6, pnfft%mm)
121
122    !%Variable PNFFTOversampling
123    !%Type float
124    !%Default 2.0
125    !%Section Mesh::FFTs
126    !%Description
127    !% PNFFT oversampling factor (sigma). This will rule the size of the FFT under the hood.
128    !%End
129    call parse_variable(namespace, 'PNFFTOversampling', M_TWO, pnfft%sigma)
130
131    POP_SUB(pnfft_guru_options)
132  end subroutine pnfft_guru_options
133
134  ! ---------------------------------------------------------
135  subroutine pnfft_init_params(pnfft, pnfft_options, nn, optimize)
136    type(pnfft_t),     intent(inout) :: pnfft
137    type(pnfft_t),     intent(in)    :: pnfft_options
138    integer,           intent(in)    :: nn(3) !> pnfft bandwidths
139    logical, optional, intent(in)    :: optimize
140
141    integer :: ii, my_nn(3)
142    logical :: optimize_
143
144    PUSH_SUB(pnfft_init_params)
145
146    optimize_ = optional_default(optimize, .true.)
147
148    if(.not. pnfft%set_defaults) then
149      !Set defaults
150      pnfft%mm = pnfft_options%mm
151      pnfft%sigma = pnfft_options%sigma
152    end if
153
154    my_nn = 0
155    do ii = 1, 3
156      my_nn(ii) = nn(ii)*pnfft%sigma
157      if(optimize_) call loct_fft_optimize(my_nn(ii), 1) ! ask for an odd number
158    end do
159
160    pnfft%Nos(1:3) = my_nn(1:3)
161
162#ifdef HAVE_PNFFT
163    pnfft%flags = PNFFT_MALLOC_X + PNFFT_MALLOC_F_HAT + PNFFT_MALLOC_F + &
164                  PNFFT_WINDOW_KAISER_BESSEL
165#endif
166
167    POP_SUB(pnfft_init_params)
168  end subroutine pnfft_init_params
169
170
171  ! ---------------------------------------------------------
172  subroutine pnfft_init_procmesh(pnfft, mpi_grp, comm)
173    type(pnfft_t), intent(inout)  :: pnfft
174    type(mpi_grp_t),   intent(in) :: mpi_grp
175    integer,           intent(out):: comm
176
177    integer :: ierror
178
179    PUSH_SUB(pnfft_init_procmesh)
180
181#ifdef HAVE_PNFFT
182    call pnfft_init()
183#endif
184
185    pnfft%np(1:3) = 1
186
187    call pfft_decompose(mpi_grp%size, pnfft%np(1), pnfft%np(2))
188
189#ifdef HAVE_PNFFT
190    ierror = pnfft_create_procmesh(2, mpi_grp%comm,  pnfft%np, comm)
191#else
192    ierror = 0
193#endif
194
195    if (ierror /= 0) then
196      message(1) = "The number of rows and columns in PNFFT processor grid is not equal to "
197      message(2) = "the number of processor in the MPI communicator."
198      message(3) = "Please check it."
199      call messages_fatal(3)
200    end if
201
202    POP_SUB(pnfft_init_procmesh)
203  end subroutine pnfft_init_procmesh
204
205
206  ! ---------------------------------------------------------
207  subroutine pnfft_copy_params(in, out)
208    type(pnfft_t), intent(in)  :: in
209    type(pnfft_t), intent(out) :: out
210
211
212    PUSH_SUB(pnfft_copy_params)
213
214    out%mm = in%mm
215    out%sigma = in%sigma
216    out%set_defaults = in%set_defaults
217
218    POP_SUB(pnfft_copy_params)
219  end subroutine pnfft_copy_params
220
221  ! ---------------------------------------------------------
222  subroutine pnfft_write_info(pnfft)
223    type(pnfft_t), intent(in) :: pnfft
224
225    integer :: idir
226
227    PUSH_SUB(pnfft_write_info)
228
229
230
231    call messages_write("Info: PNFFT parameters")
232    call messages_new_line()
233    call messages_write("      Fourier coefficients      N = ")
234    do idir = 1, 3
235      call messages_write(pnfft%N(idir))
236      if(idir < 3) call messages_write(" x ")
237    end do
238    call messages_new_line()
239    call messages_write("      Spatial nodes per process   = ")
240    call messages_write(pnfft%local_M)
241    call messages_new_line()
242    call messages_write("      Oversampling factor   sigma = ")
243    call messages_write(pnfft%sigma)
244    call messages_new_line()
245    call messages_write("      FFT grid size             n = ")
246    do idir = 1, 3
247      call messages_write(pnfft%Nos(idir))
248      if(idir < 3) call messages_write(" x ")
249    end do
250    call messages_new_line()
251    call messages_write("      Real Space cutoff           = ")
252    call messages_write(pnfft%mm)
253    call messages_new_line()
254    call messages_write("      Process mesh             np = ")
255    do idir = 1, 3
256      call messages_write(pnfft%np(idir))
257      if(idir < 3) call messages_write(" x ")
258    end do
259    call messages_info()
260
261    POP_SUB(pnfft_write_info)
262  end subroutine pnfft_write_info
263
264  ! ---------------------------------------------------------
265  subroutine pnfft_init_plan(pnfft, pnfft_options, mpi_comm, fs_n_global, fs_n, fs_istart, rs_n, rs_istart)
266    type(pnfft_t),   intent(inout) :: pnfft
267    type(pnfft_t),   intent(inout) :: pnfft_options
268    integer,         intent(in)    :: mpi_comm         !< MPI comunicator
269    integer,         intent(in)    :: fs_n_global(1:3) !< The general number of elements in each dimension in Fourier space
270    integer,         intent(out)   :: fs_n(1:3)        !< Local number of elements in each direction in Fourier space
271    integer,         intent(out)   :: fs_istart(1:3)   !< Where does the local portion of the function start in Fourier space
272    integer,         intent(out)   :: rs_n(1:3)        !< Local number of elements in each direction in real space
273    integer,         intent(out)   :: rs_istart(1:3)   !< Where does the local portion of the function start in real space
274
275    real(C_DOUBLE) :: lower_border(3), upper_border(3)
276    real(C_DOUBLE) :: x_max(3)
277    integer(C_INTPTR_T) :: local_N(3), local_N_start(3), d=3, local_M
278    type(C_PTR) :: cf_hat, cf, cx
279
280
281    PUSH_SUB(pnfft_init_plan)
282
283    pnfft%N(1:3) = fs_n_global(1:3)
284
285    call pnfft_init_params(pnfft, pnfft_options, fs_n_global(1:3), optimize = .true.)
286
287    x_max(:) = CNST(0.4)
288
289#ifdef HAVE_PNFFT
290    call pnfft_local_size_guru(3, pnfft%N, pnfft%Nos, x_max, pnfft%mm, mpi_comm, &
291         PNFFT_TRANSPOSED_F_HAT, local_N, local_N_start, lower_border, upper_border)
292#endif
293
294    pnfft%comm = mpi_comm
295
296    pnfft%lower_border = lower_border
297    pnfft%upper_border = upper_border
298    pnfft%N_local(1:3)   = local_N(1:3)
299
300    pnfft%M(1)   = pnfft%N_local(2)
301    pnfft%M(2)   = pnfft%N_local(3)
302    pnfft%M(3)   = pnfft%N_local(1)
303
304    local_M = pnfft%M(1) * pnfft%M(2) * pnfft%M(3)
305
306    fs_n(1)      = local_N(1)
307    fs_n(2)      = local_N(3)
308    fs_n(3)      = local_N(2)
309    fs_istart(1) = pnfft%N(1)/2 + local_N_start(1) + 1
310    fs_istart(2) = pnfft%N(3)/2 + local_N_start(3) + 1
311    fs_istart(3) = pnfft%N(2)/2 + local_N_start(2) + 1
312
313
314    rs_n(1:3) = pnfft%M(1:3)
315
316    rs_istart(1) = fs_istart(3)
317    rs_istart(2) = fs_istart(2)
318    rs_istart(3) = fs_istart(1)
319
320    pnfft%M_istart(:) = rs_istart(:)
321
322#ifdef HAVE_PNFFT
323    pnfft%plan = pnfft_init_guru(3, pnfft%N, pnfft%Nos, x_max, local_M, pnfft%mm, &
324                 pnfft%flags, PFFT_ESTIMATE, mpi_comm)
325#endif
326
327    pnfft%local_M=local_M
328
329#ifdef HAVE_PNFFT
330    ! Get data pointers in C format
331    cf_hat = pnfft_get_f_hat(pnfft%plan)
332    cf     = pnfft_get_f(pnfft%plan)
333    cx     = pnfft_get_x(pnfft%plan)
334#else
335    cf_hat = C_NULL_PTR
336    cf     = C_NULL_PTR
337    cx     = C_NULL_PTR
338#endif
339
340    ! Convert data pointers to Fortran format
341    call c_f_pointer(cf_hat, pnfft%f_hat, [local_N(1),local_N(3),local_N(2)])
342    call c_f_pointer(cf,     pnfft%f_lin, [pnfft%local_M])
343    call c_f_pointer(cf,     pnfft%f,     [pnfft%M(1),pnfft%M(2),pnfft%M(3)])
344    call c_f_pointer(cx,     pnfft%x_lin, [d, pnfft%local_M])
345    call c_f_pointer(cx,     pnfft%x,     [d, pnfft%M(1),pnfft%M(2),pnfft%M(3)])
346
347
348
349    write(6,*) mpi_world%rank, "local_N(3)       ", local_N
350    write(6,*) mpi_world%rank, "local_N_start(3) ", local_N_start
351    write(6,*) mpi_world%rank, "fs_istart(1:3)   ", fs_istart
352    write(6,*) mpi_world%rank, "fs_n(1:3)        ", fs_n
353    write(6,*) mpi_world%rank, "rs_istart(1:3)   ", rs_istart
354    write(6,*) mpi_world%rank, "rs_n(1:3)        ", rs_n
355    write(6,*) mpi_world%rank, "lower_border     ", lower_border
356    write(6,*) mpi_world%rank, "upper_border     ", upper_border
357    write(6,*) mpi_world%rank, "rs_range         ", upper_border(:) - lower_border(:)
358    write(6,*) mpi_world%rank, "local_M          ", local_M
359    write(6,*) mpi_world%rank, "pnfft%N_local    ", pnfft%N_local
360    write(6,*) mpi_world%rank, "pnfft%M          ", pnfft%M
361    write(6,*) mpi_world%rank, "pnfft%M_istart   ", pnfft%M_istart
362    write(6,*) mpi_world%rank, "size(pnfft%f_hat)", size(pnfft%f_hat,1), size(pnfft%f_hat,2), size(pnfft%f_hat, 3)
363    write(6,*) mpi_world%rank, "size(pnfft%f)    ", size(pnfft%f,1), size(pnfft%f,2), size(pnfft%f,3)
364
365    POP_SUB(pnfft_init_plan)
366  end subroutine pnfft_init_plan
367
368  ! ---------------------------------------------------------
369  subroutine pnfft_end(pnfft)
370    type(pnfft_t), intent(inout) :: pnfft
371
372    PUSH_SUB(pnfft_end)
373
374#ifdef HAVE_PNFFT
375    call pnfft_finalize(pnfft%plan, PNFFT_FREE_X + PNFFT_FREE_F_HAT + PNFFT_FREE_F)
376    call pnfft_cleanup()
377#endif
378
379    nullify(pnfft%f_lin)
380    nullify(pnfft%f)
381    nullify(pnfft%f_hat)
382    nullify(pnfft%x)
383    nullify(pnfft%x_lin)
384
385    POP_SUB(pnfft_end)
386  end subroutine pnfft_end
387
388
389  ! ---------------------------------------------------------
390  ! Set the coordinates for the spatial nodes rescaled to
391  ! the 3D torus [-0.5,0.5)
392  ! ---------------------------------------------------------
393  subroutine pnfft_set_sp_nodes(pnfft, namespace, X)
394    type(pnfft_t),    intent(inout) :: pnfft
395    type(namespace_t),intent(in)    :: namespace
396    FLOAT,            intent(in)    :: X(:,:) !X(i, dim)
397
398    FLOAT   :: len(3), cc(3), eps,temp, lo(3), up(3)
399    integer :: ii, idir, i1, i2, i3
400    FLOAT, allocatable ::  dX(:,:)
401!    integer :: j,t
402
403    PUSH_SUB(pnfft_set_sp_nodes)
404
405    eps = CNST(1.25) ! the sample nodes must be in [0.5,0.5)
406
407    lo = pnfft%lower_border
408    up = pnfft%upper_border
409
410
411
412    do idir = 1,3
413      len(:) = (maxval(X(:,idir))-minval(X(:,idir)))*eps
414      cc(:) = (minval(X(:,idir))+maxval(X(:,idir)))/M_TWO
415    end do
416
417
418
419
420    do i1 = 1, pnfft%M(1)
421      do i2 = 1, pnfft%M(2)
422        do i3 = 1, pnfft%M(3)
423          pnfft%x(1, i1,i2,i3) = (X(pnfft%M_istart(1)+i1-1, 1)  - cc(1))/len(1)
424          pnfft%x(2, i1,i2,i3) = (X(pnfft%M_istart(2)+i2-1, 2)  - cc(2))/len(2)
425          pnfft%x(3, i1,i2,i3) = (X(pnfft%M_istart(3)+i3-1, 3)  - cc(3))/len(3)
426
427!           pnfft%x_lin(1, pnfft_idx_3to1(pnfft,i1,i2,i3)) = real((X(rs_istart(1)+i1-1, 1)  - cc(1))/len(1), C_DOUBLE)
428!           pnfft%x_lin(2, pnfft_idx_3to1(pnfft,i1,i2,i3)) = real((X(rs_istart(2)+i2-1, 2)  - cc(2))/len(2), C_DOUBLE)
429!           pnfft%x_lin(3, pnfft_idx_3to1(pnfft,i1,i2,i3)) = real((X(rs_istart(3)+i3-1, 3)  - cc(3))/len(3), C_DOUBLE)
430
431!           pnfft%x_lin(1, pnfft_idx_3to1(pnfft,i1,i2,i3)) = &
432!               (pnfft%upper_border(1) - pnfft%lower_border(1)) * rand(0) + pnfft%lower_border(1)
433!           pnfft%x_lin(2, pnfft_idx_3to1(pnfft,i1,i2,i3)) = &
434!               (pnfft%upper_border(2) - pnfft%lower_border(2)) * rand(0) + pnfft%lower_border(2)
435!           pnfft%x_lin(3, pnfft_idx_3to1(pnfft,i1,i2,i3)) = &
436!               (pnfft%upper_border(3) - pnfft%lower_border(3)) * rand(0) + pnfft%lower_border(3)
437
438!           pnfft%x(1, i1,i2,i3) = (pnfft%upper_border(1) - pnfft%lower_border(1)) * rand(0) + pnfft%lower_border(1)
439!           pnfft%x(2, i1,i2,i3) = (pnfft%upper_border(2) - pnfft%lower_border(2)) * rand(0) + pnfft%lower_border(2)
440!           pnfft%x(3, i1,i2,i3) = (pnfft%upper_border(3) - pnfft%lower_border(3)) * rand(0) + pnfft%lower_border(3)
441
442          temp = (X(pnfft%M_istart(3)+i3-1, 3)  - cc(3))/len(3)
443          if(temp > pnfft%upper_border(3) .or. temp < pnfft%lower_border(3) ) then
444            write(6,*) mpi_world%rank, "out of bounds x3 = ", temp,"-- ", pnfft%lower_border(3), pnfft%upper_border(3)
445          end if
446
447
448        end do
449
450        temp = (X(pnfft%M_istart(2)+i2-1, 2)  - cc(2))/len(2)
451        if(temp > pnfft%upper_border(2) .or. temp < pnfft%lower_border(2) ) then
452          write(6,*) mpi_world%rank, "out of bounds x2 = ", temp,"-- ", pnfft%lower_border(2), pnfft%upper_border(2)
453        end if
454
455      end do
456
457      temp = (X(pnfft%M_istart(1)+i1-1, 1)  - cc(1))/len(1)
458      if(temp > pnfft%upper_border(1) .or. temp < pnfft%lower_border(1) ) then
459        write(6,*) mpi_world%rank, "out of bounds x1 = ", temp,"-- ", pnfft%lower_border(1), pnfft%upper_border(1)
460      end if
461
462    end do
463
464
465!     do j = 1,pnfft%local_M
466!       do t=1,3
467!         pnfft%x_lin(t,j) = (pnfft%upper_border(t) - pnfft%lower_border(t)) * rand(0) + pnfft%lower_border(t)
468!       end do
469!     end do
470
471
472    call pnfft_messages_debug(pnfft, namespace)
473
474
475    SAFE_ALLOCATE( dX(1:maxval(pnfft%M(:))-1, 1:3))
476
477
478    ! Set the normalization factor
479    do idir = 1,3
480      do ii = 1, size(X(:,idir))-1
481        dX(ii,idir)= abs(X(ii+1, idir)-X(ii, idir))
482!         dX(ii,2)= abs(x2_(ii+1)-x2_(ii))
483!         dX(ii,3)= abs(x3_(ii+1)-x3_(ii))
484      end do
485    end do
486
487!     pnfft%norm = M_ONE/(minval(dX(:,1)) * minval(dX(:,2)) * minval(dX(:,3)))
488    pnfft%norm = M_ONE/(pnfft%N(1)*pnfft%N(2)*pnfft%N(3))
489
490    write(6,*) mpi_world%rank, "pnfft%norm", pnfft%norm
491
492    POP_SUB(pnfft_set_sp_nodes)
493  end subroutine pnfft_set_sp_nodes
494
495  ! ---------------------------------------------------------
496  subroutine pnfft_messages_debug(pnfft, namespace)
497    type(pnfft_t),     intent(in) :: pnfft
498    type(namespace_t), intent(in) :: namespace
499
500    integer          :: nn, i1, i2, i3
501    integer          :: iunit          !< For debug output to files.
502    character(len=3) :: filenum
503#ifdef HAVE_MPI
504    integer          :: ierr
505#endif
506
507    PUSH_SUB(pnfft_messages_debug)
508
509    if(debug%info) then
510
511      if(mpi_grp_is_root(mpi_world)) then
512        call io_mkdir('debug/PNFFT', namespace)
513      end if
514#ifdef HAVE_MPI
515      call MPI_Barrier(pnfft%comm, ierr)
516#endif
517
518      nn = mpi_world%rank
519      write(filenum, '(i3.3)') nn
520
521      iunit = io_open('debug/PNFFT/rs_partition.'//filenum, &
522           namespace, action='write')
523
524      do i1 = 1, pnfft%M(1)
525       do i2 = 1, pnfft%M(2)
526         do i3 = 1, pnfft%M(3)
527           write(iunit, '(3f18.8)') pnfft%x(1, i1,i2,i3), pnfft%x(2, i1,i2,i3), pnfft%x(3, i1,i2,i3)
528         end do
529       end do
530      end do
531      call io_close(iunit)
532
533    end if
534
535    POP_SUB(pnfft_messages_debug)
536
537  end subroutine pnfft_messages_debug
538
539  !---------------------------------------------------------------------------------
540!  integer function pnfft_idx_3to1(pnfft, ix , iy, iz) result(idx)
541!    type(pnfft_t),  intent(in) :: pnfft
542!    integer,        intent(in) :: ix
543!    integer,        intent(in) :: iy
544!    integer,        intent(in) :: iz
545!
546!    idx =  (ix-1)*pnfft%M(2)*pnfft%M(3) + (iy-1)*pnfft%M(3) + (iz-1) + 1
547!
548!  end function pnfft_idx_3to1
549
550  #include "undef.F90"
551  #include "real.F90"
552  #include "pnfft_inc.F90"
553
554  #include "undef.F90"
555  #include "complex.F90"
556  #include "pnfft_inc.F90"
557
558end module pnfft_oct_m
559
560!! Local Variables:
561!! mode: f90
562!! coding: utf-8
563!! End:
564