1!! Copyright (C) 2013 Umberto De Giovannini 2!! 3!! This program is free software; you can redistribute it and/or modify 4!! it under the terms of the GNU General Public License as published by 5!! the Free Software Foundation; either version 2, or (at your option) 6!! any later version. 7!! 8!! This program is distributed in the hope that it will be useful, 9!! but WITHOUT ANY WARRANTY; without even the implied warranty of 10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11!! GNU General Public License for more details. 12!! 13!! You should have received a copy of the GNU General Public License 14!! along with this program; if not, write to the Free Software 15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16!! 02110-1301, USA. 17!! 18 19#include "global.h" 20 21 22!> The includes for the PNFFT 23module pnfft_params_oct_m 24 use,intrinsic :: iso_c_binding 25 use pfft_params_oct_m 26 implicit none 27#ifdef HAVE_PNFFT 28 include "pnfft.f03" 29#endif 30end module pnfft_params_oct_m 31 32 33!> The low level module to work with the PNFFT library. 34!! http://www-user.tu-chemnitz.de/~mpip/software.php?lang=en 35module pnfft_oct_m 36 use global_oct_m 37 use io_oct_m 38 use loct_math_oct_m 39 use math_oct_m 40 use messages_oct_m 41 use mpi_oct_m 42 use namespace_oct_m 43 use parser_oct_m 44 use pfft_oct_m 45 use pnfft_params_oct_m 46 use profiling_oct_m 47 48 implicit none 49 50 private 51 52 public :: & 53 pnfft_t, & 54 pnfft_write_info, & 55 pnfft_init_plan, & 56 pnfft_copy_params, & 57 pnfft_init_procmesh, & 58 pnfft_end, & 59 pnfft_set_sp_nodes, & 60 pnfft_guru_options, & 61 zpnfft_forward, & 62 zpnfft_backward, & 63 dpnfft_forward, & 64 dpnfft_backward 65 66 67 type pnfft_t 68 private 69 70! Parameters 71 integer :: np(3) !> Processes 72 integer(C_INTPTR_T) :: N_local(3) !> Number of Fourier coefficients 73 integer(C_INTPTR_T) :: N(3) !> Number of Fourier coefficients local 74 integer(C_INTPTR_T) :: Nos(3) !> FFT grid size 75 integer(C_INTPTR_T), public :: M(3) 76 integer :: M_istart(3) 77 integer(C_INTPTR_T) :: local_M !> Local number of nodes per process 78 integer :: mm !> Real space cut-off 79 FLOAT, public :: sigma !> oversampling factor 80 integer :: flags !> PNFFT initialization options 81 logical, public :: set_defaults = .false. !> set default values from the code 82 83 FLOAT, public :: norm !> Normalization 84 85 86 integer :: comm 87! Data 88 type(C_PTR) :: plan !> pnfft plan 89 90 complex(C_DOUBLE_COMPLEX), pointer :: f_lin(:) => NULL() 91 complex(C_DOUBLE_COMPLEX), pointer :: f(:,:,:) => NULL() 92 complex(C_DOUBLE_COMPLEX), pointer :: f_hat(:,:,:) => NULL() 93 real(C_DOUBLE), pointer :: x_lin(:,:) => NULL() 94 real(C_DOUBLE), pointer :: x(:,:,:,:) => NULL() 95 96 real(C_DOUBLE) :: lower_border(3) !> contains the real-space nodes local borders 97 real(C_DOUBLE) :: upper_border(3) !> parallelization 98 FLOAT :: lo_global(3) 99 FLOAT :: up_global(3) 100 101 end type pnfft_t 102 103contains 104 105 ! --------------------------------------------------------- 106 subroutine pnfft_guru_options(pnfft, namespace) 107 type(pnfft_t), intent(inout) :: pnfft 108 type(namespace_t), intent(in) :: namespace 109 110 PUSH_SUB(pnfft_guru_options) 111 112 113 !%Variable PNFFTCutoff 114 !%Type integer 115 !%Default 6 116 !%Section Mesh::FFTs 117 !%Description 118 !% Cut-off parameter of the window function. 119 !%End 120 call parse_variable(namespace, 'PNFFTCutoff', 6, pnfft%mm) 121 122 !%Variable PNFFTOversampling 123 !%Type float 124 !%Default 2.0 125 !%Section Mesh::FFTs 126 !%Description 127 !% PNFFT oversampling factor (sigma). This will rule the size of the FFT under the hood. 128 !%End 129 call parse_variable(namespace, 'PNFFTOversampling', M_TWO, pnfft%sigma) 130 131 POP_SUB(pnfft_guru_options) 132 end subroutine pnfft_guru_options 133 134 ! --------------------------------------------------------- 135 subroutine pnfft_init_params(pnfft, pnfft_options, nn, optimize) 136 type(pnfft_t), intent(inout) :: pnfft 137 type(pnfft_t), intent(in) :: pnfft_options 138 integer, intent(in) :: nn(3) !> pnfft bandwidths 139 logical, optional, intent(in) :: optimize 140 141 integer :: ii, my_nn(3) 142 logical :: optimize_ 143 144 PUSH_SUB(pnfft_init_params) 145 146 optimize_ = optional_default(optimize, .true.) 147 148 if(.not. pnfft%set_defaults) then 149 !Set defaults 150 pnfft%mm = pnfft_options%mm 151 pnfft%sigma = pnfft_options%sigma 152 end if 153 154 my_nn = 0 155 do ii = 1, 3 156 my_nn(ii) = nn(ii)*pnfft%sigma 157 if(optimize_) call loct_fft_optimize(my_nn(ii), 1) ! ask for an odd number 158 end do 159 160 pnfft%Nos(1:3) = my_nn(1:3) 161 162#ifdef HAVE_PNFFT 163 pnfft%flags = PNFFT_MALLOC_X + PNFFT_MALLOC_F_HAT + PNFFT_MALLOC_F + & 164 PNFFT_WINDOW_KAISER_BESSEL 165#endif 166 167 POP_SUB(pnfft_init_params) 168 end subroutine pnfft_init_params 169 170 171 ! --------------------------------------------------------- 172 subroutine pnfft_init_procmesh(pnfft, mpi_grp, comm) 173 type(pnfft_t), intent(inout) :: pnfft 174 type(mpi_grp_t), intent(in) :: mpi_grp 175 integer, intent(out):: comm 176 177 integer :: ierror 178 179 PUSH_SUB(pnfft_init_procmesh) 180 181#ifdef HAVE_PNFFT 182 call pnfft_init() 183#endif 184 185 pnfft%np(1:3) = 1 186 187 call pfft_decompose(mpi_grp%size, pnfft%np(1), pnfft%np(2)) 188 189#ifdef HAVE_PNFFT 190 ierror = pnfft_create_procmesh(2, mpi_grp%comm, pnfft%np, comm) 191#else 192 ierror = 0 193#endif 194 195 if (ierror /= 0) then 196 message(1) = "The number of rows and columns in PNFFT processor grid is not equal to " 197 message(2) = "the number of processor in the MPI communicator." 198 message(3) = "Please check it." 199 call messages_fatal(3) 200 end if 201 202 POP_SUB(pnfft_init_procmesh) 203 end subroutine pnfft_init_procmesh 204 205 206 ! --------------------------------------------------------- 207 subroutine pnfft_copy_params(in, out) 208 type(pnfft_t), intent(in) :: in 209 type(pnfft_t), intent(out) :: out 210 211 212 PUSH_SUB(pnfft_copy_params) 213 214 out%mm = in%mm 215 out%sigma = in%sigma 216 out%set_defaults = in%set_defaults 217 218 POP_SUB(pnfft_copy_params) 219 end subroutine pnfft_copy_params 220 221 ! --------------------------------------------------------- 222 subroutine pnfft_write_info(pnfft) 223 type(pnfft_t), intent(in) :: pnfft 224 225 integer :: idir 226 227 PUSH_SUB(pnfft_write_info) 228 229 230 231 call messages_write("Info: PNFFT parameters") 232 call messages_new_line() 233 call messages_write(" Fourier coefficients N = ") 234 do idir = 1, 3 235 call messages_write(pnfft%N(idir)) 236 if(idir < 3) call messages_write(" x ") 237 end do 238 call messages_new_line() 239 call messages_write(" Spatial nodes per process = ") 240 call messages_write(pnfft%local_M) 241 call messages_new_line() 242 call messages_write(" Oversampling factor sigma = ") 243 call messages_write(pnfft%sigma) 244 call messages_new_line() 245 call messages_write(" FFT grid size n = ") 246 do idir = 1, 3 247 call messages_write(pnfft%Nos(idir)) 248 if(idir < 3) call messages_write(" x ") 249 end do 250 call messages_new_line() 251 call messages_write(" Real Space cutoff = ") 252 call messages_write(pnfft%mm) 253 call messages_new_line() 254 call messages_write(" Process mesh np = ") 255 do idir = 1, 3 256 call messages_write(pnfft%np(idir)) 257 if(idir < 3) call messages_write(" x ") 258 end do 259 call messages_info() 260 261 POP_SUB(pnfft_write_info) 262 end subroutine pnfft_write_info 263 264 ! --------------------------------------------------------- 265 subroutine pnfft_init_plan(pnfft, pnfft_options, mpi_comm, fs_n_global, fs_n, fs_istart, rs_n, rs_istart) 266 type(pnfft_t), intent(inout) :: pnfft 267 type(pnfft_t), intent(inout) :: pnfft_options 268 integer, intent(in) :: mpi_comm !< MPI comunicator 269 integer, intent(in) :: fs_n_global(1:3) !< The general number of elements in each dimension in Fourier space 270 integer, intent(out) :: fs_n(1:3) !< Local number of elements in each direction in Fourier space 271 integer, intent(out) :: fs_istart(1:3) !< Where does the local portion of the function start in Fourier space 272 integer, intent(out) :: rs_n(1:3) !< Local number of elements in each direction in real space 273 integer, intent(out) :: rs_istart(1:3) !< Where does the local portion of the function start in real space 274 275 real(C_DOUBLE) :: lower_border(3), upper_border(3) 276 real(C_DOUBLE) :: x_max(3) 277 integer(C_INTPTR_T) :: local_N(3), local_N_start(3), d=3, local_M 278 type(C_PTR) :: cf_hat, cf, cx 279 280 281 PUSH_SUB(pnfft_init_plan) 282 283 pnfft%N(1:3) = fs_n_global(1:3) 284 285 call pnfft_init_params(pnfft, pnfft_options, fs_n_global(1:3), optimize = .true.) 286 287 x_max(:) = CNST(0.4) 288 289#ifdef HAVE_PNFFT 290 call pnfft_local_size_guru(3, pnfft%N, pnfft%Nos, x_max, pnfft%mm, mpi_comm, & 291 PNFFT_TRANSPOSED_F_HAT, local_N, local_N_start, lower_border, upper_border) 292#endif 293 294 pnfft%comm = mpi_comm 295 296 pnfft%lower_border = lower_border 297 pnfft%upper_border = upper_border 298 pnfft%N_local(1:3) = local_N(1:3) 299 300 pnfft%M(1) = pnfft%N_local(2) 301 pnfft%M(2) = pnfft%N_local(3) 302 pnfft%M(3) = pnfft%N_local(1) 303 304 local_M = pnfft%M(1) * pnfft%M(2) * pnfft%M(3) 305 306 fs_n(1) = local_N(1) 307 fs_n(2) = local_N(3) 308 fs_n(3) = local_N(2) 309 fs_istart(1) = pnfft%N(1)/2 + local_N_start(1) + 1 310 fs_istart(2) = pnfft%N(3)/2 + local_N_start(3) + 1 311 fs_istart(3) = pnfft%N(2)/2 + local_N_start(2) + 1 312 313 314 rs_n(1:3) = pnfft%M(1:3) 315 316 rs_istart(1) = fs_istart(3) 317 rs_istart(2) = fs_istart(2) 318 rs_istart(3) = fs_istart(1) 319 320 pnfft%M_istart(:) = rs_istart(:) 321 322#ifdef HAVE_PNFFT 323 pnfft%plan = pnfft_init_guru(3, pnfft%N, pnfft%Nos, x_max, local_M, pnfft%mm, & 324 pnfft%flags, PFFT_ESTIMATE, mpi_comm) 325#endif 326 327 pnfft%local_M=local_M 328 329#ifdef HAVE_PNFFT 330 ! Get data pointers in C format 331 cf_hat = pnfft_get_f_hat(pnfft%plan) 332 cf = pnfft_get_f(pnfft%plan) 333 cx = pnfft_get_x(pnfft%plan) 334#else 335 cf_hat = C_NULL_PTR 336 cf = C_NULL_PTR 337 cx = C_NULL_PTR 338#endif 339 340 ! Convert data pointers to Fortran format 341 call c_f_pointer(cf_hat, pnfft%f_hat, [local_N(1),local_N(3),local_N(2)]) 342 call c_f_pointer(cf, pnfft%f_lin, [pnfft%local_M]) 343 call c_f_pointer(cf, pnfft%f, [pnfft%M(1),pnfft%M(2),pnfft%M(3)]) 344 call c_f_pointer(cx, pnfft%x_lin, [d, pnfft%local_M]) 345 call c_f_pointer(cx, pnfft%x, [d, pnfft%M(1),pnfft%M(2),pnfft%M(3)]) 346 347 348 349 write(6,*) mpi_world%rank, "local_N(3) ", local_N 350 write(6,*) mpi_world%rank, "local_N_start(3) ", local_N_start 351 write(6,*) mpi_world%rank, "fs_istart(1:3) ", fs_istart 352 write(6,*) mpi_world%rank, "fs_n(1:3) ", fs_n 353 write(6,*) mpi_world%rank, "rs_istart(1:3) ", rs_istart 354 write(6,*) mpi_world%rank, "rs_n(1:3) ", rs_n 355 write(6,*) mpi_world%rank, "lower_border ", lower_border 356 write(6,*) mpi_world%rank, "upper_border ", upper_border 357 write(6,*) mpi_world%rank, "rs_range ", upper_border(:) - lower_border(:) 358 write(6,*) mpi_world%rank, "local_M ", local_M 359 write(6,*) mpi_world%rank, "pnfft%N_local ", pnfft%N_local 360 write(6,*) mpi_world%rank, "pnfft%M ", pnfft%M 361 write(6,*) mpi_world%rank, "pnfft%M_istart ", pnfft%M_istart 362 write(6,*) mpi_world%rank, "size(pnfft%f_hat)", size(pnfft%f_hat,1), size(pnfft%f_hat,2), size(pnfft%f_hat, 3) 363 write(6,*) mpi_world%rank, "size(pnfft%f) ", size(pnfft%f,1), size(pnfft%f,2), size(pnfft%f,3) 364 365 POP_SUB(pnfft_init_plan) 366 end subroutine pnfft_init_plan 367 368 ! --------------------------------------------------------- 369 subroutine pnfft_end(pnfft) 370 type(pnfft_t), intent(inout) :: pnfft 371 372 PUSH_SUB(pnfft_end) 373 374#ifdef HAVE_PNFFT 375 call pnfft_finalize(pnfft%plan, PNFFT_FREE_X + PNFFT_FREE_F_HAT + PNFFT_FREE_F) 376 call pnfft_cleanup() 377#endif 378 379 nullify(pnfft%f_lin) 380 nullify(pnfft%f) 381 nullify(pnfft%f_hat) 382 nullify(pnfft%x) 383 nullify(pnfft%x_lin) 384 385 POP_SUB(pnfft_end) 386 end subroutine pnfft_end 387 388 389 ! --------------------------------------------------------- 390 ! Set the coordinates for the spatial nodes rescaled to 391 ! the 3D torus [-0.5,0.5) 392 ! --------------------------------------------------------- 393 subroutine pnfft_set_sp_nodes(pnfft, namespace, X) 394 type(pnfft_t), intent(inout) :: pnfft 395 type(namespace_t),intent(in) :: namespace 396 FLOAT, intent(in) :: X(:,:) !X(i, dim) 397 398 FLOAT :: len(3), cc(3), eps,temp, lo(3), up(3) 399 integer :: ii, idir, i1, i2, i3 400 FLOAT, allocatable :: dX(:,:) 401! integer :: j,t 402 403 PUSH_SUB(pnfft_set_sp_nodes) 404 405 eps = CNST(1.25) ! the sample nodes must be in [0.5,0.5) 406 407 lo = pnfft%lower_border 408 up = pnfft%upper_border 409 410 411 412 do idir = 1,3 413 len(:) = (maxval(X(:,idir))-minval(X(:,idir)))*eps 414 cc(:) = (minval(X(:,idir))+maxval(X(:,idir)))/M_TWO 415 end do 416 417 418 419 420 do i1 = 1, pnfft%M(1) 421 do i2 = 1, pnfft%M(2) 422 do i3 = 1, pnfft%M(3) 423 pnfft%x(1, i1,i2,i3) = (X(pnfft%M_istart(1)+i1-1, 1) - cc(1))/len(1) 424 pnfft%x(2, i1,i2,i3) = (X(pnfft%M_istart(2)+i2-1, 2) - cc(2))/len(2) 425 pnfft%x(3, i1,i2,i3) = (X(pnfft%M_istart(3)+i3-1, 3) - cc(3))/len(3) 426 427! pnfft%x_lin(1, pnfft_idx_3to1(pnfft,i1,i2,i3)) = real((X(rs_istart(1)+i1-1, 1) - cc(1))/len(1), C_DOUBLE) 428! pnfft%x_lin(2, pnfft_idx_3to1(pnfft,i1,i2,i3)) = real((X(rs_istart(2)+i2-1, 2) - cc(2))/len(2), C_DOUBLE) 429! pnfft%x_lin(3, pnfft_idx_3to1(pnfft,i1,i2,i3)) = real((X(rs_istart(3)+i3-1, 3) - cc(3))/len(3), C_DOUBLE) 430 431! pnfft%x_lin(1, pnfft_idx_3to1(pnfft,i1,i2,i3)) = & 432! (pnfft%upper_border(1) - pnfft%lower_border(1)) * rand(0) + pnfft%lower_border(1) 433! pnfft%x_lin(2, pnfft_idx_3to1(pnfft,i1,i2,i3)) = & 434! (pnfft%upper_border(2) - pnfft%lower_border(2)) * rand(0) + pnfft%lower_border(2) 435! pnfft%x_lin(3, pnfft_idx_3to1(pnfft,i1,i2,i3)) = & 436! (pnfft%upper_border(3) - pnfft%lower_border(3)) * rand(0) + pnfft%lower_border(3) 437 438! pnfft%x(1, i1,i2,i3) = (pnfft%upper_border(1) - pnfft%lower_border(1)) * rand(0) + pnfft%lower_border(1) 439! pnfft%x(2, i1,i2,i3) = (pnfft%upper_border(2) - pnfft%lower_border(2)) * rand(0) + pnfft%lower_border(2) 440! pnfft%x(3, i1,i2,i3) = (pnfft%upper_border(3) - pnfft%lower_border(3)) * rand(0) + pnfft%lower_border(3) 441 442 temp = (X(pnfft%M_istart(3)+i3-1, 3) - cc(3))/len(3) 443 if(temp > pnfft%upper_border(3) .or. temp < pnfft%lower_border(3) ) then 444 write(6,*) mpi_world%rank, "out of bounds x3 = ", temp,"-- ", pnfft%lower_border(3), pnfft%upper_border(3) 445 end if 446 447 448 end do 449 450 temp = (X(pnfft%M_istart(2)+i2-1, 2) - cc(2))/len(2) 451 if(temp > pnfft%upper_border(2) .or. temp < pnfft%lower_border(2) ) then 452 write(6,*) mpi_world%rank, "out of bounds x2 = ", temp,"-- ", pnfft%lower_border(2), pnfft%upper_border(2) 453 end if 454 455 end do 456 457 temp = (X(pnfft%M_istart(1)+i1-1, 1) - cc(1))/len(1) 458 if(temp > pnfft%upper_border(1) .or. temp < pnfft%lower_border(1) ) then 459 write(6,*) mpi_world%rank, "out of bounds x1 = ", temp,"-- ", pnfft%lower_border(1), pnfft%upper_border(1) 460 end if 461 462 end do 463 464 465! do j = 1,pnfft%local_M 466! do t=1,3 467! pnfft%x_lin(t,j) = (pnfft%upper_border(t) - pnfft%lower_border(t)) * rand(0) + pnfft%lower_border(t) 468! end do 469! end do 470 471 472 call pnfft_messages_debug(pnfft, namespace) 473 474 475 SAFE_ALLOCATE( dX(1:maxval(pnfft%M(:))-1, 1:3)) 476 477 478 ! Set the normalization factor 479 do idir = 1,3 480 do ii = 1, size(X(:,idir))-1 481 dX(ii,idir)= abs(X(ii+1, idir)-X(ii, idir)) 482! dX(ii,2)= abs(x2_(ii+1)-x2_(ii)) 483! dX(ii,3)= abs(x3_(ii+1)-x3_(ii)) 484 end do 485 end do 486 487! pnfft%norm = M_ONE/(minval(dX(:,1)) * minval(dX(:,2)) * minval(dX(:,3))) 488 pnfft%norm = M_ONE/(pnfft%N(1)*pnfft%N(2)*pnfft%N(3)) 489 490 write(6,*) mpi_world%rank, "pnfft%norm", pnfft%norm 491 492 POP_SUB(pnfft_set_sp_nodes) 493 end subroutine pnfft_set_sp_nodes 494 495 ! --------------------------------------------------------- 496 subroutine pnfft_messages_debug(pnfft, namespace) 497 type(pnfft_t), intent(in) :: pnfft 498 type(namespace_t), intent(in) :: namespace 499 500 integer :: nn, i1, i2, i3 501 integer :: iunit !< For debug output to files. 502 character(len=3) :: filenum 503#ifdef HAVE_MPI 504 integer :: ierr 505#endif 506 507 PUSH_SUB(pnfft_messages_debug) 508 509 if(debug%info) then 510 511 if(mpi_grp_is_root(mpi_world)) then 512 call io_mkdir('debug/PNFFT', namespace) 513 end if 514#ifdef HAVE_MPI 515 call MPI_Barrier(pnfft%comm, ierr) 516#endif 517 518 nn = mpi_world%rank 519 write(filenum, '(i3.3)') nn 520 521 iunit = io_open('debug/PNFFT/rs_partition.'//filenum, & 522 namespace, action='write') 523 524 do i1 = 1, pnfft%M(1) 525 do i2 = 1, pnfft%M(2) 526 do i3 = 1, pnfft%M(3) 527 write(iunit, '(3f18.8)') pnfft%x(1, i1,i2,i3), pnfft%x(2, i1,i2,i3), pnfft%x(3, i1,i2,i3) 528 end do 529 end do 530 end do 531 call io_close(iunit) 532 533 end if 534 535 POP_SUB(pnfft_messages_debug) 536 537 end subroutine pnfft_messages_debug 538 539 !--------------------------------------------------------------------------------- 540! integer function pnfft_idx_3to1(pnfft, ix , iy, iz) result(idx) 541! type(pnfft_t), intent(in) :: pnfft 542! integer, intent(in) :: ix 543! integer, intent(in) :: iy 544! integer, intent(in) :: iz 545! 546! idx = (ix-1)*pnfft%M(2)*pnfft%M(3) + (iy-1)*pnfft%M(3) + (iz-1) + 1 547! 548! end function pnfft_idx_3to1 549 550 #include "undef.F90" 551 #include "real.F90" 552 #include "pnfft_inc.F90" 553 554 #include "undef.F90" 555 #include "complex.F90" 556 #include "pnfft_inc.F90" 557 558end module pnfft_oct_m 559 560!! Local Variables: 561!! mode: f90 562!! coding: utf-8 563!! End: 564