1!! Copyright (C) 2002-2006 M. Marques, A. Castro, A. Rubio, G. Bertsch 2!! Copyright (C) 2011 J. Alberdi-Rodriguez, P. Garcia Risueño, M. Oliveira 3!! 4!! This program is free software; you can redistribute it and/or modify 5!! it under the terms of the GNU General Public License as published by 6!! the Free Software Foundation; either version 2, or (at your option) 7!! any later version. 8!! 9!! This program is distributed in the hope that it will be useful, 10!! but WITHOUT ANY WARRANTY; without even the implied warranty of 11!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12!! GNU General Public License for more details. 13!! 14!! You should have received a copy of the GNU General Public License 15!! along with this program; if not, write to the Free Software 16!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 17!! 02110-1301, USA. 18!! 19 20#include "global.h" 21 22!> Fast Fourier Transform module. 23!! This module provides a single interface that works with different 24!! FFT implementations. 25module fft_oct_m 26 use accel_oct_m 27#ifdef HAVE_OPENCL 28 use cl 29#ifdef HAVE_CLFFT 30 use clfft 31#endif 32#endif 33 use fftw_oct_m 34 use fftw_params_oct_m 35 use global_oct_m 36 use,intrinsic :: iso_c_binding 37 use lalg_basic_oct_m 38 use loct_math_oct_m 39 use messages_oct_m 40 use mpi_oct_m 41 use namespace_oct_m 42 use nfft_oct_m 43#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS) 44 use omp_lib 45#endif 46 use parser_oct_m 47 use pfft_oct_m 48 use pfft_params_oct_m 49 use pnfft_oct_m 50 use profiling_oct_m 51 use types_oct_m 52 use unit_system_oct_m 53 use varinfo_oct_m 54 55 implicit none 56 57 private 58 public :: & 59 fft_t, & 60 fft_all_init, & 61 fft_all_end, & 62 fft_init, & 63 fft_init_stage1, & 64 fft_end, & 65 fft_copy, & 66 fft_get_dims, & 67 pad_feq, & 68 dfft_forward, & 69 zfft_forward, & 70 dfft_backward, & 71 zfft_backward, & 72 fft_scaling_factor 73 74 75 !> global constants 76 integer, public, parameter :: & 77 FFT_NONE = 0, & 78 FFT_REAL = 1, & 79 FFT_COMPLEX = 2 80 81 integer, public, parameter :: & 82 FFTLIB_NONE = 0, & 83 FFTLIB_FFTW = 1, & 84 FFTLIB_PFFT = 2, & 85 FFTLIB_ACCEL = 3, & 86 FFTLIB_NFFT = 4, & 87 FFTLIB_PNFFT = 5 88 89 integer, parameter :: & 90 FFT_MAX = 10, & 91 FFT_NULL = -1 92 93 type fft_t 94 private 95 integer :: slot !< in which slot do we have this fft 96 97 integer, public :: type !< is the fft real or complex 98 integer, public :: library !< what library are we using 99 100 integer :: comm !< MPI communicator 101 integer :: rs_n_global(3) !< total size of the fft in each direction in real space 102 integer :: fs_n_global(3) !< total size of the fft in each direction in fourier space 103 integer :: rs_n(3) !< local size of the fft in in each direction real space 104 integer :: fs_n(3) !< local size of the fft in in each direction fourier space 105 integer :: rs_istart(1:3) !< where does the local portion of the function start in real space 106 integer :: fs_istart(1:3) !< where does the local portion of the function start in fourier space 107 108 integer, public :: stride_rs(1:3) 109 integer, public :: stride_fs(1:3) 110 111 type(c_ptr) :: planf !< plan for forward transform 112 type(c_ptr) :: planb !< plan for backward transform 113 !integer(ptrdiff_t_kind) :: pfft_planf !< PFFT plan for forward transform 114 !integer(ptrdiff_t_kind) :: pfft_planb !< PFFT plan for backward transform 115 116 !> The following arrays have to be stored here and allocated in the initialization routine because of PFFT 117 !> These arrays are also used for FFTW, as we want to have aligned memory 118 FLOAT, pointer, public :: drs_data(:,:,:) !< array used to store the function in real space. 119 CMPLX, pointer, public :: zrs_data(:,:,:) !< array used to store the function in real space. 120 CMPLX, pointer, public :: fs_data(:,:,:) !< array used to store the function in Fourier space. 121#ifdef HAVE_CLFFT 122 !> data for clfft 123 type(clfftPlanHandle) :: cl_plan_fw 124 type(clfftPlanHandle) :: cl_plan_bw !< for real transforms we need a different plan, so we always use 2 125#endif 126 type(c_ptr) :: cuda_plan_fw 127 type(c_ptr) :: cuda_plan_bw 128 type(nfft_t), public :: nfft 129 type(pnfft_t), public :: pnfft 130 131 logical, public :: aligned_memory 132 end type fft_t 133 134 interface dfft_forward 135 module procedure dfft_forward_1d, dfft_forward_cl, dfft_forward_3d 136 end interface dfft_forward 137 138 interface zfft_forward 139 module procedure zfft_forward_1d, zfft_forward_cl, zfft_forward_3d 140 end interface zfft_forward 141 142 interface dfft_backward 143 module procedure dfft_backward_1d, dfft_backward_cl, dfft_backward_3d 144 end interface dfft_backward 145 146 interface zfft_backward 147 module procedure zfft_backward_1d, zfft_backward_cl, zfft_backward_3d 148 end interface zfft_backward 149 150 logical, save, public :: fft_initialized = .false. 151 integer, save :: fft_refs(FFT_MAX) 152 type(fft_t), save :: fft_array(FFT_MAX) 153 logical :: fft_optimize 154 integer, save :: fft_prepare_plan 155 integer, public :: fft_default_lib = -1 156 type(nfft_t), save :: nfft_options 157 type(pnfft_t), save :: pnfft_options 158 159 integer, parameter :: & 160 CUFFT_R2C = z'2a', & 161 CUFFT_C2R = z'2c', & 162 CUFFT_C2C = z'29', & 163 CUFFT_D2Z = z'6a', & 164 CUFFT_Z2D = z'6c', & 165 CUFFT_Z2Z = z'69' 166 167contains 168 169 ! --------------------------------------------------------- 170 !> initialize the table 171 subroutine fft_all_init(namespace) 172 type(namespace_t), intent(in) :: namespace 173 174 integer :: ii 175#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS) 176 integer :: iret 177#endif 178 179 PUSH_SUB(fft_all_init) 180 181 fft_initialized = .true. 182 183 !%Variable FFTOptimize 184 !%Type logical 185 !%Default yes 186 !%Section Mesh::FFTs 187 !%Description 188 !% Should <tt>octopus</tt> optimize the FFT dimensions? 189 !% This means that the mesh to which FFTs are applied is not taken to be as small 190 !% as possible: some points may be added to each direction in order to get a "good number" 191 !% for the performance of the FFT algorithm. 192 !% The best FFT grid dimensions are given by <math>2^a 3^b 5^c 7^d 11^e 13^f</math> 193 !% where <math>a,b,c,d</math> are arbitrary and <math>e,f</math> are 0 or 1. 194 !% (<a href=http://www.fftw.org/doc/Complex-DFTs.html>ref</a>). 195 !% In some cases, namely when using 196 !% the split-operator, or Suzuki-Trotter propagators, this option should be turned off. 197 !% For spatial FFTs in periodic directions, the grid is never optimized, but a warning will 198 !% be written if the number is not good, with a suggestion of a better one to use, so you 199 !% can try a different spacing if you want to get a good number. 200 !%End 201 call parse_variable(namespace, 'FFTOptimize', .true., fft_optimize) 202 do ii = 1, FFT_MAX 203 fft_refs(ii) = FFT_NULL 204 end do 205 206 !%Variable FFTPreparePlan 207 !%Type integer 208 !%Default fftw_measure 209 !%Section Mesh::FFTs 210 !%Description 211 !% The FFTs are performed in octopus with the help of <a href=http://www.fftw.org>FFTW</a> and similar packages. 212 !% Before doing the actual computations, this package prepares a "plan", which means that 213 !% the precise numerical strategy to be followed to compute the FFT is machine/compiler-dependent, 214 !% and therefore the software attempts to figure out which is this precise strategy (see the 215 !% FFTW documentation for details). This plan preparation, which has to be done for each particular 216 !% FFT shape, can be done exhaustively and carefully (slow), or merely estimated. Since this is 217 !% a rather critical numerical step, by default it is done carefully, which implies a longer initial 218 !% initialization, but faster subsequent computations. You can change this behaviour by changing 219 !% this <tt>FFTPreparePlan</tt> variable, and in this way you can force FFTW to do a fast guess or 220 !% estimation of which is the best way to perform the FFT. 221 !%Option fftw_measure 0 222 !% This is the default, and implies a longer initialization, but involves a more careful analysis 223 !% of the strategy to follow, and therefore more efficient FFTs. 224 !%Option fftw_estimate 64 225 !% This is the "fast initialization" scheme, in which the plan is merely guessed from "reasonable" 226 !% assumptions. 227 !%Option fftw_patient 32 228 !% It is like fftw_measure, but considers a wider range of algorithms and often produces a 229 !% "more optimal" plan (especially for large transforms), but at the expense of several times 230 !% longer planning time (especially for large transforms). 231 !%Option fftw_exhaustive 8 232 !% It is like fftw_patient, but considers an even wider range of algorithms, 233 !% including many that we think are unlikely to be fast, to produce the most optimal 234 !% plan but with a substantially increased planning time. 235 !%End 236 call parse_variable(namespace, 'FFTPreparePlan', FFTW_MEASURE, fft_prepare_plan) 237 if(.not. varinfo_valid_option('FFTPreparePlan', fft_prepare_plan)) then 238 call messages_input_error(namespace, 'FFTPreparePlan') 239 end if 240 241 !%Variable FFTLibrary 242 !%Type integer 243 !%Section Mesh::FFTs 244 !%Default fftw 245 !%Description 246 !% (experimental) You can select the FFT library to use. 247 !%Option fftw 1 248 !% Uses FFTW3 library. 249 !%Option pfft 2 250 !% (experimental) Uses PFFT library, which has to be linked. 251 !%Option accel 3 252 !% (experimental) Uses a GPU accelerated library. This only 253 !% works if Octopus was compiled with Cuda or OpenCL support. 254 !%End 255 call parse_variable(namespace, 'FFTLibrary', FFTLIB_FFTW, fft_default_lib) 256 257 if(fft_default_lib == FFTLIB_ACCEL) then 258#if ! (defined(HAVE_CLFFT) || defined(HAVE_CUDA)) 259 call messages_write('You have selected the Accelerated FFT, but Octopus was compiled', new_line = .true.) 260 call messages_write('without clfft (OpenCL) or Cuda support.') 261 call messages_fatal() 262#endif 263 if(.not. accel_is_enabled()) then 264 call messages_write('You have selected the accelerated FFT, but acceleration is disabled.') 265 call messages_fatal() 266 end if 267 end if 268 269#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS) 270 if(omp_get_max_threads() > 1) then 271 272 call messages_write('Info: Initializing Multi-threaded FFTW') 273 call messages_info() 274 275 iret = fftw_init_threads() 276 if (iret == 0) then 277 call messages_write('Initialization of FFTW3 threads failed.') 278 call messages_fatal() 279 end if 280 call fftw_plan_with_nthreads(omp_get_max_threads()) 281 282 end if 283#endif 284 285 call nfft_guru_options(nfft_options, namespace) 286 call pnfft_guru_options(pnfft_options, namespace) 287 288 POP_SUB(fft_all_init) 289 end subroutine fft_all_init 290 291 292 ! --------------------------------------------------------- 293 !> delete all plans 294 subroutine fft_all_end() 295 integer :: ii 296 297 PUSH_SUB(fft_all_end) 298 299 do ii = 1, FFT_MAX 300 if(fft_refs(ii) /= FFT_NULL) then 301 call fft_end(fft_array(ii)) 302 end if 303 end do 304 305#ifdef HAVE_PFFT 306 call pfft_cleanup() 307#endif 308 309#if defined(HAVE_OPENMP) && defined(HAVE_FFTW3_THREADS) 310 call fftw_cleanup_threads() 311#else 312 call fftw_cleanup() 313#endif 314 315 fft_initialized = .false. 316 317 POP_SUB(fft_all_end) 318 end subroutine fft_all_end 319 320 ! --------------------------------------------------------- 321 subroutine fft_init(this, nn, dim, type, library, optimize, optimize_parity, mpi_comm, mpi_grp, use_aligned) 322 type(fft_t), intent(inout) :: this !< FFT data type 323 integer, intent(inout) :: nn(3) !< Size of the box 324 integer, intent(in) :: dim !< Dimensions of the box 325 integer, intent(in) :: type !< The type of the FFT; real or complex 326 integer, intent(in) :: library !< FFT library to be used. Can be overridden. 327 logical, intent(in) :: optimize(3) !< whether we should optimize grid in each direction 328 integer, intent(in) :: optimize_parity(3) !< choose optimized grid in each direction as 329 !! even (0), odd (1), or whatever (negative). 330 integer, optional, intent(out) :: mpi_comm !< MPI communicator 331 type(mpi_grp_t), optional, intent(in) :: mpi_grp !< the mpi_group we want to use for the parallelization 332 logical, optional :: use_aligned !< For FFTW we can use aligned memory 333 334 integer :: ii, jj, fft_dim, idir, column_size, row_size, alloc_size, n3 335 integer :: n_1, n_2, n_3, nn_temp(3) 336 integer :: library_ 337 type(mpi_grp_t) :: mpi_grp_ 338 339#ifdef HAVE_CLFFT 340 real(8) :: scale 341 integer :: status 342#endif 343#ifdef HAVE_PFFT 344 integer :: ierror 345#endif 346 347 PUSH_SUB(fft_init) 348 349 ASSERT(fft_initialized) 350 351 ASSERT(type == FFT_REAL .or. type == FFT_COMPLEX) 352 353 mpi_grp_ = mpi_world 354 if(present(mpi_grp)) mpi_grp_ = mpi_grp 355 356 this%aligned_memory = optional_default(use_aligned, .false.) 357 358 ! First, figure out the dimensionality of the FFT. 359 fft_dim = 0 360 do ii = 1, dim 361 if(nn(ii) <= 1) exit 362 fft_dim = fft_dim + 1 363 end do 364 365 if(fft_dim == 0) then 366 message(1) = "Internal error in fft_init: apparently, a 1x1x1 FFT is required." 367 call messages_fatal(1) 368 end if 369 370 if(fft_dim > 3) call messages_not_implemented('FFT for dimension > 3') 371 372 library_ = library 373 nn_temp(1:fft_dim) = nn(1:fft_dim) 374 375 select case (library_) 376 case (FFTLIB_ACCEL) 377 378 do ii = 1, fft_dim 379 ! the AMD OpenCL FFT only supports sizes 2, 3 and 5, but size 380 ! 5 gives an fpe error on the Radeon 7970 (APPML 1.8), so we 381 ! only use factors 2 and 3 382#ifdef HAVE_CLFFT 383 nn_temp(ii) = fft_size(nn(ii), (/2, 3/)) 384#else 385 nn_temp(ii) = fft_size(nn(ii), (/2, 3, 5, 7/)) 386#endif 387 if(fft_optimize .and. optimize(ii)) nn(ii) = nn_temp(ii) 388 end do 389 390 ! if we can't optimize, in some cases we can't use the library 391 if(any(nn(1:fft_dim) /= nn_temp(1:fft_dim))) then 392 call messages_write('Invalid grid size for clfft. FFTW will be used instead.') 393 call messages_warning() 394 library_ = FFTLIB_FFTW 395 end if 396 397 case (FFTLIB_NFFT) 398 399 do ii = 1, fft_dim 400 !NFFT likes even grids 401 !The underlying FFT grids are optimized inside the nfft_init routine 402 if(int(nn(ii)/2)*2 /= nn(ii) .and. (fft_optimize .and. optimize(ii)) )& 403 nn(ii)=nn(ii)+1 404 end do 405 406 case (FFTLIB_PNFFT) 407 408 do ii = 1, fft_dim 409 !also PNFFT likes even grids 410 if(int(nn(ii)/2)*2 /= nn(ii)) nn(ii)=nn(ii)+1 411 end do 412 413 if(fft_dim < 3) & 414 call messages_not_implemented('PNFFT support for dimension < 3') 415 416 case default 417 418 if(fft_dim < 3 .and. library_ == FFTLIB_PFFT) & 419 call messages_not_implemented('PFFT support for dimension < 3') 420 421 422 ! FFT optimization 423 if(any(optimize_parity(1:fft_dim) > 1)) then 424 message(1) = "Internal error in fft_init: optimize_parity must be negative, 0, or 1." 425 call messages_fatal(1) 426 end if 427 428 do ii = 1, fft_dim 429 call loct_fft_optimize(nn_temp(ii), optimize_parity(ii)) 430 if(fft_optimize .and. optimize(ii)) nn(ii) = nn_temp(ii) 431 end do 432 433 end select 434 435 ! find out if fft has already been allocated 436 jj = 0 437 do ii = FFT_MAX, 1, -1 438 if(fft_refs(ii) /= FFT_NULL) then 439 if(all(nn(1:dim) == fft_array(ii)%rs_n_global(1:dim)) .and. type == fft_array(ii)%type & 440 .and. library_ == fft_array(ii)%library .and. library_ /= FFTLIB_NFFT & 441 .and. library_ /= FFTLIB_PNFFT & 442 .and. this%aligned_memory .eqv. fft_array(ii)%aligned_memory) then 443 ! NFFT and PNFFT plans are always allocated from scratch since they 444 ! are very likely to be different 445 this = fft_array(ii) ! return a copy 446 fft_refs(ii) = fft_refs(ii) + 1 ! increment the ref count 447 if (present(mpi_comm)) mpi_comm = fft_array(ii)%comm ! also return the MPI communicator 448 POP_SUB(fft_init) 449 return 450 end if 451 else 452 jj = ii 453 end if 454 end do 455 456 if(jj == 0) then 457 message(1) = "Not enough slots for FFTs." 458 message(2) = "Please increase FFT_MAX in fft.F90 and recompile." 459 call messages_fatal(2) 460 end if 461 462 ! jj now contains an empty slot 463 fft_refs(jj) = 1 464 fft_array(jj)%slot = jj 465 fft_array(jj)%type = type 466 fft_array(jj)%library = library_ 467 fft_array(jj)%rs_n_global(1:dim) = nn(1:dim) 468 fft_array(jj)%rs_n_global(dim+1:) = 1 469 nullify(fft_array(jj)%drs_data) 470 nullify(fft_array(jj)%zrs_data) 471 nullify(fft_array(jj)%fs_data) 472 473 fft_array(jj)%aligned_memory = this%aligned_memory 474 475 ! Initialize parallel communicator 476 select case (library_) 477 case (FFTLIB_PFFT) 478#ifdef HAVE_PFFT 479 call pfft_init() 480 481 call pfft_decompose(mpi_grp_%size, column_size, row_size) 482 483 ierror = pfft_create_procmesh_2d(mpi_grp_%comm, column_size, row_size, fft_array(jj)%comm) 484 485 if (ierror /= 0) then 486 message(1) = "The number of rows and columns in PFFT processor grid is not equal to " 487 message(2) = "the number of processor in the MPI communicator." 488 message(3) = "Please check it." 489 call messages_fatal(3) 490 end if 491#endif 492 493 case (FFTLIB_PNFFT) 494 call pnfft_init_procmesh(fft_array(jj)%pnfft, mpi_grp_, fft_array(jj)%comm) 495 496 case default 497 fft_array(jj)%comm = -1 498 499 end select 500 501 if (present(mpi_comm)) mpi_comm = fft_array(jj)%comm 502 503 ! Get dimentions of arrays 504 select case (library_) 505 case (FFTLIB_FFTW) 506 call fftw_get_dims(fft_array(jj)%rs_n_global, type == FFT_REAL, fft_array(jj)%fs_n_global) 507 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global 508 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global 509 fft_array(jj)%rs_istart = 1 510 fft_array(jj)%fs_istart = 1 511 512 if(this%aligned_memory) then 513 call fftw_alloc_memory(fft_array(jj)%rs_n_global, type == FFT_REAL, fft_array(jj)%fs_n_global, & 514 fft_array(jj)%drs_data, fft_array(jj)%zrs_data, fft_array(jj)%fs_data) 515 end if 516 517 case (FFTLIB_PFFT) 518#ifdef HAVE_PFFT 519 call pfft_get_dims(fft_array(jj)%rs_n_global, mpi_comm, type == FFT_REAL, & 520 alloc_size, fft_array(jj)%fs_n_global, fft_array(jj)%rs_n, & 521 fft_array(jj)%fs_n, fft_array(jj)%rs_istart, fft_array(jj)%fs_istart) 522 !write(*,"(6(A,3I4,/),A,I10,/)") "PFFT: rs_n_global = ",fft_array(jj)%rs_n_global,& 523 ! "fs_n_global = ",fft_array(jj)%fs_n_global,& 524 ! "rs_n = ",fft_array(jj)%rs_n,& 525 ! "fs_n = ",fft_array(jj)%fs_n,& 526 ! "rs_istart = ",fft_array(jj)%rs_istart,& 527 ! "fs_istart = ",fft_array(jj)%fs_istart,& 528 ! "alloc_size = ",alloc_size 529#endif 530 531 ! Allocate memory. Note that PFFT may need extra memory space 532 ! and that in fourier space the function will be transposed 533 if (type == FFT_REAL) then 534 n_1 = max(1, fft_array(jj)%rs_n(1)) 535 n_2 = max(1, fft_array(jj)%rs_n(2)) 536 n_3 = max(1, fft_array(jj)%rs_n(3)) 537 538 n3 = ceiling(real(2*alloc_size)/real(n_1*n_2)) 539 SAFE_ALLOCATE(fft_array(jj)%drs_data(1:n_1, 1:n_2, 1:n3)) 540 else 541 n3 = ceiling(real(alloc_size)/real(fft_array(jj)%rs_n(1)*fft_array(jj)%rs_n(2))) 542 SAFE_ALLOCATE(fft_array(jj)%zrs_data(1:fft_array(jj)%rs_n(1), 1:fft_array(jj)%rs_n(2), 1:n3)) 543 end if 544 545 n_1 = max(1, fft_array(jj)%fs_n(1)) 546 n_2 = max(1, fft_array(jj)%fs_n(2)) 547 n_3 = max(1, fft_array(jj)%fs_n(3)) 548 549 n3 = ceiling(real(alloc_size)/real(n_3*n_1)) 550 SAFE_ALLOCATE(fft_array(jj)%fs_data(1:n_3, 1:n_1, 1:n3)) 551 552 case(FFTLIB_ACCEL) 553 call fftw_get_dims(fft_array(jj)%rs_n_global, (type == FFT_REAL), fft_array(jj)%fs_n_global) 554 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global 555 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global 556 fft_array(jj)%rs_istart = 1 557 fft_array(jj)%fs_istart = 1 558 559 case(FFTLIB_NFFT) 560 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global 561 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global 562 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global 563 fft_array(jj)%rs_istart = 1 564 fft_array(jj)%fs_istart = 1 565 566 case(FFTLIB_PNFFT) 567 fft_array(jj)%fs_n_global = fft_array(jj)%rs_n_global 568 fft_array(jj)%rs_n = fft_array(jj)%rs_n_global 569 fft_array(jj)%fs_n = fft_array(jj)%fs_n_global 570 fft_array(jj)%rs_istart = 1 571 fft_array(jj)%fs_istart = 1 572 ! indices partition is performed together with the plan preparation 573 574 575 end select 576 577 ! Prepare plans 578 select case (library_) 579 case (FFTLIB_FFTW) 580 if(.not. this%aligned_memory) then 581 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, & 582 type == FFT_REAL, FFTW_FORWARD, fft_prepare_plan+FFTW_UNALIGNED) 583 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, & 584 type == FFT_REAL, FFTW_BACKWARD, fft_prepare_plan+FFTW_UNALIGNED) 585 else 586 if(type == FFT_REAL) then 587 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, & 588 type == FFT_REAL, FFTW_FORWARD, fft_prepare_plan, & 589 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data) 590 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, & 591 type == FFT_REAL, FFTW_BACKWARD, fft_prepare_plan, & 592 din_=fft_array(jj)%drs_data, cout_=fft_array(jj)%fs_data) 593 else 594 call fftw_prepare_plan(fft_array(jj)%planf, fft_dim, fft_array(jj)%rs_n_global, & 595 type == FFT_REAL, FFTW_FORWARD, fft_prepare_plan, & 596 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data) 597 call fftw_prepare_plan(fft_array(jj)%planb, fft_dim, fft_array(jj)%rs_n_global, & 598 type == FFT_REAL, FFTW_BACKWARD, fft_prepare_plan, & 599 cin_=fft_array(jj)%zrs_data, cout_=fft_array(jj)%fs_data) 600 end if 601 end if 602 603 case(FFTLIB_NFFT) 604 call nfft_copy_info(this%nfft,fft_array(jj)%nfft) !copy default parameters set in the calling routine 605 call nfft_init(fft_array(jj)%nfft, nfft_options, fft_array(jj)%rs_n_global, & 606 fft_dim, fft_array(jj)%rs_n_global, optimize = .true.) 607 608 case (FFTLIB_PFFT) 609#ifdef HAVE_PFFT 610 if(type == FFT_REAL) then 611 call pfft_prepare_plan_r2c(fft_array(jj)%planf, fft_array(jj)%rs_n_global, fft_array(jj)%drs_data, & 612 fft_array(jj)%fs_data, FFTW_FORWARD, fft_prepare_plan, mpi_comm) 613 call pfft_prepare_plan_c2r(fft_array(jj)%planb, fft_array(jj)%rs_n_global, fft_array(jj)%fs_data, & 614 fft_array(jj)%drs_data, FFTW_BACKWARD, fft_prepare_plan, mpi_comm) 615 else 616 call pfft_prepare_plan_c2c(fft_array(jj)%planf, fft_array(jj)%rs_n_global, fft_array(jj)%zrs_data, & 617 fft_array(jj)%fs_data, FFTW_FORWARD, fft_prepare_plan, mpi_comm) 618 call pfft_prepare_plan_c2c(fft_array(jj)%planb, fft_array(jj)%rs_n_global, fft_array(jj)%fs_data, & 619 fft_array(jj)%zrs_data, FFTW_BACKWARD, fft_prepare_plan, mpi_comm) 620 end if 621#endif 622 case (FFTLIB_PNFFT) 623 call pnfft_copy_params(this%pnfft, fft_array(jj)%pnfft) ! pass default parameters like in NFFT 624 625 ! NOTE: 626 ! PNFFT (likewise NFFT) breaks the symmetry between real space and Fourier space 627 ! by allowing the possibility to have an unstructured grid in rs and by 628 ! using different parallelizations (the rs is transposed w.r.t. fs). 629 ! Octopus, in fourier_space_m, uses the convention for which the mapping 630 ! between rs and fs is done with a forward transform (and fs->rs with backward). 631 ! This is exactly the opposite of the definitions used by all the libraries 632 ! performing FFTs (PNFFT and NFFT included) [see e.g. M. Frigo, and S. G. Johnson, Proc. 633 ! IEEE 93, 216-231 (2005)]. 634 ! While this leads to no problem on ordinary ffts where fs and rs can be exchanged 635 ! it does makes a fundamental difference for PNFFT (for some reason I don`t know NFFT 636 ! is still symmetric). 637 ! Therefore, in order to perform rs->fs tranforms with PNFFT one should use the 638 ! backward transform. 639 640 call pnfft_init_plan(fft_array(jj)%pnfft, pnfft_options, mpi_comm, fft_array(jj)%fs_n_global, & 641 fft_array(jj)%fs_n, fft_array(jj)%fs_istart, fft_array(jj)%rs_n, fft_array(jj)%rs_istart) 642 643 case(FFTLIB_ACCEL) 644 645 fft_array(jj)%stride_rs(1) = 1 646 fft_array(jj)%stride_fs(1) = 1 647 do ii = 2, fft_dim 648 fft_array(jj)%stride_rs(ii) = fft_array(jj)%stride_rs(ii - 1)*fft_array(jj)%rs_n(ii - 1) 649 fft_array(jj)%stride_fs(ii) = fft_array(jj)%stride_fs(ii - 1)*fft_array(jj)%fs_n(ii - 1) 650 end do 651 652#ifdef HAVE_CUDA 653 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_fw, & 654 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), CUFFT_D2Z) 655 call cuda_fft_plan3d(fft_array(jj)%cuda_plan_bw, & 656 fft_array(jj)%rs_n_global(3), fft_array(jj)%rs_n_global(2), fft_array(jj)%rs_n_global(1), CUFFT_Z2D) 657#endif 658 659#ifdef HAVE_CLFFT 660 661 ! create the plans 662 call clfftCreateDefaultPlan(fft_array(jj)%cl_plan_fw, accel%context%cl_context, & 663 fft_dim, int(fft_array(jj)%rs_n_global, 8), status) 664 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftCreateDefaultPlan') 665 666 call clfftCreateDefaultPlan(fft_array(jj)%cl_plan_bw, accel%context%cl_context, & 667 fft_dim, int(fft_array(jj)%rs_n_global, 8), status) 668 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftCreateDefaultPlan') 669 670 ! set precision 671 672 call clfftSetPlanPrecision(fft_array(jj)%cl_plan_fw, CLFFT_DOUBLE, status) 673 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanPrecision') 674 675 call clfftSetPlanPrecision(fft_array(jj)%cl_plan_bw, CLFFT_DOUBLE, status) 676 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanPrecision') 677 678 ! set number of transforms to 1 679 680 call clfftSetPlanBatchSize(fft_array(jj)%cl_plan_fw, 1_8, status) 681 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanBatchSize') 682 683 call clfftSetPlanBatchSize(fft_array(jj)%cl_plan_bw, 1_8, status) 684 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanBatchSize') 685 686 ! set the type precision to double 687 688 call clfftSetPlanPrecision(fft_array(jj)%cl_plan_fw, CLFFT_DOUBLE, status) 689 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanPrecision') 690 691 call clfftSetPlanPrecision(fft_array(jj)%cl_plan_bw, CLFFT_DOUBLE, status) 692 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanPrecision') 693 694 695 ! set the layout 696 697 if(type == FFT_REAL) then 698 699 call clfftSetLayout(fft_array(jj)%cl_plan_fw, CLFFT_REAL, CLFFT_HERMITIAN_INTERLEAVED, status) 700 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetLayout') 701 702 call clfftSetLayout(fft_array(jj)%cl_plan_bw, CLFFT_HERMITIAN_INTERLEAVED, CLFFT_REAL, status) 703 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetLayout') 704 705 else 706 707 call clfftSetLayout(fft_array(jj)%cl_plan_fw, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED, status) 708 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetLayout') 709 710 call clfftSetLayout(fft_array(jj)%cl_plan_bw, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED, status) 711 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetLayout') 712 713 end if 714 715 ! set the plans as at out of place 716 717 call clfftSetResultLocation(fft_array(jj)%cl_plan_fw, CLFFT_OUTOFPLACE, status) 718 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetResultLocation') 719 720 call clfftSetResultLocation(fft_array(jj)%cl_plan_bw, CLFFT_OUTOFPLACE, status) 721 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetResultLocation') 722 723 ! the strides 724 725 call clfftSetPlanInStride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_rs, 8), status) 726 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanInStride') 727 728 call clfftSetPlanOutStride(fft_array(jj)%cl_plan_fw, fft_dim, int(fft_array(jj)%stride_fs, 8), status) 729 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanOutStride') 730 731 call clfftSetPlanInStride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_fs, 8), status) 732 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanInStride') 733 734 call clfftSetPlanOutStride(fft_array(jj)%cl_plan_bw, fft_dim, int(fft_array(jj)%stride_rs, 8), status) 735 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanOutStride') 736 737 ! set the scaling factors 738 739 scale = 1.0_8/(product(real(fft_array(jj)%rs_n_global(1:fft_dim), 8))) 740 741 call clfftSetPlanScale(fft_array(jj)%cl_plan_fw, CLFFT_FORWARD, 1.0_8, status) 742 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanScale') 743 744 call clfftSetPlanScale(fft_array(jj)%cl_plan_fw, CLFFT_BACKWARD, scale, status) 745 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanScale') 746 747 if(type == FFT_REAL) then 748 749 call clfftSetPlanScale(fft_array(jj)%cl_plan_bw, CLFFT_FORWARD, 1.0_8, status) 750 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanScale') 751 752 call clfftSetPlanScale(fft_array(jj)%cl_plan_bw, CLFFT_BACKWARD, scale, status) 753 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanScale') 754 755 else 756 757 call clfftSetPlanScale(fft_array(jj)%cl_plan_bw, CLFFT_FORWARD, scale, status) 758 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanScale') 759 760 call clfftSetPlanScale(fft_array(jj)%cl_plan_bw, CLFFT_BACKWARD, 1.0_8, status) 761 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftSetPlanScale') 762 763 end if 764 765 ! now 'bake' the plans, this signals that the plans are ready to use 766 767 call clfftBakePlan(fft_array(jj)%cl_plan_fw, accel%command_queue, status) 768 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftBakePlan') 769 770 call clfftBakePlan(fft_array(jj)%cl_plan_bw, accel%command_queue, status) 771 if(status /= CLFFT_SUCCESS) call clfft_print_error(status, 'clfftBakePlan') 772 773#endif 774 775 case default 776 call messages_write('Invalid FFT library.') 777 call messages_fatal() 778 end select 779 780 this = fft_array(jj) 781 782 ! Write information 783 if (.not. (library_ == FFTLIB_NFFT .or. library_ == FFTLIB_PNFFT)) then 784 call messages_write('Info: FFT grid dimensions =') 785 do idir = 1, dim 786 call messages_write(fft_array(jj)%rs_n_global(idir)) 787 if(idir < dim) call messages_write(" x ") 788 end do 789 call messages_new_line() 790 791 call messages_write(' Total grid size =') 792 call messages_write(product(fft_array(jj)%rs_n_global(1:dim))) 793 call messages_write(' (') 794 call messages_write(product(fft_array(jj)%rs_n_global(1:dim))*CNST(8.0), units = unit_megabytes, fmt = '(f6.1)') 795 call messages_write(' )') 796 if(any(nn(1:fft_dim) /= nn_temp(1:fft_dim))) then 797 call messages_new_line() 798 call messages_write(' Inefficient FFT grid. A better grid would be: ') 799 do idir = 1, fft_dim 800 call messages_write(nn_temp(idir)) 801 end do 802 end if 803 call messages_info() 804 end if 805 806 select case (library_) 807 case (FFTLIB_PFFT) 808 write(message(1),'(a)') "Info: FFT library = PFFT" 809 write(message(2),'(a)') "Info: PFFT processor grid" 810 write(message(3),'(a, i9)') " No. of processors = ", mpi_grp_%size 811 write(message(4),'(a, i9)') " No. of columns in the proc. grid = ", column_size 812 write(message(5),'(a, i9)') " No. of rows in the proc. grid = ", row_size 813 write(message(6),'(a, i9)') " The size of integer is = ", C_INTPTR_T 814 call messages_info(6) 815 816 case (FFTLIB_PNFFT) 817 call messages_write("Info: FFT library = PNFFT") 818 call messages_info() 819 call pnfft_write_info(fft_array(jj)%pnfft) 820 821 case (FFTLIB_NFFT) 822 call messages_write("Info: FFT library = NFFT") 823 call messages_info() 824 call nfft_write_info(fft_array(jj)%nfft) 825 826 end select 827 828 POP_SUB(fft_init) 829 end subroutine fft_init 830 831 ! --------------------------------------------------------- 832 !> Some fft-libraries (only NFFT for the moment) need an additional 833 !! precomputation stage that depends on the spatial grid whose size 834 !! may change after fft_init 835 subroutine fft_init_stage1(this, namespace, XX, nn) 836 type(fft_t), intent(inout) :: this !< FFT data type 837 !> NFFT spatial nodes on x-axis XX(:,1), y-axis XX(:,2), 838 !! and z-axis XX(:,3) 839 type(namespace_t), intent(in) :: namespace 840 FLOAT, intent(in) :: XX(:,:) 841 integer, optional, intent(in) :: nn(:) 842 843 integer :: slot 844 845 PUSH_SUB(fft_init_stage1) 846 847 ASSERT(size(XX,2) == 3) 848 849 slot = this%slot 850 select case (fft_array(slot)%library) 851 case (FFTLIB_FFTW) 852 !Do nothing 853 case (FFTLIB_NFFT) 854 ASSERT(present(nn)) 855 call nfft_precompute(fft_array(slot)%nfft, & 856 XX(1:nn(1),1), XX(1:nn(2),2), XX(1:nn(3),3)) 857 858 case (FFTLIB_PFFT) 859 !Do nothing 860 case(FFTLIB_ACCEL) 861 !Do nothing 862 case(FFTLIB_PNFFT) 863 call pnfft_set_sp_nodes(fft_array(slot)%pnfft, namespace, XX) 864 865 case default 866 call messages_write('Invalid FFT library.') 867 call messages_fatal() 868 end select 869 870 871 872 POP_SUB(fft_init_stage1) 873 end subroutine fft_init_stage1 874 ! --------------------------------------------------------- 875 subroutine fft_end(this) 876 type(fft_t), intent(inout) :: this 877 878 integer :: ii 879#ifdef HAVE_CLFFT 880 integer :: status 881#endif 882 883 PUSH_SUB(fft_end) 884 885 ii = this%slot 886 if(fft_refs(ii) == FFT_NULL) then 887 message(1) = "Trying to deallocate FFT that has not been allocated." 888 call messages_warning(1) 889 else 890 if(fft_refs(ii) > 1) then 891 fft_refs(ii) = fft_refs(ii) - 1 892 else 893 select case (fft_array(ii)%library) 894 case (FFTLIB_FFTW) 895 call fftw_destroy_plan(fft_array(ii)%planf) 896 call fftw_destroy_plan(fft_array(ii)%planb) 897 898 if(this%aligned_memory) then 899 call fftw_free_memory(this%type == FFT_REAL, & 900 fft_array(ii)%drs_data, fft_array(ii)%zrs_data, fft_array(ii)%fs_data) 901 end if 902 903 case (FFTLIB_PFFT) 904#ifdef HAVE_PFFT 905 call pfft_destroy_plan(fft_array(ii)%planf) 906 call pfft_destroy_plan(fft_array(ii)%planb) 907#endif 908 SAFE_DEALLOCATE_P(fft_array(ii)%drs_data) 909 SAFE_DEALLOCATE_P(fft_array(ii)%zrs_data) 910 SAFE_DEALLOCATE_P(fft_array(ii)%fs_data) 911 912 case(FFTLIB_ACCEL) 913#ifdef HAVE_CUDA 914 call cuda_fft_destroy(fft_array(ii)%cuda_plan_fw) 915 call cuda_fft_destroy(fft_array(ii)%cuda_plan_bw) 916#endif 917#ifdef HAVE_CLFFT 918 call clfftDestroyPlan(fft_array(ii)%cl_plan_fw, status) 919 call clfftDestroyPlan(fft_array(ii)%cl_plan_bw, status) 920#endif 921 922 case(FFTLIB_NFFT) 923 call nfft_end(fft_array(ii)%nfft) 924 925 case(FFTLIB_PNFFT) 926 call pnfft_end(fft_array(ii)%pnfft) 927 928 end select 929 fft_refs(ii) = FFT_NULL 930 end if 931 end if 932 933 POP_SUB(fft_end) 934 end subroutine fft_end 935 936 ! --------------------------------------------------------- 937 subroutine fft_copy(fft_i, fft_o) 938 type(fft_t), intent(in) :: fft_i 939 type(fft_t), intent(out) :: fft_o 940 941 PUSH_SUB(fft_copy) 942 943 ASSERT(fft_i%slot>=1.and.fft_i%slot<=FFT_MAX) 944 ASSERT(fft_refs(fft_i%slot) > 0) 945 946 fft_o = fft_i 947 fft_refs(fft_i%slot) = fft_refs(fft_i%slot) + 1 948 949 POP_SUB(fft_copy) 950 end subroutine fft_copy 951 952 ! --------------------------------------------------------- 953 subroutine fft_get_dims(fft, rs_n_global, fs_n_global, rs_n, fs_n, rs_istart, fs_istart) 954 type(fft_t), intent(in) :: fft 955 integer, intent(out) :: rs_n_global(1:3) 956 integer, intent(out) :: fs_n_global(1:3) 957 integer, intent(out) :: rs_n(1:3) 958 integer, intent(out) :: fs_n(1:3) 959 integer, intent(out) :: rs_istart(1:3) 960 integer, intent(out) :: fs_istart(1:3) 961 962 integer :: slot 963 964 PUSH_SUB(fft_get_dims) 965 966 slot = fft%slot 967 rs_n_global(1:3) = fft_array(slot)%rs_n_global(1:3) 968 fs_n_global(1:3) = fft_array(slot)%fs_n_global(1:3) 969 rs_n(1:3) = fft_array(slot)%rs_n(1:3) 970 fs_n(1:3) = fft_array(slot)%fs_n(1:3) 971 rs_istart(1:3) = fft_array(slot)%rs_istart(1:3) 972 fs_istart(1:3) = fft_array(slot)%fs_istart(1:3) 973 974 POP_SUB(fft_get_dims) 975 end subroutine fft_get_dims 976 977 ! --------------------------------------------------------- 978 !> convert between array index and G-vector 979 function pad_feq(ii, nn, mode) 980 integer, intent(in) :: ii,nn 981 logical, intent(in) :: mode 982 integer :: pad_feq 983 984 ! no push_sub: called too frequently 985 986 if(mode) then ! index to frequency number 987 if( ii <= nn/2 + 1 ) then 988 pad_feq = ii - 1 989 else 990 pad_feq = ii - nn - 1 991 end if 992 else 993 if( ii >= 0 ) then 994 pad_feq = ii + 1 995 else 996 pad_feq = ii + nn + 1 997 end if 998 end if 999 1000 return 1001 end function pad_feq 1002 1003 ! ------------------------------------------------------- 1004 1005 integer function fft_size(size, factors) 1006 integer, intent(in) :: size 1007 integer, intent(in) :: factors(:) 1008 1009 integer :: nfactors 1010 integer :: nondiv 1011 integer, allocatable :: exponents(:) 1012 1013 PUSH_SUB(fft_size) 1014 1015 nfactors = ubound(factors, dim = 1) 1016 1017 SAFE_ALLOCATE(exponents(1:nfactors)) 1018 1019 fft_size = size 1020 do 1021 call get_exponents(fft_size, nfactors, factors, exponents, nondiv) 1022 if(nondiv == 1) exit 1023 fft_size = fft_size + 1 1024 end do 1025 1026 SAFE_DEALLOCATE_A(exponents) 1027 1028 POP_SUB(fft_size) 1029 end function fft_size 1030 1031 ! ------------------------------------------------------- 1032 1033 subroutine get_exponents(num, nfactors, factors, exponents, nondiv) 1034 integer, intent(in) :: num 1035 integer, intent(in) :: nfactors 1036 integer, intent(in) :: factors(:) 1037 integer, intent(out) :: exponents(:) 1038 integer, intent(out) :: nondiv 1039 1040 integer :: ifactor 1041 1042 PUSH_SUB(get_exponents) 1043 1044 nondiv = num 1045 do ifactor = 1, nfactors 1046 exponents(ifactor) = 0 1047 do 1048 if(mod(nondiv, factors(ifactor)) /= 0) exit 1049 nondiv = nondiv/factors(ifactor) 1050 exponents(ifactor) = exponents(ifactor) + 1 1051 end do 1052 end do 1053 1054 POP_SUB(get_exponents) 1055 end subroutine get_exponents 1056 1057 1058 ! ---------------------------------------------------------- 1059 1060 subroutine fft_operation_count(fft) 1061 type(fft_t), intent(in) :: fft 1062 1063 real(8) :: fullsize 1064 1065 PUSH_SUB(fft_operation_count) 1066 1067 fullsize = product(TOFLOAT(fft%fs_n(1:3))) 1068 call profiling_count_operations(CNST(5.0)*fullsize*log(fullsize)/log(M_TWO)) 1069 1070 POP_SUB(fft_operation_count) 1071 end subroutine fft_operation_count 1072 1073 1074 ! ---------------------------------------------------------- 1075 1076 !> This function returns the factor required to normalize a function 1077 !> after a forward and backward transform. 1078 FLOAT pure function fft_scaling_factor(fft) result(scaling_factor) 1079 type(fft_t), intent(in) :: fft 1080 1081 ! for the moment this factor is handled by the backwards transform for most libraries 1082 scaling_factor = M_ONE 1083 1084 select case (fft_array(fft%slot)%library) 1085 case(FFTLIB_ACCEL) 1086#ifdef HAVE_CUDA 1087 scaling_factor = & 1088 M_ONE/(fft_array(fft%slot)%rs_n_global(1)*fft_array(fft%slot)%rs_n_global(2)*fft_array(fft%slot)%rs_n_global(3)) 1089#endif 1090 end select 1091 1092 end function fft_scaling_factor 1093 1094#include "undef.F90" 1095#include "real.F90" 1096#include "fft_inc.F90" 1097 1098#include "undef.F90" 1099#include "complex.F90" 1100#include "fft_inc.F90" 1101 1102end module fft_oct_m 1103 1104!! Local Variables: 1105!! mode: f90 1106!! coding: utf-8 1107!! End: 1108