1 //////////////////////////////////////////////////////////////////////// 2 // 3 // Copyright (C) 2001-2021 The Octave Project Developers 4 // 5 // See the file COPYRIGHT.md in the top-level directory of this 6 // distribution or <https://octave.org/copyright/>. 7 // 8 // This file is part of Octave. 9 // 10 // Octave is free software: you can redistribute it and/or modify it 11 // under the terms of the GNU General Public License as published by 12 // the Free Software Foundation, either version 3 of the License, or 13 // (at your option) any later version. 14 // 15 // Octave is distributed in the hope that it will be useful, but 16 // WITHOUT ANY WARRANTY; without even the implied warranty of 17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 // GNU General Public License for more details. 19 // 20 // You should have received a copy of the GNU General Public License 21 // along with Octave; see the file COPYING. If not, see 22 // <https://www.gnu.org/licenses/>. 23 // 24 //////////////////////////////////////////////////////////////////////// 25 26 #if defined (HAVE_CONFIG_H) 27 # include "config.h" 28 #endif 29 30 #if defined (HAVE_FFTW3_H) 31 # include <fftw3.h> 32 #endif 33 34 #include "lo-error.h" 35 #include "oct-fftw.h" 36 #include "oct-locbuf.h" 37 #include "quit.h" 38 #include "singleton-cleanup.h" 39 40 #if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS) 41 # include "nproc-wrapper.h" 42 #endif 43 44 namespace octave 45 { 46 #if defined (HAVE_FFTW) 47 48 fftw_planner *fftw_planner::instance = nullptr; 49 50 // Helper class to create and cache FFTW plans for both 1D and 51 // 2D. This implementation defaults to using FFTW_ESTIMATE to create 52 // the plans, which in theory is suboptimal, but provides quite 53 // reasonable performance in practice. 54 55 // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3 56 // will destroy the input and output arrays. We must, therefore, create a 57 // temporary input array with the same size and 16-byte alignment as 58 // the original array when using a different planner strategy. 59 // Note that we also use any wisdom that is available, either in a 60 // FFTW3 system wide file or as supplied by the user. 61 62 // FIXME: if we can ensure 16 byte alignment in Array<T> 63 // (<T> *data) the FFTW3 can use SIMD instructions for further 64 // acceleration. 65 66 // Note that it is profitable to store the FFTW3 plans, for small FFTs. 67 fftw_planner(void)68 fftw_planner::fftw_planner (void) 69 : meth (ESTIMATE), rplan (nullptr), rd (0), rs (0), rr (0), rh (0), rn (), 70 rsimd_align (false), nthreads (1) 71 { 72 plan[0] = plan[1] = nullptr; 73 d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0; 74 simd_align[0] = simd_align[1] = false; 75 inplace[0] = inplace[1] = false; 76 n[0] = n[1] = dim_vector (); 77 78 #if defined (HAVE_FFTW3_THREADS) 79 int init_ret = fftw_init_threads (); 80 if (! init_ret) 81 (*current_liboctave_error_handler) ("Error initializing FFTW threads"); 82 83 // Use number of processors available to the current process 84 // This can be later changed with fftw ("threads", nthreads). 85 nthreads = octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE); 86 fftw_plan_with_nthreads (nthreads); 87 #endif 88 89 // If we have a system wide wisdom file, import it. 90 fftw_import_system_wisdom (); 91 } 92 ~fftw_planner(void)93 fftw_planner::~fftw_planner (void) 94 { 95 fftw_plan *plan_p; 96 97 plan_p = reinterpret_cast<fftw_plan *> (&rplan); 98 if (*plan_p) 99 fftw_destroy_plan (*plan_p); 100 101 plan_p = reinterpret_cast<fftw_plan *> (&plan[0]); 102 if (*plan_p) 103 fftw_destroy_plan (*plan_p); 104 105 plan_p = reinterpret_cast<fftw_plan *> (&plan[1]); 106 if (*plan_p) 107 fftw_destroy_plan (*plan_p); 108 } 109 110 bool instance_ok(void)111 fftw_planner::instance_ok (void) 112 { 113 bool retval = true; 114 115 if (! instance) 116 { 117 instance = new fftw_planner (); 118 singleton_cleanup_list::add (cleanup_instance); 119 } 120 121 return retval; 122 } 123 124 void threads(int nt)125 fftw_planner::threads (int nt) 126 { 127 #if defined (HAVE_FFTW3_THREADS) 128 if (instance_ok () && nt != threads ()) 129 { 130 instance->nthreads = nt; 131 fftw_plan_with_nthreads (nt); 132 // Clear the current plans. 133 instance->rplan = instance->plan[0] = instance->plan[1] = nullptr; 134 } 135 #else 136 octave_unused_parameter (nt); 137 138 (*current_liboctave_warning_handler) 139 ("unable to change number of threads without FFTW thread support"); 140 #endif 141 } 142 143 #define CHECK_SIMD_ALIGNMENT(x) \ 144 (((reinterpret_cast<std::ptrdiff_t> (x)) & 0xF) == 0) 145 146 void * do_create_plan(int dir,const int rank,const dim_vector & dims,octave_idx_type howmany,octave_idx_type stride,octave_idx_type dist,const Complex * in,Complex * out)147 fftw_planner::do_create_plan (int dir, const int rank, 148 const dim_vector& dims, 149 octave_idx_type howmany, 150 octave_idx_type stride, 151 octave_idx_type dist, 152 const Complex *in, Complex *out) 153 { 154 int which = (dir == FFTW_FORWARD) ? 0 : 1; 155 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&plan[which]); 156 bool create_new_plan = false; 157 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); 158 bool ioinplace = (in == out); 159 160 // Don't create a new plan if we have a non SIMD plan already but 161 // can do SIMD. This prevents endlessly recreating plans if we 162 // change the alignment. 163 164 if (plan[which] == nullptr || d[which] != dist || s[which] != stride 165 || r[which] != rank || h[which] != howmany 166 || ioinplace != inplace[which] 167 || ((ioalign != simd_align[which]) ? ! ioalign : false)) 168 create_new_plan = true; 169 else 170 { 171 // We still might not have the same shape of array. 172 173 for (int i = 0; i < rank; i++) 174 if (dims(i) != n[which](i)) 175 { 176 create_new_plan = true; 177 break; 178 } 179 } 180 181 if (create_new_plan) 182 { 183 d[which] = dist; 184 s[which] = stride; 185 r[which] = rank; 186 h[which] = howmany; 187 simd_align[which] = ioalign; 188 inplace[which] = ioinplace; 189 n[which] = dims; 190 191 // Note reversal of dimensions for column major storage in FFTW. 192 octave_idx_type nn = 1; 193 OCTAVE_LOCAL_BUFFER (int, tmp, rank); 194 195 for (int i = 0, j = rank-1; i < rank; i++, j--) 196 { 197 tmp[i] = dims(j); 198 nn *= dims(j); 199 } 200 201 int plan_flags = 0; 202 bool plan_destroys_in = true; 203 204 switch (meth) 205 { 206 case UNKNOWN: 207 case ESTIMATE: 208 plan_flags |= FFTW_ESTIMATE; 209 plan_destroys_in = false; 210 break; 211 case MEASURE: 212 plan_flags |= FFTW_MEASURE; 213 break; 214 case PATIENT: 215 plan_flags |= FFTW_PATIENT; 216 break; 217 case EXHAUSTIVE: 218 plan_flags |= FFTW_EXHAUSTIVE; 219 break; 220 case HYBRID: 221 if (nn < 8193) 222 plan_flags |= FFTW_MEASURE; 223 else 224 { 225 plan_flags |= FFTW_ESTIMATE; 226 plan_destroys_in = false; 227 } 228 break; 229 } 230 231 if (ioalign) 232 plan_flags &= ~FFTW_UNALIGNED; 233 else 234 plan_flags |= FFTW_UNALIGNED; 235 236 if (*cur_plan_p) 237 fftw_destroy_plan (*cur_plan_p); 238 239 if (plan_destroys_in) 240 { 241 // Create matrix with the same size and 16-byte alignment as input 242 OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32); 243 itmp = reinterpret_cast<Complex *> 244 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + 245 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); 246 247 *cur_plan_p 248 = fftw_plan_many_dft (rank, tmp, howmany, 249 reinterpret_cast<fftw_complex *> (itmp), 250 nullptr, stride, dist, 251 reinterpret_cast<fftw_complex *> (out), 252 nullptr, stride, dist, dir, plan_flags); 253 } 254 else 255 { 256 *cur_plan_p 257 = fftw_plan_many_dft (rank, tmp, howmany, 258 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), 259 nullptr, stride, dist, 260 reinterpret_cast<fftw_complex *> (out), 261 nullptr, stride, dist, dir, plan_flags); 262 } 263 264 if (*cur_plan_p == nullptr) 265 (*current_liboctave_error_handler) ("Error creating fftw plan"); 266 } 267 268 return *cur_plan_p; 269 } 270 271 void * do_create_plan(const int rank,const dim_vector & dims,octave_idx_type howmany,octave_idx_type stride,octave_idx_type dist,const double * in,Complex * out)272 fftw_planner::do_create_plan (const int rank, const dim_vector& dims, 273 octave_idx_type howmany, 274 octave_idx_type stride, 275 octave_idx_type dist, 276 const double *in, Complex *out) 277 { 278 fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&rplan); 279 bool create_new_plan = false; 280 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); 281 282 // Don't create a new plan if we have a non SIMD plan already but 283 // can do SIMD. This prevents endlessly recreating plans if we 284 // change the alignment. 285 286 if (rplan == nullptr || rd != dist || rs != stride || rr != rank 287 || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false)) 288 create_new_plan = true; 289 else 290 { 291 // We still might not have the same shape of array. 292 293 for (int i = 0; i < rank; i++) 294 if (dims(i) != rn(i)) 295 { 296 create_new_plan = true; 297 break; 298 } 299 } 300 301 if (create_new_plan) 302 { 303 rd = dist; 304 rs = stride; 305 rr = rank; 306 rh = howmany; 307 rsimd_align = ioalign; 308 rn = dims; 309 310 // Note reversal of dimensions for column major storage in FFTW. 311 octave_idx_type nn = 1; 312 OCTAVE_LOCAL_BUFFER (int, tmp, rank); 313 314 for (int i = 0, j = rank-1; i < rank; i++, j--) 315 { 316 tmp[i] = dims(j); 317 nn *= dims(j); 318 } 319 320 int plan_flags = 0; 321 bool plan_destroys_in = true; 322 323 switch (meth) 324 { 325 case UNKNOWN: 326 case ESTIMATE: 327 plan_flags |= FFTW_ESTIMATE; 328 plan_destroys_in = false; 329 break; 330 case MEASURE: 331 plan_flags |= FFTW_MEASURE; 332 break; 333 case PATIENT: 334 plan_flags |= FFTW_PATIENT; 335 break; 336 case EXHAUSTIVE: 337 plan_flags |= FFTW_EXHAUSTIVE; 338 break; 339 case HYBRID: 340 if (nn < 8193) 341 plan_flags |= FFTW_MEASURE; 342 else 343 { 344 plan_flags |= FFTW_ESTIMATE; 345 plan_destroys_in = false; 346 } 347 break; 348 } 349 350 if (ioalign) 351 plan_flags &= ~FFTW_UNALIGNED; 352 else 353 plan_flags |= FFTW_UNALIGNED; 354 355 if (*cur_plan_p) 356 fftw_destroy_plan (*cur_plan_p); 357 358 if (plan_destroys_in) 359 { 360 // Create matrix with the same size and 16-byte alignment as input 361 OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32); 362 itmp = reinterpret_cast<double *> 363 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + 364 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); 365 366 *cur_plan_p 367 = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp, 368 nullptr, stride, dist, 369 reinterpret_cast<fftw_complex *> (out), 370 nullptr, stride, dist, plan_flags); 371 } 372 else 373 { 374 *cur_plan_p 375 = fftw_plan_many_dft_r2c (rank, tmp, howmany, 376 (const_cast<double *> (in)), 377 nullptr, stride, dist, 378 reinterpret_cast<fftw_complex *> (out), 379 nullptr, stride, dist, plan_flags); 380 } 381 382 if (*cur_plan_p == nullptr) 383 (*current_liboctave_error_handler) ("Error creating fftw plan"); 384 } 385 386 return *cur_plan_p; 387 } 388 389 fftw_planner::FftwMethod do_method(void)390 fftw_planner::do_method (void) 391 { 392 return meth; 393 } 394 395 fftw_planner::FftwMethod do_method(FftwMethod _meth)396 fftw_planner::do_method (FftwMethod _meth) 397 { 398 FftwMethod ret = meth; 399 if (_meth == ESTIMATE || _meth == MEASURE 400 || _meth == PATIENT || _meth == EXHAUSTIVE 401 || _meth == HYBRID) 402 { 403 if (meth != _meth) 404 { 405 meth = _meth; 406 if (rplan) 407 fftw_destroy_plan (reinterpret_cast<fftw_plan> (rplan)); 408 if (plan[0]) 409 fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[0])); 410 if (plan[1]) 411 fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[1])); 412 rplan = plan[0] = plan[1] = nullptr; 413 } 414 } 415 else 416 ret = UNKNOWN; 417 return ret; 418 } 419 420 float_fftw_planner *float_fftw_planner::instance = nullptr; 421 float_fftw_planner(void)422 float_fftw_planner::float_fftw_planner (void) 423 : meth (ESTIMATE), rplan (nullptr), rd (0), rs (0), rr (0), rh (0), rn (), 424 rsimd_align (false), nthreads (1) 425 { 426 plan[0] = plan[1] = nullptr; 427 d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0; 428 simd_align[0] = simd_align[1] = false; 429 inplace[0] = inplace[1] = false; 430 n[0] = n[1] = dim_vector (); 431 432 #if defined (HAVE_FFTW3F_THREADS) 433 int init_ret = fftwf_init_threads (); 434 if (! init_ret) 435 (*current_liboctave_error_handler) ("Error initializing FFTW3F threads"); 436 437 // Use number of processors available to the current process 438 // This can be later changed with fftw ("threads", nthreads). 439 nthreads = octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE); 440 fftwf_plan_with_nthreads (nthreads); 441 #endif 442 443 // If we have a system wide wisdom file, import it. 444 fftwf_import_system_wisdom (); 445 } 446 ~float_fftw_planner(void)447 float_fftw_planner::~float_fftw_planner (void) 448 { 449 fftwf_plan *plan_p; 450 451 plan_p = reinterpret_cast<fftwf_plan *> (&rplan); 452 if (*plan_p) 453 fftwf_destroy_plan (*plan_p); 454 455 plan_p = reinterpret_cast<fftwf_plan *> (&plan[0]); 456 if (*plan_p) 457 fftwf_destroy_plan (*plan_p); 458 459 plan_p = reinterpret_cast<fftwf_plan *> (&plan[1]); 460 if (*plan_p) 461 fftwf_destroy_plan (*plan_p); 462 } 463 464 bool instance_ok(void)465 float_fftw_planner::instance_ok (void) 466 { 467 bool retval = true; 468 469 if (! instance) 470 { 471 instance = new float_fftw_planner (); 472 singleton_cleanup_list::add (cleanup_instance); 473 } 474 475 return retval; 476 } 477 478 void threads(int nt)479 float_fftw_planner::threads (int nt) 480 { 481 #if defined (HAVE_FFTW3F_THREADS) 482 if (instance_ok () && nt != threads ()) 483 { 484 instance->nthreads = nt; 485 fftwf_plan_with_nthreads (nt); 486 // Clear the current plans. 487 instance->rplan = instance->plan[0] = instance->plan[1] = nullptr; 488 } 489 #else 490 octave_unused_parameter (nt); 491 492 (*current_liboctave_warning_handler) 493 ("unable to change number of threads without FFTW thread support"); 494 #endif 495 } 496 497 void * do_create_plan(int dir,const int rank,const dim_vector & dims,octave_idx_type howmany,octave_idx_type stride,octave_idx_type dist,const FloatComplex * in,FloatComplex * out)498 float_fftw_planner::do_create_plan (int dir, const int rank, 499 const dim_vector& dims, 500 octave_idx_type howmany, 501 octave_idx_type stride, 502 octave_idx_type dist, 503 const FloatComplex *in, 504 FloatComplex *out) 505 { 506 int which = (dir == FFTW_FORWARD) ? 0 : 1; 507 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&plan[which]); 508 bool create_new_plan = false; 509 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); 510 bool ioinplace = (in == out); 511 512 // Don't create a new plan if we have a non SIMD plan already but 513 // can do SIMD. This prevents endlessly recreating plans if we 514 // change the alignment. 515 516 if (plan[which] == nullptr || d[which] != dist || s[which] != stride 517 || r[which] != rank || h[which] != howmany 518 || ioinplace != inplace[which] 519 || ((ioalign != simd_align[which]) ? ! ioalign : false)) 520 create_new_plan = true; 521 else 522 { 523 // We still might not have the same shape of array. 524 525 for (int i = 0; i < rank; i++) 526 if (dims(i) != n[which](i)) 527 { 528 create_new_plan = true; 529 break; 530 } 531 } 532 533 if (create_new_plan) 534 { 535 d[which] = dist; 536 s[which] = stride; 537 r[which] = rank; 538 h[which] = howmany; 539 simd_align[which] = ioalign; 540 inplace[which] = ioinplace; 541 n[which] = dims; 542 543 // Note reversal of dimensions for column major storage in FFTW. 544 octave_idx_type nn = 1; 545 OCTAVE_LOCAL_BUFFER (int, tmp, rank); 546 547 for (int i = 0, j = rank-1; i < rank; i++, j--) 548 { 549 tmp[i] = dims(j); 550 nn *= dims(j); 551 } 552 553 int plan_flags = 0; 554 bool plan_destroys_in = true; 555 556 switch (meth) 557 { 558 case UNKNOWN: 559 case ESTIMATE: 560 plan_flags |= FFTW_ESTIMATE; 561 plan_destroys_in = false; 562 break; 563 case MEASURE: 564 plan_flags |= FFTW_MEASURE; 565 break; 566 case PATIENT: 567 plan_flags |= FFTW_PATIENT; 568 break; 569 case EXHAUSTIVE: 570 plan_flags |= FFTW_EXHAUSTIVE; 571 break; 572 case HYBRID: 573 if (nn < 8193) 574 plan_flags |= FFTW_MEASURE; 575 else 576 { 577 plan_flags |= FFTW_ESTIMATE; 578 plan_destroys_in = false; 579 } 580 break; 581 } 582 583 if (ioalign) 584 plan_flags &= ~FFTW_UNALIGNED; 585 else 586 plan_flags |= FFTW_UNALIGNED; 587 588 if (*cur_plan_p) 589 fftwf_destroy_plan (*cur_plan_p); 590 591 if (plan_destroys_in) 592 { 593 // Create matrix with the same size and 16-byte alignment as input 594 OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32); 595 itmp = reinterpret_cast<FloatComplex *> 596 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + 597 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); 598 599 *cur_plan_p 600 = fftwf_plan_many_dft (rank, tmp, howmany, 601 reinterpret_cast<fftwf_complex *> (itmp), 602 nullptr, stride, dist, 603 reinterpret_cast<fftwf_complex *> (out), 604 nullptr, stride, dist, dir, plan_flags); 605 } 606 else 607 { 608 *cur_plan_p 609 = fftwf_plan_many_dft (rank, tmp, howmany, 610 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)), 611 nullptr, stride, dist, 612 reinterpret_cast<fftwf_complex *> (out), 613 nullptr, stride, dist, dir, plan_flags); 614 } 615 616 if (*cur_plan_p == nullptr) 617 (*current_liboctave_error_handler) ("Error creating fftw plan"); 618 } 619 620 return *cur_plan_p; 621 } 622 623 void * do_create_plan(const int rank,const dim_vector & dims,octave_idx_type howmany,octave_idx_type stride,octave_idx_type dist,const float * in,FloatComplex * out)624 float_fftw_planner::do_create_plan (const int rank, const dim_vector& dims, 625 octave_idx_type howmany, 626 octave_idx_type stride, 627 octave_idx_type dist, 628 const float *in, FloatComplex *out) 629 { 630 fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&rplan); 631 bool create_new_plan = false; 632 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); 633 634 // Don't create a new plan if we have a non SIMD plan already but 635 // can do SIMD. This prevents endlessly recreating plans if we 636 // change the alignment. 637 638 if (rplan == nullptr || rd != dist || rs != stride || rr != rank 639 || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false)) 640 create_new_plan = true; 641 else 642 { 643 // We still might not have the same shape of array. 644 645 for (int i = 0; i < rank; i++) 646 if (dims(i) != rn(i)) 647 { 648 create_new_plan = true; 649 break; 650 } 651 } 652 653 if (create_new_plan) 654 { 655 rd = dist; 656 rs = stride; 657 rr = rank; 658 rh = howmany; 659 rsimd_align = ioalign; 660 rn = dims; 661 662 // Note reversal of dimensions for column major storage in FFTW. 663 octave_idx_type nn = 1; 664 OCTAVE_LOCAL_BUFFER (int, tmp, rank); 665 666 for (int i = 0, j = rank-1; i < rank; i++, j--) 667 { 668 tmp[i] = dims(j); 669 nn *= dims(j); 670 } 671 672 int plan_flags = 0; 673 bool plan_destroys_in = true; 674 675 switch (meth) 676 { 677 case UNKNOWN: 678 case ESTIMATE: 679 plan_flags |= FFTW_ESTIMATE; 680 plan_destroys_in = false; 681 break; 682 case MEASURE: 683 plan_flags |= FFTW_MEASURE; 684 break; 685 case PATIENT: 686 plan_flags |= FFTW_PATIENT; 687 break; 688 case EXHAUSTIVE: 689 plan_flags |= FFTW_EXHAUSTIVE; 690 break; 691 case HYBRID: 692 if (nn < 8193) 693 plan_flags |= FFTW_MEASURE; 694 else 695 { 696 plan_flags |= FFTW_ESTIMATE; 697 plan_destroys_in = false; 698 } 699 break; 700 } 701 702 if (ioalign) 703 plan_flags &= ~FFTW_UNALIGNED; 704 else 705 plan_flags |= FFTW_UNALIGNED; 706 707 if (*cur_plan_p) 708 fftwf_destroy_plan (*cur_plan_p); 709 710 if (plan_destroys_in) 711 { 712 // Create matrix with the same size and 16-byte alignment as input 713 OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32); 714 itmp = reinterpret_cast<float *> 715 (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) + 716 ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF)); 717 718 *cur_plan_p 719 = fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp, 720 nullptr, stride, dist, 721 reinterpret_cast<fftwf_complex *> (out), 722 nullptr, stride, dist, plan_flags); 723 } 724 else 725 { 726 *cur_plan_p 727 = fftwf_plan_many_dft_r2c (rank, tmp, howmany, 728 (const_cast<float *> (in)), 729 nullptr, stride, dist, 730 reinterpret_cast<fftwf_complex *> (out), 731 nullptr, stride, dist, plan_flags); 732 } 733 734 if (*cur_plan_p == nullptr) 735 (*current_liboctave_error_handler) ("Error creating fftw plan"); 736 } 737 738 return *cur_plan_p; 739 } 740 741 float_fftw_planner::FftwMethod do_method(void)742 float_fftw_planner::do_method (void) 743 { 744 return meth; 745 } 746 747 float_fftw_planner::FftwMethod do_method(FftwMethod _meth)748 float_fftw_planner::do_method (FftwMethod _meth) 749 { 750 FftwMethod ret = meth; 751 if (_meth == ESTIMATE || _meth == MEASURE 752 || _meth == PATIENT || _meth == EXHAUSTIVE 753 || _meth == HYBRID) 754 { 755 if (meth != _meth) 756 { 757 meth = _meth; 758 if (rplan) 759 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (rplan)); 760 if (plan[0]) 761 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[0])); 762 if (plan[1]) 763 fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[1])); 764 rplan = plan[0] = plan[1] = nullptr; 765 } 766 } 767 else 768 ret = UNKNOWN; 769 return ret; 770 } 771 772 template <typename T> 773 static inline void convert_packcomplex_1d(T * out,std::size_t nr,std::size_t nc,octave_idx_type stride,octave_idx_type dist)774 convert_packcomplex_1d (T *out, std::size_t nr, std::size_t nc, 775 octave_idx_type stride, octave_idx_type dist) 776 { 777 octave_quit (); 778 779 // Fill in the missing data. 780 781 for (std::size_t i = 0; i < nr; i++) 782 for (std::size_t j = nc/2+1; j < nc; j++) 783 out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]); 784 785 octave_quit (); 786 } 787 788 template <typename T> 789 static inline void convert_packcomplex_Nd(T * out,const dim_vector & dv)790 convert_packcomplex_Nd (T *out, const dim_vector& dv) 791 { 792 std::size_t nc = dv(0); 793 std::size_t nr = dv(1); 794 std::size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1); 795 std::size_t nrp = nr * np; 796 T *ptr1, *ptr2; 797 798 octave_quit (); 799 800 // Create space for the missing elements. 801 802 for (std::size_t i = 0; i < nrp; i++) 803 { 804 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2); 805 ptr2 = out + i * nc; 806 for (std::size_t j = 0; j < nc/2+1; j++) 807 *ptr2++ = *ptr1++; 808 } 809 810 octave_quit (); 811 812 // Fill in the missing data for the rank = 2 case directly for speed. 813 814 for (std::size_t i = 0; i < np; i++) 815 { 816 for (std::size_t j = 1; j < nr; j++) 817 for (std::size_t k = nc/2+1; k < nc; k++) 818 out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]); 819 820 for (std::size_t j = nc/2+1; j < nc; j++) 821 out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]); 822 } 823 824 octave_quit (); 825 826 // Now do the permutations needed for rank > 2 cases. 827 828 std::size_t jstart = dv(0) * dv(1); 829 std::size_t kstep = dv(0); 830 std::size_t nel = dv.numel (); 831 832 for (int inner = 2; inner < dv.ndims (); inner++) 833 { 834 std::size_t jmax = jstart * dv(inner); 835 for (std::size_t i = 0; i < nel; i+=jmax) 836 for (std::size_t j = jstart, jj = jmax-jstart; j < jj; 837 j+=jstart, jj-=jstart) 838 for (std::size_t k = 0; k < jstart; k+= kstep) 839 for (std::size_t l = nc/2+1; l < nc; l++) 840 { 841 T tmp = out[i+ j + k + l]; 842 out[i + j + k + l] = out[i + jj + k + l]; 843 out[i + jj + k + l] = tmp; 844 } 845 jstart = jmax; 846 } 847 848 octave_quit (); 849 } 850 851 int fft(const double * in,Complex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)852 fftw::fft (const double *in, Complex *out, std::size_t npts, 853 std::size_t nsamples, octave_idx_type stride, octave_idx_type dist) 854 { 855 dist = (dist < 0 ? npts : dist); 856 857 dim_vector dv (npts, 1); 858 void *vplan = fftw_planner::create_plan (1, dv, nsamples, 859 stride, dist, in, out); 860 fftw_plan plan = reinterpret_cast<fftw_plan> (vplan); 861 862 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), 863 reinterpret_cast<fftw_complex *> (out)); 864 865 // Need to create other half of the transform. 866 867 convert_packcomplex_1d (out, nsamples, npts, stride, dist); 868 869 return 0; 870 } 871 872 int fft(const Complex * in,Complex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)873 fftw::fft (const Complex *in, Complex *out, std::size_t npts, 874 std::size_t nsamples, octave_idx_type stride, octave_idx_type dist) 875 { 876 dist = (dist < 0 ? npts : dist); 877 878 dim_vector dv (npts, 1); 879 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv, 880 nsamples, stride, 881 dist, in, out); 882 fftw_plan plan = reinterpret_cast<fftw_plan> (vplan); 883 884 fftw_execute_dft (plan, 885 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), 886 reinterpret_cast<fftw_complex *> (out)); 887 888 return 0; 889 } 890 891 int ifft(const Complex * in,Complex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)892 fftw::ifft (const Complex *in, Complex *out, std::size_t npts, 893 std::size_t nsamples, octave_idx_type stride, 894 octave_idx_type dist) 895 { 896 dist = (dist < 0 ? npts : dist); 897 898 dim_vector dv (npts, 1); 899 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples, 900 stride, dist, in, out); 901 fftw_plan plan = reinterpret_cast<fftw_plan> (vplan); 902 903 fftw_execute_dft (plan, 904 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), 905 reinterpret_cast<fftw_complex *> (out)); 906 907 const Complex scale = npts; 908 for (std::size_t j = 0; j < nsamples; j++) 909 for (std::size_t i = 0; i < npts; i++) 910 out[i*stride + j*dist] /= scale; 911 912 return 0; 913 } 914 915 int fftNd(const double * in,Complex * out,const int rank,const dim_vector & dv)916 fftw::fftNd (const double *in, Complex *out, const int rank, 917 const dim_vector& dv) 918 { 919 octave_idx_type dist = 1; 920 for (int i = 0; i < rank; i++) 921 dist *= dv(i); 922 923 // Fool with the position of the start of the output matrix, so that 924 // creating other half of the matrix won't cause cache problems. 925 926 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); 927 928 void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist, 929 in, out + offset); 930 fftw_plan plan = reinterpret_cast<fftw_plan> (vplan); 931 932 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), 933 reinterpret_cast<fftw_complex *> (out+ offset)); 934 935 // Need to create other half of the transform. 936 937 convert_packcomplex_Nd (out, dv); 938 939 return 0; 940 } 941 942 int fftNd(const Complex * in,Complex * out,const int rank,const dim_vector & dv)943 fftw::fftNd (const Complex *in, Complex *out, const int rank, 944 const dim_vector& dv) 945 { 946 octave_idx_type dist = 1; 947 for (int i = 0; i < rank; i++) 948 dist *= dv(i); 949 950 void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv, 951 1, 1, dist, in, out); 952 fftw_plan plan = reinterpret_cast<fftw_plan> (vplan); 953 954 fftw_execute_dft (plan, 955 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), 956 reinterpret_cast<fftw_complex *> (out)); 957 958 return 0; 959 } 960 961 int ifftNd(const Complex * in,Complex * out,const int rank,const dim_vector & dv)962 fftw::ifftNd (const Complex *in, Complex *out, const int rank, 963 const dim_vector& dv) 964 { 965 octave_idx_type dist = 1; 966 for (int i = 0; i < rank; i++) 967 dist *= dv(i); 968 969 void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv, 970 1, 1, dist, in, out); 971 fftw_plan plan = reinterpret_cast<fftw_plan> (vplan); 972 973 fftw_execute_dft (plan, 974 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), 975 reinterpret_cast<fftw_complex *> (out)); 976 977 const std::size_t npts = dv.numel (); 978 const Complex scale = npts; 979 for (std::size_t i = 0; i < npts; i++) 980 out[i] /= scale; 981 982 return 0; 983 } 984 985 int fft(const float * in,FloatComplex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)986 fftw::fft (const float *in, FloatComplex *out, std::size_t npts, 987 std::size_t nsamples, octave_idx_type stride, octave_idx_type dist) 988 { 989 dist = (dist < 0 ? npts : dist); 990 991 dim_vector dv (npts, 1); 992 void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride, 993 dist, in, out); 994 fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan); 995 996 fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)), 997 reinterpret_cast<fftwf_complex *> (out)); 998 999 // Need to create other half of the transform. 1000 1001 convert_packcomplex_1d (out, nsamples, npts, stride, dist); 1002 1003 return 0; 1004 } 1005 1006 int fft(const FloatComplex * in,FloatComplex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)1007 fftw::fft (const FloatComplex *in, FloatComplex *out, std::size_t npts, 1008 std::size_t nsamples, octave_idx_type stride, octave_idx_type dist) 1009 { 1010 dist = (dist < 0 ? npts : dist); 1011 1012 dim_vector dv (npts, 1); 1013 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv, 1014 nsamples, stride, dist, 1015 in, out); 1016 fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan); 1017 1018 fftwf_execute_dft (plan, 1019 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), 1020 reinterpret_cast<fftwf_complex *> (out)); 1021 1022 return 0; 1023 } 1024 1025 int ifft(const FloatComplex * in,FloatComplex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)1026 fftw::ifft (const FloatComplex *in, FloatComplex *out, std::size_t npts, 1027 std::size_t nsamples, octave_idx_type stride, 1028 octave_idx_type dist) 1029 { 1030 dist = (dist < 0 ? npts : dist); 1031 1032 dim_vector dv (npts, 1); 1033 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, 1034 nsamples, stride, dist, 1035 in, out); 1036 fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan); 1037 1038 fftwf_execute_dft (plan, 1039 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), 1040 reinterpret_cast<fftwf_complex *> (out)); 1041 1042 const FloatComplex scale = npts; 1043 for (std::size_t j = 0; j < nsamples; j++) 1044 for (std::size_t i = 0; i < npts; i++) 1045 out[i*stride + j*dist] /= scale; 1046 1047 return 0; 1048 } 1049 1050 int fftNd(const float * in,FloatComplex * out,const int rank,const dim_vector & dv)1051 fftw::fftNd (const float *in, FloatComplex *out, const int rank, 1052 const dim_vector& dv) 1053 { 1054 octave_idx_type dist = 1; 1055 for (int i = 0; i < rank; i++) 1056 dist *= dv(i); 1057 1058 // Fool with the position of the start of the output matrix, so that 1059 // creating other half of the matrix won't cause cache problems. 1060 1061 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); 1062 1063 void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist, 1064 in, out + offset); 1065 fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan); 1066 1067 fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)), 1068 reinterpret_cast<fftwf_complex *> (out+ offset)); 1069 1070 // Need to create other half of the transform. 1071 1072 convert_packcomplex_Nd (out, dv); 1073 1074 return 0; 1075 } 1076 1077 int fftNd(const FloatComplex * in,FloatComplex * out,const int rank,const dim_vector & dv)1078 fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank, 1079 const dim_vector& dv) 1080 { 1081 octave_idx_type dist = 1; 1082 for (int i = 0; i < rank; i++) 1083 dist *= dv(i); 1084 1085 void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv, 1086 1, 1, dist, in, out); 1087 fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan); 1088 1089 fftwf_execute_dft (plan, 1090 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), 1091 reinterpret_cast<fftwf_complex *> (out)); 1092 1093 return 0; 1094 } 1095 1096 int ifftNd(const FloatComplex * in,FloatComplex * out,const int rank,const dim_vector & dv)1097 fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank, 1098 const dim_vector& dv) 1099 { 1100 octave_idx_type dist = 1; 1101 for (int i = 0; i < rank; i++) 1102 dist *= dv(i); 1103 1104 void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv, 1105 1, 1, dist, in, out); 1106 fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan); 1107 1108 fftwf_execute_dft (plan, 1109 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), 1110 reinterpret_cast<fftwf_complex *> (out)); 1111 1112 const std::size_t npts = dv.numel (); 1113 const FloatComplex scale = npts; 1114 for (std::size_t i = 0; i < npts; i++) 1115 out[i] /= scale; 1116 1117 return 0; 1118 } 1119 1120 #endif 1121 1122 std::string fftw_version(void)1123 fftw_version (void) 1124 { 1125 #if defined (HAVE_FFTW) 1126 return ::fftw_version; 1127 #else 1128 return "none"; 1129 #endif 1130 } 1131 1132 std::string fftwf_version(void)1133 fftwf_version (void) 1134 { 1135 #if defined (HAVE_FFTW) 1136 return ::fftwf_version; 1137 #else 1138 return "none"; 1139 #endif 1140 } 1141 } 1142