1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2001-2021 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING.  If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
26 #if defined (HAVE_CONFIG_H)
27 #  include "config.h"
28 #endif
29 
30 #if defined (HAVE_FFTW3_H)
31 #  include <fftw3.h>
32 #endif
33 
34 #include "lo-error.h"
35 #include "oct-fftw.h"
36 #include "oct-locbuf.h"
37 #include "quit.h"
38 #include "singleton-cleanup.h"
39 
40 #if defined (HAVE_FFTW3_THREADS) || defined (HAVE_FFTW3F_THREADS)
41 #  include "nproc-wrapper.h"
42 #endif
43 
44 namespace octave
45 {
46 #if defined (HAVE_FFTW)
47 
48   fftw_planner *fftw_planner::instance = nullptr;
49 
50   // Helper class to create and cache FFTW plans for both 1D and
51   // 2D.  This implementation defaults to using FFTW_ESTIMATE to create
52   // the plans, which in theory is suboptimal, but provides quite
53   // reasonable performance in practice.
54 
55   // Also note that if FFTW_ESTIMATE is not used then the planner in FFTW3
56   // will destroy the input and output arrays.  We must, therefore, create a
57   // temporary input array with the same size and 16-byte alignment as
58   // the original array when using a different planner strategy.
59   // Note that we also use any wisdom that is available, either in a
60   // FFTW3 system wide file or as supplied by the user.
61 
62   // FIXME: if we can ensure 16 byte alignment in Array<T>
63   // (<T> *data) the FFTW3 can use SIMD instructions for further
64   // acceleration.
65 
66   // Note that it is profitable to store the FFTW3 plans, for small FFTs.
67 
fftw_planner(void)68   fftw_planner::fftw_planner (void)
69     : meth (ESTIMATE), rplan (nullptr), rd (0), rs (0), rr (0), rh (0), rn (),
70       rsimd_align (false), nthreads (1)
71   {
72     plan[0] = plan[1] = nullptr;
73     d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
74     simd_align[0] = simd_align[1] = false;
75     inplace[0] = inplace[1] = false;
76     n[0] = n[1] = dim_vector ();
77 
78 #if defined (HAVE_FFTW3_THREADS)
79     int init_ret = fftw_init_threads ();
80     if (! init_ret)
81       (*current_liboctave_error_handler) ("Error initializing FFTW threads");
82 
83     // Use number of processors available to the current process
84     // This can be later changed with fftw ("threads", nthreads).
85     nthreads = octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE);
86     fftw_plan_with_nthreads (nthreads);
87 #endif
88 
89     // If we have a system wide wisdom file, import it.
90     fftw_import_system_wisdom ();
91   }
92 
~fftw_planner(void)93   fftw_planner::~fftw_planner (void)
94   {
95     fftw_plan *plan_p;
96 
97     plan_p = reinterpret_cast<fftw_plan *> (&rplan);
98     if (*plan_p)
99       fftw_destroy_plan (*plan_p);
100 
101     plan_p = reinterpret_cast<fftw_plan *> (&plan[0]);
102     if (*plan_p)
103       fftw_destroy_plan (*plan_p);
104 
105     plan_p = reinterpret_cast<fftw_plan *> (&plan[1]);
106     if (*plan_p)
107       fftw_destroy_plan (*plan_p);
108   }
109 
110   bool
instance_ok(void)111   fftw_planner::instance_ok (void)
112   {
113     bool retval = true;
114 
115     if (! instance)
116       {
117         instance = new fftw_planner ();
118         singleton_cleanup_list::add (cleanup_instance);
119       }
120 
121     return retval;
122   }
123 
124   void
threads(int nt)125   fftw_planner::threads (int nt)
126   {
127 #if defined (HAVE_FFTW3_THREADS)
128     if (instance_ok () && nt != threads ())
129       {
130         instance->nthreads = nt;
131         fftw_plan_with_nthreads (nt);
132         // Clear the current plans.
133         instance->rplan = instance->plan[0] = instance->plan[1] = nullptr;
134       }
135 #else
136     octave_unused_parameter (nt);
137 
138     (*current_liboctave_warning_handler)
139       ("unable to change number of threads without FFTW thread support");
140 #endif
141   }
142 
143 #define CHECK_SIMD_ALIGNMENT(x)                         \
144   (((reinterpret_cast<std::ptrdiff_t> (x)) & 0xF) == 0)
145 
146   void *
do_create_plan(int dir,const int rank,const dim_vector & dims,octave_idx_type howmany,octave_idx_type stride,octave_idx_type dist,const Complex * in,Complex * out)147   fftw_planner::do_create_plan (int dir, const int rank,
148                                 const dim_vector& dims,
149                                 octave_idx_type howmany,
150                                 octave_idx_type stride,
151                                 octave_idx_type dist,
152                                 const Complex *in, Complex *out)
153   {
154     int which = (dir == FFTW_FORWARD) ? 0 : 1;
155     fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&plan[which]);
156     bool create_new_plan = false;
157     bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
158     bool ioinplace = (in == out);
159 
160     // Don't create a new plan if we have a non SIMD plan already but
161     // can do SIMD.  This prevents endlessly recreating plans if we
162     // change the alignment.
163 
164     if (plan[which] == nullptr || d[which] != dist || s[which] != stride
165         || r[which] != rank || h[which] != howmany
166         || ioinplace != inplace[which]
167         || ((ioalign != simd_align[which]) ? ! ioalign : false))
168       create_new_plan = true;
169     else
170       {
171         // We still might not have the same shape of array.
172 
173         for (int i = 0; i < rank; i++)
174           if (dims(i) != n[which](i))
175             {
176               create_new_plan = true;
177               break;
178             }
179       }
180 
181     if (create_new_plan)
182       {
183         d[which] = dist;
184         s[which] = stride;
185         r[which] = rank;
186         h[which] = howmany;
187         simd_align[which] = ioalign;
188         inplace[which] = ioinplace;
189         n[which] = dims;
190 
191         // Note reversal of dimensions for column major storage in FFTW.
192         octave_idx_type nn = 1;
193         OCTAVE_LOCAL_BUFFER (int, tmp, rank);
194 
195         for (int i = 0, j = rank-1; i < rank; i++, j--)
196           {
197             tmp[i] = dims(j);
198             nn *= dims(j);
199           }
200 
201         int plan_flags = 0;
202         bool plan_destroys_in = true;
203 
204         switch (meth)
205           {
206           case UNKNOWN:
207           case ESTIMATE:
208             plan_flags |= FFTW_ESTIMATE;
209             plan_destroys_in = false;
210             break;
211           case MEASURE:
212             plan_flags |= FFTW_MEASURE;
213             break;
214           case PATIENT:
215             plan_flags |= FFTW_PATIENT;
216             break;
217           case EXHAUSTIVE:
218             plan_flags |= FFTW_EXHAUSTIVE;
219             break;
220           case HYBRID:
221             if (nn < 8193)
222               plan_flags |= FFTW_MEASURE;
223             else
224               {
225                 plan_flags |= FFTW_ESTIMATE;
226                 plan_destroys_in = false;
227               }
228             break;
229           }
230 
231         if (ioalign)
232           plan_flags &= ~FFTW_UNALIGNED;
233         else
234           plan_flags |= FFTW_UNALIGNED;
235 
236         if (*cur_plan_p)
237           fftw_destroy_plan (*cur_plan_p);
238 
239         if (plan_destroys_in)
240           {
241             // Create matrix with the same size and 16-byte alignment as input
242             OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32);
243             itmp = reinterpret_cast<Complex *>
244                    (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
245                     ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
246 
247             *cur_plan_p
248               = fftw_plan_many_dft (rank, tmp, howmany,
249                                     reinterpret_cast<fftw_complex *> (itmp),
250                                     nullptr, stride, dist,
251                                     reinterpret_cast<fftw_complex *> (out),
252                                     nullptr, stride, dist, dir, plan_flags);
253           }
254         else
255           {
256             *cur_plan_p
257               = fftw_plan_many_dft (rank, tmp, howmany,
258                                     reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
259                                     nullptr, stride, dist,
260                                     reinterpret_cast<fftw_complex *> (out),
261                                     nullptr, stride, dist, dir, plan_flags);
262           }
263 
264         if (*cur_plan_p == nullptr)
265           (*current_liboctave_error_handler) ("Error creating fftw plan");
266       }
267 
268     return *cur_plan_p;
269   }
270 
271   void *
do_create_plan(const int rank,const dim_vector & dims,octave_idx_type howmany,octave_idx_type stride,octave_idx_type dist,const double * in,Complex * out)272   fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
273                                 octave_idx_type howmany,
274                                 octave_idx_type stride,
275                                 octave_idx_type dist,
276                                 const double *in, Complex *out)
277   {
278     fftw_plan *cur_plan_p = reinterpret_cast<fftw_plan *> (&rplan);
279     bool create_new_plan = false;
280     bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
281 
282     // Don't create a new plan if we have a non SIMD plan already but
283     // can do SIMD.  This prevents endlessly recreating plans if we
284     // change the alignment.
285 
286     if (rplan == nullptr || rd != dist || rs != stride || rr != rank
287         || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false))
288       create_new_plan = true;
289     else
290       {
291         // We still might not have the same shape of array.
292 
293         for (int i = 0; i < rank; i++)
294           if (dims(i) != rn(i))
295             {
296               create_new_plan = true;
297               break;
298             }
299       }
300 
301     if (create_new_plan)
302       {
303         rd = dist;
304         rs = stride;
305         rr = rank;
306         rh = howmany;
307         rsimd_align = ioalign;
308         rn = dims;
309 
310         // Note reversal of dimensions for column major storage in FFTW.
311         octave_idx_type nn = 1;
312         OCTAVE_LOCAL_BUFFER (int, tmp, rank);
313 
314         for (int i = 0, j = rank-1; i < rank; i++, j--)
315           {
316             tmp[i] = dims(j);
317             nn *= dims(j);
318           }
319 
320         int plan_flags = 0;
321         bool plan_destroys_in = true;
322 
323         switch (meth)
324           {
325           case UNKNOWN:
326           case ESTIMATE:
327             plan_flags |= FFTW_ESTIMATE;
328             plan_destroys_in = false;
329             break;
330           case MEASURE:
331             plan_flags |= FFTW_MEASURE;
332             break;
333           case PATIENT:
334             plan_flags |= FFTW_PATIENT;
335             break;
336           case EXHAUSTIVE:
337             plan_flags |= FFTW_EXHAUSTIVE;
338             break;
339           case HYBRID:
340             if (nn < 8193)
341               plan_flags |= FFTW_MEASURE;
342             else
343               {
344                 plan_flags |= FFTW_ESTIMATE;
345                 plan_destroys_in = false;
346               }
347             break;
348           }
349 
350         if (ioalign)
351           plan_flags &= ~FFTW_UNALIGNED;
352         else
353           plan_flags |= FFTW_UNALIGNED;
354 
355         if (*cur_plan_p)
356           fftw_destroy_plan (*cur_plan_p);
357 
358         if (plan_destroys_in)
359           {
360             // Create matrix with the same size and 16-byte alignment as input
361             OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32);
362             itmp = reinterpret_cast<double *>
363                    (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
364                     ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
365 
366             *cur_plan_p
367               = fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp,
368                                         nullptr, stride, dist,
369                                         reinterpret_cast<fftw_complex *> (out),
370                                         nullptr, stride, dist, plan_flags);
371           }
372         else
373           {
374             *cur_plan_p
375               = fftw_plan_many_dft_r2c (rank, tmp, howmany,
376                                         (const_cast<double *> (in)),
377                                         nullptr, stride, dist,
378                                         reinterpret_cast<fftw_complex *> (out),
379                                         nullptr, stride, dist, plan_flags);
380           }
381 
382         if (*cur_plan_p == nullptr)
383           (*current_liboctave_error_handler) ("Error creating fftw plan");
384       }
385 
386     return *cur_plan_p;
387   }
388 
389   fftw_planner::FftwMethod
do_method(void)390   fftw_planner::do_method (void)
391   {
392     return meth;
393   }
394 
395   fftw_planner::FftwMethod
do_method(FftwMethod _meth)396   fftw_planner::do_method (FftwMethod _meth)
397   {
398     FftwMethod ret = meth;
399     if (_meth == ESTIMATE || _meth == MEASURE
400         || _meth == PATIENT || _meth == EXHAUSTIVE
401         || _meth == HYBRID)
402       {
403         if (meth != _meth)
404           {
405             meth = _meth;
406             if (rplan)
407               fftw_destroy_plan (reinterpret_cast<fftw_plan> (rplan));
408             if (plan[0])
409               fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[0]));
410             if (plan[1])
411               fftw_destroy_plan (reinterpret_cast<fftw_plan> (plan[1]));
412             rplan = plan[0] = plan[1] = nullptr;
413           }
414       }
415     else
416       ret = UNKNOWN;
417     return ret;
418   }
419 
420   float_fftw_planner *float_fftw_planner::instance = nullptr;
421 
float_fftw_planner(void)422   float_fftw_planner::float_fftw_planner (void)
423     : meth (ESTIMATE), rplan (nullptr), rd (0), rs (0), rr (0), rh (0), rn (),
424       rsimd_align (false), nthreads (1)
425   {
426     plan[0] = plan[1] = nullptr;
427     d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
428     simd_align[0] = simd_align[1] = false;
429     inplace[0] = inplace[1] = false;
430     n[0] = n[1] = dim_vector ();
431 
432 #if defined (HAVE_FFTW3F_THREADS)
433     int init_ret = fftwf_init_threads ();
434     if (! init_ret)
435       (*current_liboctave_error_handler) ("Error initializing FFTW3F threads");
436 
437     // Use number of processors available to the current process
438     // This can be later changed with fftw ("threads", nthreads).
439     nthreads = octave_num_processors_wrapper (OCTAVE_NPROC_CURRENT_OVERRIDABLE);
440     fftwf_plan_with_nthreads (nthreads);
441 #endif
442 
443     // If we have a system wide wisdom file, import it.
444     fftwf_import_system_wisdom ();
445   }
446 
~float_fftw_planner(void)447   float_fftw_planner::~float_fftw_planner (void)
448   {
449     fftwf_plan *plan_p;
450 
451     plan_p = reinterpret_cast<fftwf_plan *> (&rplan);
452     if (*plan_p)
453       fftwf_destroy_plan (*plan_p);
454 
455     plan_p = reinterpret_cast<fftwf_plan *> (&plan[0]);
456     if (*plan_p)
457       fftwf_destroy_plan (*plan_p);
458 
459     plan_p = reinterpret_cast<fftwf_plan *> (&plan[1]);
460     if (*plan_p)
461       fftwf_destroy_plan (*plan_p);
462   }
463 
464   bool
instance_ok(void)465   float_fftw_planner::instance_ok (void)
466   {
467     bool retval = true;
468 
469     if (! instance)
470       {
471         instance = new float_fftw_planner ();
472         singleton_cleanup_list::add (cleanup_instance);
473       }
474 
475     return retval;
476   }
477 
478   void
threads(int nt)479   float_fftw_planner::threads (int nt)
480   {
481 #if defined (HAVE_FFTW3F_THREADS)
482     if (instance_ok () && nt != threads ())
483       {
484         instance->nthreads = nt;
485         fftwf_plan_with_nthreads (nt);
486         // Clear the current plans.
487         instance->rplan = instance->plan[0] = instance->plan[1] = nullptr;
488       }
489 #else
490     octave_unused_parameter (nt);
491 
492     (*current_liboctave_warning_handler)
493       ("unable to change number of threads without FFTW thread support");
494 #endif
495   }
496 
497   void *
do_create_plan(int dir,const int rank,const dim_vector & dims,octave_idx_type howmany,octave_idx_type stride,octave_idx_type dist,const FloatComplex * in,FloatComplex * out)498   float_fftw_planner::do_create_plan (int dir, const int rank,
499                                       const dim_vector& dims,
500                                       octave_idx_type howmany,
501                                       octave_idx_type stride,
502                                       octave_idx_type dist,
503                                       const FloatComplex *in,
504                                       FloatComplex *out)
505   {
506     int which = (dir == FFTW_FORWARD) ? 0 : 1;
507     fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&plan[which]);
508     bool create_new_plan = false;
509     bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
510     bool ioinplace = (in == out);
511 
512     // Don't create a new plan if we have a non SIMD plan already but
513     // can do SIMD.  This prevents endlessly recreating plans if we
514     // change the alignment.
515 
516     if (plan[which] == nullptr || d[which] != dist || s[which] != stride
517         || r[which] != rank || h[which] != howmany
518         || ioinplace != inplace[which]
519         || ((ioalign != simd_align[which]) ? ! ioalign : false))
520       create_new_plan = true;
521     else
522       {
523         // We still might not have the same shape of array.
524 
525         for (int i = 0; i < rank; i++)
526           if (dims(i) != n[which](i))
527             {
528               create_new_plan = true;
529               break;
530             }
531       }
532 
533     if (create_new_plan)
534       {
535         d[which] = dist;
536         s[which] = stride;
537         r[which] = rank;
538         h[which] = howmany;
539         simd_align[which] = ioalign;
540         inplace[which] = ioinplace;
541         n[which] = dims;
542 
543         // Note reversal of dimensions for column major storage in FFTW.
544         octave_idx_type nn = 1;
545         OCTAVE_LOCAL_BUFFER (int, tmp, rank);
546 
547         for (int i = 0, j = rank-1; i < rank; i++, j--)
548           {
549             tmp[i] = dims(j);
550             nn *= dims(j);
551           }
552 
553         int plan_flags = 0;
554         bool plan_destroys_in = true;
555 
556         switch (meth)
557           {
558           case UNKNOWN:
559           case ESTIMATE:
560             plan_flags |= FFTW_ESTIMATE;
561             plan_destroys_in = false;
562             break;
563           case MEASURE:
564             plan_flags |= FFTW_MEASURE;
565             break;
566           case PATIENT:
567             plan_flags |= FFTW_PATIENT;
568             break;
569           case EXHAUSTIVE:
570             plan_flags |= FFTW_EXHAUSTIVE;
571             break;
572           case HYBRID:
573             if (nn < 8193)
574               plan_flags |= FFTW_MEASURE;
575             else
576               {
577                 plan_flags |= FFTW_ESTIMATE;
578                 plan_destroys_in = false;
579               }
580             break;
581           }
582 
583         if (ioalign)
584           plan_flags &= ~FFTW_UNALIGNED;
585         else
586           plan_flags |= FFTW_UNALIGNED;
587 
588         if (*cur_plan_p)
589           fftwf_destroy_plan (*cur_plan_p);
590 
591         if (plan_destroys_in)
592           {
593             // Create matrix with the same size and 16-byte alignment as input
594             OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32);
595             itmp = reinterpret_cast<FloatComplex *>
596                    (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
597                     ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
598 
599             *cur_plan_p
600               = fftwf_plan_many_dft (rank, tmp, howmany,
601                                      reinterpret_cast<fftwf_complex *> (itmp),
602                                      nullptr, stride, dist,
603                                      reinterpret_cast<fftwf_complex *> (out),
604                                      nullptr, stride, dist, dir, plan_flags);
605           }
606         else
607           {
608             *cur_plan_p
609               = fftwf_plan_many_dft (rank, tmp, howmany,
610                                      reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)),
611                                      nullptr, stride, dist,
612                                      reinterpret_cast<fftwf_complex *> (out),
613                                      nullptr, stride, dist, dir, plan_flags);
614           }
615 
616         if (*cur_plan_p == nullptr)
617           (*current_liboctave_error_handler) ("Error creating fftw plan");
618       }
619 
620     return *cur_plan_p;
621   }
622 
623   void *
do_create_plan(const int rank,const dim_vector & dims,octave_idx_type howmany,octave_idx_type stride,octave_idx_type dist,const float * in,FloatComplex * out)624   float_fftw_planner::do_create_plan (const int rank, const dim_vector& dims,
625                                       octave_idx_type howmany,
626                                       octave_idx_type stride,
627                                       octave_idx_type dist,
628                                       const float *in, FloatComplex *out)
629   {
630     fftwf_plan *cur_plan_p = reinterpret_cast<fftwf_plan *> (&rplan);
631     bool create_new_plan = false;
632     bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
633 
634     // Don't create a new plan if we have a non SIMD plan already but
635     // can do SIMD.  This prevents endlessly recreating plans if we
636     // change the alignment.
637 
638     if (rplan == nullptr || rd != dist || rs != stride || rr != rank
639         || rh != howmany || ((ioalign != rsimd_align) ? ! ioalign : false))
640       create_new_plan = true;
641     else
642       {
643         // We still might not have the same shape of array.
644 
645         for (int i = 0; i < rank; i++)
646           if (dims(i) != rn(i))
647             {
648               create_new_plan = true;
649               break;
650             }
651       }
652 
653     if (create_new_plan)
654       {
655         rd = dist;
656         rs = stride;
657         rr = rank;
658         rh = howmany;
659         rsimd_align = ioalign;
660         rn = dims;
661 
662         // Note reversal of dimensions for column major storage in FFTW.
663         octave_idx_type nn = 1;
664         OCTAVE_LOCAL_BUFFER (int, tmp, rank);
665 
666         for (int i = 0, j = rank-1; i < rank; i++, j--)
667           {
668             tmp[i] = dims(j);
669             nn *= dims(j);
670           }
671 
672         int plan_flags = 0;
673         bool plan_destroys_in = true;
674 
675         switch (meth)
676           {
677           case UNKNOWN:
678           case ESTIMATE:
679             plan_flags |= FFTW_ESTIMATE;
680             plan_destroys_in = false;
681             break;
682           case MEASURE:
683             plan_flags |= FFTW_MEASURE;
684             break;
685           case PATIENT:
686             plan_flags |= FFTW_PATIENT;
687             break;
688           case EXHAUSTIVE:
689             plan_flags |= FFTW_EXHAUSTIVE;
690             break;
691           case HYBRID:
692             if (nn < 8193)
693               plan_flags |= FFTW_MEASURE;
694             else
695               {
696                 plan_flags |= FFTW_ESTIMATE;
697                 plan_destroys_in = false;
698               }
699             break;
700           }
701 
702         if (ioalign)
703           plan_flags &= ~FFTW_UNALIGNED;
704         else
705           plan_flags |= FFTW_UNALIGNED;
706 
707         if (*cur_plan_p)
708           fftwf_destroy_plan (*cur_plan_p);
709 
710         if (plan_destroys_in)
711           {
712             // Create matrix with the same size and 16-byte alignment as input
713             OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32);
714             itmp = reinterpret_cast<float *>
715                    (((reinterpret_cast<std::ptrdiff_t> (itmp) + 15) & ~ 0xF) +
716                     ((reinterpret_cast<std::ptrdiff_t> (in)) & 0xF));
717 
718             *cur_plan_p
719               = fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp,
720                                          nullptr, stride, dist,
721                                          reinterpret_cast<fftwf_complex *> (out),
722                                          nullptr, stride, dist, plan_flags);
723           }
724         else
725           {
726             *cur_plan_p
727               = fftwf_plan_many_dft_r2c (rank, tmp, howmany,
728                                          (const_cast<float *> (in)),
729                                          nullptr, stride, dist,
730                                          reinterpret_cast<fftwf_complex *> (out),
731                                          nullptr, stride, dist, plan_flags);
732           }
733 
734         if (*cur_plan_p == nullptr)
735           (*current_liboctave_error_handler) ("Error creating fftw plan");
736       }
737 
738     return *cur_plan_p;
739   }
740 
741   float_fftw_planner::FftwMethod
do_method(void)742   float_fftw_planner::do_method (void)
743   {
744     return meth;
745   }
746 
747   float_fftw_planner::FftwMethod
do_method(FftwMethod _meth)748   float_fftw_planner::do_method (FftwMethod _meth)
749   {
750     FftwMethod ret = meth;
751     if (_meth == ESTIMATE || _meth == MEASURE
752         || _meth == PATIENT || _meth == EXHAUSTIVE
753         || _meth == HYBRID)
754       {
755         if (meth != _meth)
756           {
757             meth = _meth;
758             if (rplan)
759               fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (rplan));
760             if (plan[0])
761               fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[0]));
762             if (plan[1])
763               fftwf_destroy_plan (reinterpret_cast<fftwf_plan> (plan[1]));
764             rplan = plan[0] = plan[1] = nullptr;
765           }
766       }
767     else
768       ret = UNKNOWN;
769     return ret;
770   }
771 
772   template <typename T>
773   static inline void
convert_packcomplex_1d(T * out,std::size_t nr,std::size_t nc,octave_idx_type stride,octave_idx_type dist)774   convert_packcomplex_1d (T *out, std::size_t nr, std::size_t nc,
775                           octave_idx_type stride, octave_idx_type dist)
776   {
777     octave_quit ();
778 
779     // Fill in the missing data.
780 
781     for (std::size_t i = 0; i < nr; i++)
782       for (std::size_t j = nc/2+1; j < nc; j++)
783         out[j*stride + i*dist] = conj (out[(nc - j)*stride + i*dist]);
784 
785     octave_quit ();
786   }
787 
788   template <typename T>
789   static inline void
convert_packcomplex_Nd(T * out,const dim_vector & dv)790   convert_packcomplex_Nd (T *out, const dim_vector& dv)
791   {
792     std::size_t nc = dv(0);
793     std::size_t nr = dv(1);
794     std::size_t np = (dv.ndims () > 2 ? dv.numel () / nc / nr : 1);
795     std::size_t nrp = nr * np;
796     T *ptr1, *ptr2;
797 
798     octave_quit ();
799 
800     // Create space for the missing elements.
801 
802     for (std::size_t i = 0; i < nrp; i++)
803       {
804         ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
805         ptr2 = out + i * nc;
806         for (std::size_t j = 0; j < nc/2+1; j++)
807           *ptr2++ = *ptr1++;
808       }
809 
810     octave_quit ();
811 
812     // Fill in the missing data for the rank = 2 case directly for speed.
813 
814     for (std::size_t i = 0; i < np; i++)
815       {
816         for (std::size_t j = 1; j < nr; j++)
817           for (std::size_t k = nc/2+1; k < nc; k++)
818             out[k + (j + i*nr)*nc] = conj (out[nc - k + ((i+1)*nr - j)*nc]);
819 
820         for (std::size_t j = nc/2+1; j < nc; j++)
821           out[j + i*nr*nc] = conj (out[(i*nr+1)*nc - j]);
822       }
823 
824     octave_quit ();
825 
826     // Now do the permutations needed for rank > 2 cases.
827 
828     std::size_t jstart = dv(0) * dv(1);
829     std::size_t kstep = dv(0);
830     std::size_t nel = dv.numel ();
831 
832     for (int inner = 2; inner < dv.ndims (); inner++)
833       {
834         std::size_t jmax = jstart * dv(inner);
835         for (std::size_t i = 0; i < nel; i+=jmax)
836           for (std::size_t j = jstart, jj = jmax-jstart; j < jj;
837                j+=jstart, jj-=jstart)
838             for (std::size_t k = 0; k < jstart; k+= kstep)
839               for (std::size_t l = nc/2+1; l < nc; l++)
840                 {
841                   T tmp = out[i+ j + k + l];
842                   out[i + j + k + l] = out[i + jj + k + l];
843                   out[i + jj + k + l] = tmp;
844                 }
845         jstart = jmax;
846       }
847 
848     octave_quit ();
849   }
850 
851   int
fft(const double * in,Complex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)852   fftw::fft (const double *in, Complex *out, std::size_t npts,
853              std::size_t nsamples, octave_idx_type stride, octave_idx_type dist)
854   {
855     dist = (dist < 0 ? npts : dist);
856 
857     dim_vector dv (npts, 1);
858     void *vplan = fftw_planner::create_plan (1, dv, nsamples,
859                                              stride, dist, in, out);
860     fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
861 
862     fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
863                           reinterpret_cast<fftw_complex *> (out));
864 
865     // Need to create other half of the transform.
866 
867     convert_packcomplex_1d (out, nsamples, npts, stride, dist);
868 
869     return 0;
870   }
871 
872   int
fft(const Complex * in,Complex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)873   fftw::fft (const Complex *in, Complex *out, std::size_t npts,
874              std::size_t nsamples, octave_idx_type stride, octave_idx_type dist)
875   {
876     dist = (dist < 0 ? npts : dist);
877 
878     dim_vector dv (npts, 1);
879     void *vplan = fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
880                                              nsamples, stride,
881                                              dist, in, out);
882     fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
883 
884     fftw_execute_dft (plan,
885                       reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
886                       reinterpret_cast<fftw_complex *> (out));
887 
888     return 0;
889   }
890 
891   int
ifft(const Complex * in,Complex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)892   fftw::ifft (const Complex *in, Complex *out, std::size_t npts,
893               std::size_t nsamples, octave_idx_type stride,
894               octave_idx_type dist)
895   {
896     dist = (dist < 0 ? npts : dist);
897 
898     dim_vector dv (npts, 1);
899     void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, 1, dv, nsamples,
900                                              stride, dist, in, out);
901     fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
902 
903     fftw_execute_dft (plan,
904                       reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
905                       reinterpret_cast<fftw_complex *> (out));
906 
907     const Complex scale = npts;
908     for (std::size_t j = 0; j < nsamples; j++)
909       for (std::size_t i = 0; i < npts; i++)
910         out[i*stride + j*dist] /= scale;
911 
912     return 0;
913   }
914 
915   int
fftNd(const double * in,Complex * out,const int rank,const dim_vector & dv)916   fftw::fftNd (const double *in, Complex *out, const int rank,
917                const dim_vector& dv)
918   {
919     octave_idx_type dist = 1;
920     for (int i = 0; i < rank; i++)
921       dist *= dv(i);
922 
923     // Fool with the position of the start of the output matrix, so that
924     // creating other half of the matrix won't cause cache problems.
925 
926     octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
927 
928     void *vplan = fftw_planner::create_plan (rank, dv, 1, 1, dist,
929                                              in, out + offset);
930     fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
931 
932     fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
933                           reinterpret_cast<fftw_complex *> (out+ offset));
934 
935     // Need to create other half of the transform.
936 
937     convert_packcomplex_Nd (out, dv);
938 
939     return 0;
940   }
941 
942   int
fftNd(const Complex * in,Complex * out,const int rank,const dim_vector & dv)943   fftw::fftNd (const Complex *in, Complex *out, const int rank,
944                const dim_vector& dv)
945   {
946     octave_idx_type dist = 1;
947     for (int i = 0; i < rank; i++)
948       dist *= dv(i);
949 
950     void *vplan = fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
951                                              1, 1, dist, in, out);
952     fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
953 
954     fftw_execute_dft (plan,
955                       reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
956                       reinterpret_cast<fftw_complex *> (out));
957 
958     return 0;
959   }
960 
961   int
ifftNd(const Complex * in,Complex * out,const int rank,const dim_vector & dv)962   fftw::ifftNd (const Complex *in, Complex *out, const int rank,
963                 const dim_vector& dv)
964   {
965     octave_idx_type dist = 1;
966     for (int i = 0; i < rank; i++)
967       dist *= dv(i);
968 
969     void *vplan = fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
970                                              1, 1, dist, in, out);
971     fftw_plan plan = reinterpret_cast<fftw_plan> (vplan);
972 
973     fftw_execute_dft (plan,
974                       reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
975                       reinterpret_cast<fftw_complex *> (out));
976 
977     const std::size_t npts = dv.numel ();
978     const Complex scale = npts;
979     for (std::size_t i = 0; i < npts; i++)
980       out[i] /= scale;
981 
982     return 0;
983   }
984 
985   int
fft(const float * in,FloatComplex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)986   fftw::fft (const float *in, FloatComplex *out, std::size_t npts,
987              std::size_t nsamples, octave_idx_type stride, octave_idx_type dist)
988   {
989     dist = (dist < 0 ? npts : dist);
990 
991     dim_vector dv (npts, 1);
992     void *vplan = float_fftw_planner::create_plan (1, dv, nsamples, stride,
993                                                    dist, in, out);
994     fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
995 
996     fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
997                            reinterpret_cast<fftwf_complex *> (out));
998 
999     // Need to create other half of the transform.
1000 
1001     convert_packcomplex_1d (out, nsamples, npts, stride, dist);
1002 
1003     return 0;
1004   }
1005 
1006   int
fft(const FloatComplex * in,FloatComplex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)1007   fftw::fft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1008              std::size_t nsamples, octave_idx_type stride, octave_idx_type dist)
1009   {
1010     dist = (dist < 0 ? npts : dist);
1011 
1012     dim_vector dv (npts, 1);
1013     void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, 1, dv,
1014                                                    nsamples, stride, dist,
1015                                                    in, out);
1016     fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1017 
1018     fftwf_execute_dft (plan,
1019                        reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1020                        reinterpret_cast<fftwf_complex *> (out));
1021 
1022     return 0;
1023   }
1024 
1025   int
ifft(const FloatComplex * in,FloatComplex * out,std::size_t npts,std::size_t nsamples,octave_idx_type stride,octave_idx_type dist)1026   fftw::ifft (const FloatComplex *in, FloatComplex *out, std::size_t npts,
1027               std::size_t nsamples, octave_idx_type stride,
1028               octave_idx_type dist)
1029   {
1030     dist = (dist < 0 ? npts : dist);
1031 
1032     dim_vector dv (npts, 1);
1033     void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, 1, dv,
1034                                                    nsamples, stride, dist,
1035                                                    in, out);
1036     fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1037 
1038     fftwf_execute_dft (plan,
1039                        reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1040                        reinterpret_cast<fftwf_complex *> (out));
1041 
1042     const FloatComplex scale = npts;
1043     for (std::size_t j = 0; j < nsamples; j++)
1044       for (std::size_t i = 0; i < npts; i++)
1045         out[i*stride + j*dist] /= scale;
1046 
1047     return 0;
1048   }
1049 
1050   int
fftNd(const float * in,FloatComplex * out,const int rank,const dim_vector & dv)1051   fftw::fftNd (const float *in, FloatComplex *out, const int rank,
1052                const dim_vector& dv)
1053   {
1054     octave_idx_type dist = 1;
1055     for (int i = 0; i < rank; i++)
1056       dist *= dv(i);
1057 
1058     // Fool with the position of the start of the output matrix, so that
1059     // creating other half of the matrix won't cause cache problems.
1060 
1061     octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
1062 
1063     void *vplan = float_fftw_planner::create_plan (rank, dv, 1, 1, dist,
1064                                                    in, out + offset);
1065     fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1066 
1067     fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)),
1068                            reinterpret_cast<fftwf_complex *> (out+ offset));
1069 
1070     // Need to create other half of the transform.
1071 
1072     convert_packcomplex_Nd (out, dv);
1073 
1074     return 0;
1075   }
1076 
1077   int
fftNd(const FloatComplex * in,FloatComplex * out,const int rank,const dim_vector & dv)1078   fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1079                const dim_vector& dv)
1080   {
1081     octave_idx_type dist = 1;
1082     for (int i = 0; i < rank; i++)
1083       dist *= dv(i);
1084 
1085     void *vplan = float_fftw_planner::create_plan (FFTW_FORWARD, rank, dv,
1086                                                    1, 1, dist, in, out);
1087     fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1088 
1089     fftwf_execute_dft (plan,
1090                        reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1091                        reinterpret_cast<fftwf_complex *> (out));
1092 
1093     return 0;
1094   }
1095 
1096   int
ifftNd(const FloatComplex * in,FloatComplex * out,const int rank,const dim_vector & dv)1097   fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank,
1098                 const dim_vector& dv)
1099   {
1100     octave_idx_type dist = 1;
1101     for (int i = 0; i < rank; i++)
1102       dist *= dv(i);
1103 
1104     void *vplan = float_fftw_planner::create_plan (FFTW_BACKWARD, rank, dv,
1105                                                    1, 1, dist, in, out);
1106     fftwf_plan plan = reinterpret_cast<fftwf_plan> (vplan);
1107 
1108     fftwf_execute_dft (plan,
1109                        reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)),
1110                        reinterpret_cast<fftwf_complex *> (out));
1111 
1112     const std::size_t npts = dv.numel ();
1113     const FloatComplex scale = npts;
1114     for (std::size_t i = 0; i < npts; i++)
1115       out[i] /= scale;
1116 
1117     return 0;
1118   }
1119 
1120 #endif
1121 
1122   std::string
fftw_version(void)1123   fftw_version (void)
1124   {
1125 #if defined (HAVE_FFTW)
1126     return ::fftw_version;
1127 #else
1128     return "none";
1129 #endif
1130   }
1131 
1132   std::string
fftwf_version(void)1133   fftwf_version (void)
1134   {
1135 #if defined (HAVE_FFTW)
1136     return ::fftwf_version;
1137 #else
1138     return "none";
1139 #endif
1140   }
1141 }
1142