1 /*
2 This file is part of pocketfft.
3
4 Copyright (C) 2010-2020 Max-Planck-Society
5 Copyright (C) 2019 Peter Bell
6
7 Authors: Martin Reinecke, Peter Bell
8
9 All rights reserved.
10
11 Redistribution and use in source and binary forms, with or without modification,
12 are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice, this
15 list of conditions and the following disclaimer.
16 * Redistributions in binary form must reproduce the above copyright notice, this
17 list of conditions and the following disclaimer in the documentation and/or
18 other materials provided with the distribution.
19 * Neither the name of the copyright holder nor the names of its contributors may
20 be used to endorse or promote products derived from this software without
21 specific prior written permission.
22
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
24 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
27 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
30 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 /*
36 * Python interface.
37 */
38
39 #include <pybind11/pybind11.h>
40 #include <pybind11/numpy.h>
41 #include <pybind11/stl.h>
42 #include <complex>
43
44 #include "ducc0/fft/fft.h"
45 #include "ducc0/bindings/pybind_utils.h"
46
47 namespace ducc0 {
48
49 namespace detail_pymodule_fft {
50
51 namespace {
52
53 using shape_t = ducc0::fmav_info::shape_t;
54 using std::size_t;
55 using std::ptrdiff_t;
56
57 namespace py = pybind11;
58
59 // Only instantiate long double transforms if they offer more precision
60 using ldbl_t = typename std::conditional<
61 sizeof(long double)==sizeof(double), double, long double>::type;
62
63 using c64 = std::complex<float>;
64 using c128 = std::complex<double>;
65 using clong = std::complex<ldbl_t>;
66 using f32 = float;
67 using f64 = double;
68 using flong = ldbl_t;
69 auto None = py::none();
70
makeaxes(const py::array & in,const py::object & axes)71 shape_t makeaxes(const py::array &in, const py::object &axes)
72 {
73 if (axes.is_none())
74 {
75 shape_t res(size_t(in.ndim()));
76 for (size_t i=0; i<res.size(); ++i)
77 res[i]=i;
78 return res;
79 }
80 auto tmp=axes.cast<std::vector<ptrdiff_t>>();
81 auto ndim = in.ndim();
82 if ((tmp.size()>size_t(ndim)) || (tmp.size()==0))
83 throw std::runtime_error("bad axes argument");
84 for (auto& sz: tmp)
85 {
86 if (sz<0)
87 sz += ndim;
88 if ((sz>=ndim) || (sz<0))
89 throw std::invalid_argument("axes exceeds dimensionality of output");
90 }
91 return shape_t(tmp.begin(), tmp.end());
92 }
93
94 #define DISPATCH(arr, T1, T2, T3, func, args) \
95 { \
96 if (py::isinstance<py::array_t<T1>>(arr)) return func<double> args; \
97 if (py::isinstance<py::array_t<T2>>(arr)) return func<float> args; \
98 if (py::isinstance<py::array_t<T3>>(arr)) return func<ldbl_t> args; \
99 throw std::runtime_error("unsupported data type"); \
100 }
101
norm_fct(int inorm,size_t N)102 template<typename T> T norm_fct(int inorm, size_t N)
103 {
104 if (inorm==0) return T(1);
105 if (inorm==2) return T(1/ldbl_t(N));
106 if (inorm==1) return T(1/sqrt(ldbl_t(N)));
107 throw std::invalid_argument("invalid value for inorm (must be 0, 1, or 2)");
108 }
109
norm_fct(int inorm,const shape_t & shape,const shape_t & axes,size_t fct=1,int delta=0)110 template<typename T> T norm_fct(int inorm, const shape_t &shape,
111 const shape_t &axes, size_t fct=1, int delta=0)
112 {
113 if (inorm==0) return T(1);
114 size_t N(1);
115 for (auto a: axes)
116 N *= fct * size_t(int64_t(shape[a])+delta);
117 return norm_fct<T>(inorm, N);
118 }
119
c2c_internal(const py::array & in,const py::object & axes_,bool forward,int inorm,py::object & out_,size_t nthreads)120 template<typename T> py::array c2c_internal(const py::array &in,
121 const py::object &axes_, bool forward, int inorm, py::object &out_,
122 size_t nthreads)
123 {
124 auto axes = makeaxes(in, axes_);
125 auto ain = to_cfmav<std::complex<T>>(in);
126 auto out = get_optional_Pyarr<std::complex<T>>(out_, ain.shape());
127 auto aout = to_vfmav<std::complex<T>>(out);
128 {
129 py::gil_scoped_release release;
130 T fct = norm_fct<T>(inorm, ain.shape(), axes);
131 ducc0::c2c(ain, aout, axes, forward, fct, nthreads);
132 }
133 return move(out);
134 }
135
c2c_sym_internal(const py::array & in,const py::object & axes_,bool forward,int inorm,py::object & out_,size_t nthreads)136 template<typename T> py::array c2c_sym_internal(const py::array &in,
137 const py::object &axes_, bool forward, int inorm, py::object &out_,
138 size_t nthreads)
139 {
140 auto axes = makeaxes(in, axes_);
141 auto ain = to_cfmav<T>(in);
142 auto out = get_optional_Pyarr<std::complex<T>>(out_, ain.shape());
143 auto aout = to_vfmav<std::complex<T>>(out);
144 {
145 py::gil_scoped_release release;
146 T fct = norm_fct<T>(inorm, ain.shape(), axes);
147 ducc0::r2c(ain, aout, axes, forward, fct, nthreads);
148 // now fill in second half
149 using namespace ducc0::detail_fft;
150 hermiteHelper(0, 0, 0, 0, aout, aout, axes, [](const std::complex<T> &c, complex<T> &, complex<T> &c1)
151 {
152 c1 = conj(c);
153 }, nthreads);
154 }
155 return move(out);
156 }
157
c2c(const py::array & a,const py::object & axes_,bool forward,int inorm,py::object & out_,size_t nthreads)158 py::array c2c(const py::array &a, const py::object &axes_, bool forward,
159 int inorm, py::object &out_, size_t nthreads)
160 {
161 if (a.dtype().kind() == 'c')
162 DISPATCH(a, c128, c64, clong, c2c_internal, (a, axes_, forward,
163 inorm, out_, nthreads))
164
165 DISPATCH(a, f64, f32, flong, c2c_sym_internal, (a, axes_, forward,
166 inorm, out_, nthreads))
167 }
168
r2c_internal(const py::array & in,const py::object & axes_,bool forward,int inorm,py::object & out_,size_t nthreads)169 template<typename T> py::array r2c_internal(const py::array &in,
170 const py::object &axes_, bool forward, int inorm, py::object &out_,
171 size_t nthreads)
172 {
173 auto axes = makeaxes(in, axes_);
174 auto ain = to_cfmav<T>(in);
175 auto dims_out(ain.shape());
176 dims_out[axes.back()] = (dims_out[axes.back()]>>1)+1;
177 auto out = get_optional_Pyarr<std::complex<T>>(out_, dims_out);
178 auto aout = to_vfmav<std::complex<T>>(out);
179 {
180 py::gil_scoped_release release;
181 T fct = norm_fct<T>(inorm, ain.shape(), axes);
182 ducc0::r2c(ain, aout, axes, forward, fct, nthreads);
183 }
184 return move(out);
185 }
186
r2c(const py::array & in,const py::object & axes_,bool forward,int inorm,py::object & out_,size_t nthreads)187 py::array r2c(const py::array &in, const py::object &axes_, bool forward,
188 int inorm, py::object &out_, size_t nthreads)
189 {
190 DISPATCH(in, f64, f32, flong, r2c_internal, (in, axes_, forward, inorm, out_,
191 nthreads))
192 }
193
r2r_fftpack_internal(const py::array & in,const py::object & axes_,bool real2hermitian,bool forward,int inorm,py::object & out_,size_t nthreads)194 template<typename T> py::array r2r_fftpack_internal(const py::array &in,
195 const py::object &axes_, bool real2hermitian, bool forward, int inorm,
196 py::object &out_, size_t nthreads)
197 {
198 auto axes = makeaxes(in, axes_);
199 auto ain = to_cfmav<T>(in);
200 auto out = get_optional_Pyarr<T>(out_, ain.shape());
201 auto aout = to_vfmav<T>(out);
202 {
203 py::gil_scoped_release release;
204 T fct = norm_fct<T>(inorm, ain.shape(), axes);
205 ducc0::r2r_fftpack(ain, aout, axes, real2hermitian, forward, fct, nthreads);
206 }
207 return std::move(out);
208 }
209
r2r_fftpack(const py::array & in,const py::object & axes_,bool real2hermitian,bool forward,int inorm,py::object & out_,size_t nthreads)210 py::array r2r_fftpack(const py::array &in, const py::object &axes_,
211 bool real2hermitian, bool forward, int inorm, py::object &out_,
212 size_t nthreads)
213 {
214 DISPATCH(in, f64, f32, flong, r2r_fftpack_internal, (in, axes_,
215 real2hermitian, forward, inorm, out_, nthreads))
216 }
217
r2r_fftw_internal(const py::array & in,const py::object & axes_,bool forward,int inorm,py::object & out_,size_t nthreads)218 template<typename T> py::array r2r_fftw_internal(const py::array &in,
219 const py::object &axes_, bool forward, int inorm,
220 py::object &out_, size_t nthreads)
221 {
222 auto axes = makeaxes(in, axes_);
223 auto ain = to_cfmav<T>(in);
224 auto out = get_optional_Pyarr<T>(out_, ain.shape());
225 auto aout = to_vfmav<T>(out);
226 {
227 py::gil_scoped_release release;
228 T fct = norm_fct<T>(inorm, ain.shape(), axes);
229 ducc0::r2r_fftw(ain, aout, axes, forward, fct, nthreads);
230 }
231 return std::move(out);
232 }
233
r2r_fftw(const py::array & in,const py::object & axes_,bool forward,int inorm,py::object & out_,size_t nthreads)234 py::array r2r_fftw(const py::array &in, const py::object &axes_,
235 bool forward, int inorm, py::object &out_, size_t nthreads)
236 {
237 DISPATCH(in, f64, f32, flong, r2r_fftw_internal, (in, axes_,
238 forward, inorm, out_, nthreads))
239 }
240
dct_internal(const py::array & in,const py::object & axes_,int type,int inorm,py::object & out_,size_t nthreads)241 template<typename T> py::array dct_internal(const py::array &in,
242 const py::object &axes_, int type, int inorm, py::object &out_,
243 size_t nthreads)
244 {
245 auto axes = makeaxes(in, axes_);
246 auto ain = to_cfmav<T>(in);
247 auto out = get_optional_Pyarr<T>(out_, ain.shape());
248 auto aout = to_vfmav<T>(out);
249 {
250 py::gil_scoped_release release;
251 T fct = (type==1) ? norm_fct<T>(inorm, ain.shape(), axes, 2, -1)
252 : norm_fct<T>(inorm, ain.shape(), axes, 2);
253 bool ortho = inorm == true;
254 ducc0::dct(ain, aout, axes, type, fct, ortho, nthreads);
255 }
256 return std::move(out);
257 }
258
dct(const py::array & in,int type,const py::object & axes_,int inorm,py::object & out_,size_t nthreads)259 py::array dct(const py::array &in, int type, const py::object &axes_,
260 int inorm, py::object &out_, size_t nthreads)
261 {
262 if ((type<1) || (type>4)) throw std::invalid_argument("invalid DCT type");
263 DISPATCH(in, f64, f32, flong, dct_internal, (in, axes_, type, inorm, out_,
264 nthreads))
265 }
266
dst_internal(const py::array & in,const py::object & axes_,int type,int inorm,py::object & out_,size_t nthreads)267 template<typename T> py::array dst_internal(const py::array &in,
268 const py::object &axes_, int type, int inorm, py::object &out_,
269 size_t nthreads)
270 {
271 auto axes = makeaxes(in, axes_);
272 auto ain = to_cfmav<T>(in);
273 auto out = get_optional_Pyarr<T>(out_, ain.shape());
274 auto aout = to_vfmav<T>(out);
275 {
276 py::gil_scoped_release release;
277 T fct = (type==1) ? norm_fct<T>(inorm, ain.shape(), axes, 2, 1)
278 : norm_fct<T>(inorm, ain.shape(), axes, 2);
279 bool ortho = inorm == true;
280 ducc0::dst(ain, aout, axes, type, fct, ortho, nthreads);
281 }
282 return std::move(out);
283 }
284
dst(const py::array & in,int type,const py::object & axes_,int inorm,py::object & out_,size_t nthreads)285 py::array dst(const py::array &in, int type, const py::object &axes_,
286 int inorm, py::object &out_, size_t nthreads)
287 {
288 if ((type<1) || (type>4)) throw std::invalid_argument("invalid DST type");
289 DISPATCH(in, f64, f32, flong, dst_internal, (in, axes_, type, inorm,
290 out_, nthreads))
291 }
292
c2r_internal(const py::array & in,const py::object & axes_,size_t lastsize,bool forward,int inorm,py::object & out_,size_t nthreads)293 template<typename T> py::array c2r_internal(const py::array &in,
294 const py::object &axes_, size_t lastsize, bool forward, int inorm,
295 py::object &out_, size_t nthreads)
296 {
297 auto axes = makeaxes(in, axes_);
298 size_t axis = axes.back();
299 auto ain = to_cfmav<std::complex<T>>(in);
300 shape_t dims_out(ain.shape());
301 if (lastsize==0) lastsize=2*ain.shape(axis)-1;
302 if ((lastsize/2) + 1 != ain.shape(axis))
303 throw std::invalid_argument("bad lastsize");
304 dims_out[axis] = lastsize;
305 auto out = get_optional_Pyarr<T>(out_, dims_out);
306 auto aout = to_vfmav<T>(out);
307 {
308 py::gil_scoped_release release;
309 T fct = norm_fct<T>(inorm, aout.shape(), axes);
310 ducc0::c2r(ain, aout, axes, forward, fct, nthreads);
311 }
312 return std::move(out);
313 }
314
c2r(const py::array & in,const py::object & axes_,size_t lastsize,bool forward,int inorm,py::object & out_,size_t nthreads)315 py::array c2r(const py::array &in, const py::object &axes_, size_t lastsize,
316 bool forward, int inorm, py::object &out_, size_t nthreads)
317 {
318 DISPATCH(in, c128, c64, clong, c2r_internal, (in, axes_, lastsize, forward,
319 inorm, out_, nthreads))
320 }
321
separable_hartley_internal(const py::array & in,const py::object & axes_,int inorm,py::object & out_,size_t nthreads)322 template<typename T> py::array separable_hartley_internal(const py::array &in,
323 const py::object &axes_, int inorm, py::object &out_, size_t nthreads)
324 {
325 auto axes = makeaxes(in, axes_);
326 auto ain = to_cfmav<T>(in);
327 auto out = get_optional_Pyarr<T>(out_, ain.shape());
328 auto aout = to_vfmav<T>(out);
329 {
330 py::gil_scoped_release release;
331 T fct = norm_fct<T>(inorm, ain.shape(), axes);
332 ducc0::r2r_separable_hartley(ain, aout, axes, fct, nthreads);
333 }
334 return std::move(out);
335 }
336
separable_hartley(const py::array & in,const py::object & axes_,int inorm,py::object & out_,size_t nthreads)337 py::array separable_hartley(const py::array &in, const py::object &axes_,
338 int inorm, py::object &out_, size_t nthreads)
339 {
340 DISPATCH(in, f64, f32, flong, separable_hartley_internal, (in, axes_, inorm,
341 out_, nthreads))
342 }
343
genuine_hartley_internal(const py::array & in,const py::object & axes_,int inorm,py::object & out_,size_t nthreads)344 template<typename T> py::array genuine_hartley_internal(const py::array &in,
345 const py::object &axes_, int inorm, py::object &out_, size_t nthreads)
346 {
347 auto axes = makeaxes(in, axes_);
348 auto ain = to_cfmav<T>(in);
349 auto out = get_optional_Pyarr<T>(out_, ain.shape());
350 auto aout = to_vfmav<T>(out);
351 {
352 py::gil_scoped_release release;
353 T fct = norm_fct<T>(inorm, ain.shape(), axes);
354 ducc0::r2r_genuine_hartley(ain, aout, axes, fct, nthreads);
355 }
356 return std::move(out);
357 }
358
genuine_hartley(const py::array & in,const py::object & axes_,int inorm,py::object & out_,size_t nthreads)359 py::array genuine_hartley(const py::array &in, const py::object &axes_,
360 int inorm, py::object &out_, size_t nthreads)
361 {
362 DISPATCH(in, f64, f32, flong, genuine_hartley_internal, (in, axes_, inorm,
363 out_, nthreads))
364 }
365
366 // Export good_size in raw C-API to reduce overhead (~4x faster)
good_size(PyObject *,PyObject * args)367 PyObject * good_size(PyObject * /*self*/, PyObject * args)
368 {
369 Py_ssize_t n_ = -1;
370 int real = false;
371 if (!PyArg_ParseTuple(args, "n|p:good_size", &n_, &real))
372 return nullptr;
373
374 if (n_<0)
375 {
376 PyErr_SetString(PyExc_ValueError, "Target length must be positive");
377 return nullptr;
378 }
379 if ((n_-1) > static_cast<Py_ssize_t>(std::numeric_limits<size_t>::max() / 11))
380 {
381 PyErr_Format(PyExc_ValueError,
382 "Target length is too large to perform an FFT: %zi", n_);
383 return nullptr;
384 }
385 const auto n = static_cast<size_t>(n_);
386 using namespace ducc0::detail_fft;
387 return PyLong_FromSize_t(
388 real ? util1d::good_size_real(n) : util1d::good_size_cmplx(n));
389 }
390
convolve_axis_internal(const py::array & in_,py::array & out_,size_t axis,const py::array & kernel_,size_t nthreads)391 template<typename T> py::array convolve_axis_internal(const py::array &in_,
392 py::array &out_, size_t axis, const py::array &kernel_, size_t nthreads)
393 {
394 auto in = to_cfmav<T>(in_);
395 auto out = to_vfmav<T>(out_);
396 auto kernel = to_cmav<T,1>(kernel_);
397 {
398 py::gil_scoped_release release;
399 ducc0::convolve_axis(in, out, axis, kernel, nthreads);
400 }
401 return std::move(out_);
402 }
403
convolve_axis_internal_c(const py::array & in_,py::array & out_,size_t axis,const py::array & kernel_,size_t nthreads)404 template<typename T> py::array convolve_axis_internal_c(const py::array &in_,
405 py::array &out_, size_t axis, const py::array &kernel_, size_t nthreads)
406 {
407 return convolve_axis_internal<std::complex<T>>(in_, out_, axis, kernel_, nthreads);
408 }
409
convolve_axis(const py::array & in,py::array & out,size_t axis,const py::array & kernel,size_t nthreads)410 py::array convolve_axis(const py::array &in, py::array &out, size_t axis,
411 const py::array &kernel, size_t nthreads)
412 {
413 if (in.dtype().kind() == 'c')
414 DISPATCH(in, c128, c64, clong, convolve_axis_internal_c, (in, out, axis,
415 kernel, nthreads))
416 else
417 DISPATCH(in, f64, f32, flong, convolve_axis_internal, (in, out, axis,
418 kernel, nthreads))
419 }
420
421 const char *fft_DS = R"""(Fast Fourier, sine/cosine, and Hartley transforms.
422
423 This module supports
424 - single, double, and long double precision
425 - complex and real-valued transforms
426 - multi-dimensional transforms
427
428 For two- and higher-dimensional transforms the code will use SSE2 and AVX
429 vector instructions for faster execution if these are supported by the CPU and
430 were enabled during compilation.
431 )""";
432
433 const char *c2c_DS = R"""(Performs a complex FFT.
434
435 Parameters
436 ----------
437 a : numpy.ndarray (any complex or real type)
438 The input data. If its type is real, a more efficient real-to-complex
439 transform will be used.
440 axes : list of integers
441 The axes along which the FFT is carried out.
442 If not set, all axes will be transformed.
443 forward : bool
444 If `True`, a negative sign is used in the exponent, else a positive one.
445 inorm : int
446 Normalization type
447 | 0 : no normalization
448 | 1 : divide by sqrt(N)
449 | 2 : divide by N
450
451 where N is the product of the lengths of the transformed axes.
452 out : numpy.ndarray (same shape as `a`, complex type with same accuracy as `a`)
453 May be identical to `a`, but if it isn't, it must not overlap with `a`.
454 If None, a new array is allocated to store the output.
455 nthreads : int
456 Number of threads to use. If 0, use the system default (typically the number
457 of hardware threads on the compute node).
458
459 Returns
460 -------
461 numpy.ndarray (same shape as `a`, complex type with same accuracy as `a`)
462 The transformed data.
463 )""";
464
465 const char *r2c_DS = R"""(Performs an FFT whose input is strictly real.
466
467 Parameters
468 ----------
469 a : numpy.ndarray (any real type)
470 The input data
471 axes : list of integers
472 The axes along which the FFT is carried out.
473 If not set, all axes will be transformed in ascending order.
474 forward : bool
475 If `True`, a negative sign is used in the exponent, else a positive one.
476 inorm : int
477 Normalization type
478 | 0 : no normalization
479 | 1 : divide by sqrt(N)
480 | 2 : divide by N
481
482 where N is the product of the lengths of the transformed input axes.
483 out : numpy.ndarray (complex type with same accuracy as `a`)
484 For the required shape, see the `Returns` section.
485 Must not overlap with `a`.
486 If None, a new array is allocated to store the output.
487 nthreads : int
488 Number of threads to use. If 0, use the system default (typically the number
489 of hardware threads on the compute node).
490
491 Returns
492 -------
493 numpy.ndarray (complex type with same accuracy as `a`)
494 The transformed data. The shape is identical to that of the input array,
495 except for the axis that was transformed last. If the length of that axis
496 was n on input, it is n//2+1 on output.
497 )""";
498
499 const char *c2r_DS = R"""(Performs an FFT whose output is strictly real.
500
501 Parameters
502 ----------
503 a : numpy.ndarray (any complex type)
504 The input data
505 axes : list of integers
506 The axes along which the FFT is carried out.
507 If not set, all axes will be transformed in ascending order.
508 lastsize : the output size of the last axis to be transformed.
509 If the corresponding input axis has size n, this can be 2*n-2 or 2*n-1.
510 forward : bool
511 If `True`, a negative sign is used in the exponent, else a positive one.
512 inorm : int
513 Normalization type
514 | 0 : no normalization
515 | 1 : divide by sqrt(N)
516 | 2 : divide by N
517
518 where N is the product of the lengths of the transformed output axes.
519 out : numpy.ndarray (real type with same accuracy as `a`)
520 For the required shape, see the `Returns` section.
521 Must not overlap with `a`.
522 If None, a new array is allocated to store the output.
523 nthreads : int
524 Number of threads to use. If 0, use the system default (typically the number
525 of hardware threads on the compute node).
526
527 Returns
528 -------
529 numpy.ndarray (real type with same accuracy as `a`)
530 The transformed data. The shape is identical to that of the input array,
531 except for the axis that was transformed last, which has now `lastsize`
532 entries.
533 )""";
534
535 const char *r2r_fftpack_DS = R"""(Performs a real-valued FFT using FFTPACK's halfcomplex storage scheme.
536
537 Parameters
538 ----------
539 a : numpy.ndarray (any real type)
540 The input data
541 axes : list of integers
542 The axes along which the FFT is carried out.
543 If not set, all axes will be transformed.
544 real2hermitian : bool
545 if True, the input is purely real and the output will have Hermitian
546 symmetry and be stored in FFTPACK's halfcomplex ordering, otherwise the
547 opposite.
548 forward : bool
549 If `True`, a negative sign is used in the exponent, else a positive one.
550 inorm : int
551 Normalization type
552 | 0 : no normalization
553 | 1 : divide by sqrt(N)
554 | 2 : divide by N
555
556 where N is the length of `axis`.
557 out : numpy.ndarray (same shape and data type as `a`)
558 May be identical to `a`, but if it isn't, it must not overlap with `a`.
559 If None, a new array is allocated to store the output.
560 nthreads : int
561 Number of threads to use. If 0, use the system default (typically the number
562 of hardware threads on the compute node).
563
564 Returns
565 -------
566 numpy.ndarray (same shape and data type as `a`)
567 The transformed data. The shape is identical to that of the input array.
568 )""";
569
570 const char *r2r_fftw_DS = R"""(Performs a real-valued FFT using FFTW's halfcomplex storage scheme.
571
572 Parameters
573 ----------
574 a : numpy.ndarray (any real type)
575 The input data
576 axes : list of integers
577 The axes along which the FFT is carried out.
578 If not set, all axes will be transformed.
579 forward : bool
580 If `True`, a negative sign is used in the exponent, else a positive one.
581 inorm : int
582 Normalization type
583 | 0 : no normalization
584 | 1 : divide by sqrt(N)
585 | 2 : divide by N
586
587 where N is the length of `axis`.
588 out : numpy.ndarray (same shape and data type as `a`)
589 May be identical to `a`, but if it isn't, it must not overlap with `a`.
590 If None, a new array is allocated to store the output.
591 nthreads : int
592 Number of threads to use. If 0, use the system default (typically the number
593 of hardware threads on the compute node).
594
595 Returns
596 -------
597 numpy.ndarray (same shape and data type as `a`)
598 The transformed data. The shape is identical to that of the input array.
599 )""";
600
601 const char *separable_hartley_DS = R"""(Performs a separable Hartley transform.
602 For every requested axis, a 1D forward Fourier transform is carried out, and
603 the real and imaginary parts of the result are added before the next axis is
604 processed.
605
606 Parameters
607 ----------
608 a : numpy.ndarray (any real type)
609 The input data
610 axes : list of integers
611 The axes along which the transform is carried out.
612 If not set, all axes will be transformed.
613 inorm : int
614 Normalization type
615 | 0 : no normalization
616 | 1 : divide by sqrt(N)
617 | 2 : divide by N
618
619 where N is the product of the lengths of the transformed axes.
620 out : numpy.ndarray (same shape and data type as `a`)
621 May be identical to `a`, but if it isn't, it must not overlap with `a`.
622 If None, a new array is allocated to store the output.
623 nthreads : int
624 Number of threads to use. If 0, use the system default (typically the number
625 of hardware threads on the compute node).
626
627 Returns
628 -------
629 numpy.ndarray (same shape and data type as `a`)
630 The transformed data
631 )""";
632
633 const char *genuine_hartley_DS = R"""(Performs a full Hartley transform.
634 A full Fourier transform is carried out over the requested axes, and the
635 sum of real and imaginary parts of the result is stored in the output
636 array. For a single transformed axis, this is identical to `separable_hartley`,
637 but when transforming multiple axes, the results are different.
638
639 Parameters
640 ----------
641 a : numpy.ndarray (any real type)
642 The input data
643 axes : list of integers
644 The axes along which the transform is carried out.
645 If not set, all axes will be transformed.
646 inorm : int
647 Normalization type
648 | 0 : no normalization
649 | 1 : divide by sqrt(N)
650 | 2 : divide by N
651
652 where N is the product of the lengths of the transformed axes.
653 out : numpy.ndarray (same shape and data type as `a`)
654 May be identical to `a`, but if it isn't, it must not overlap with `a`.
655 If None, a new array is allocated to store the output.
656 nthreads : int
657 Number of threads to use. If 0, use the system default (typically the number
658 of hardware threads on the compute node).
659
660 Returns
661 -------
662 numpy.ndarray (same shape and data type as `a`)
663 The transformed data
664 )""";
665
666 const char *dct_DS = R"""(Performs a discrete cosine transform.
667
668 Parameters
669 ----------
670 a : numpy.ndarray (any real type)
671 The input data
672 type : integer
673 the type of DCT. Must be in [1; 4].
674 axes : list of integers
675 The axes along which the transform is carried out.
676 If not set, all axes will be transformed.
677 inorm : integer
678 the normalization type
679 | 0 : no normalization
680 | 1 : make transform orthogonal and divide by sqrt(N)
681 | 2 : divide by N
682
683 where N is the product of n_i for every transformed axis i.
684 n_i is 2*(<axis_length>-1 for type 1 and 2*<axis length>
685 for types 2, 3, 4.
686 Making the transform orthogonal involves the following additional steps
687 for every 1D sub-transform:
688
689 Type 1
690 multiply first and last input value by sqrt(2);
691 divide first and last output value by sqrt(2)
692 Type 2
693 divide first output value by sqrt(2)
694 Type 3
695 multiply first input value by sqrt(2)
696 Type 4
697 nothing
698
699 out : numpy.ndarray (same shape and data type as `a`)
700 May be identical to `a`, but if it isn't, it must not overlap with `a`.
701 If None, a new array is allocated to store the output.
702 nthreads : int
703 Number of threads to use. If 0, use the system default (typically the number
704 of hardware threads on the compute node).
705
706 Returns
707 -------
708 numpy.ndarray (same shape and data type as `a`)
709 The transformed data
710 )""";
711
712 const char *dst_DS = R"""(Performs a discrete sine transform.
713
714 Parameters
715 ----------
716 a : numpy.ndarray (any real type)
717 The input data
718 type : integer
719 the type of DST. Must be in [1; 4].
720 axes : list of integers
721 The axes along which the transform is carried out.
722 If not set, all axes will be transformed.
723 inorm : int
724 Normalization type
725 | 0 : no normalization
726 | 1 : make transform orthogonal and divide by sqrt(N)
727 | 2 : divide by N
728
729 where N is the product of n_i for every transformed axis i.
730 n_i is 2*(<axis_length>+1 for type 1 and 2*<axis length>
731 for types 2, 3, 4.
732 Making the transform orthogonal involves the following additional steps
733 for every 1D sub-transform:
734
735 Type 1
736 nothing
737 Type 2
738 divide first output value by sqrt(2)
739 Type 3
740 multiply first input value by sqrt(2)
741 Type 4
742 nothing
743
744 out : numpy.ndarray (same shape and data type as `a`)
745 May be identical to `a`, but if it isn't, it must not overlap with `a`.
746 If None, a new array is allocated to store the output.
747 nthreads : int
748 Number of threads to use. If 0, use the system default (typically the number
749 of hardware threads on the compute node).
750
751 Returns
752 -------
753 numpy.ndarray (same shape and data type as `a`)
754 The transformed data
755 )""";
756
757 const char *convolve_axis_DS = R"""(Performs a circular convolution along one axis.
758
759 The result is equivalent to
760
761 .. code-block:: Python
762
763 import scipy.ndimage
764 import scipy.signal
765 import scipy.fft
766 kernel = scipy.fft.fftshift(kernel)
767 tmp = scipy.ndimage.convolve1d(in, kernel, axis, mode='wrap')
768 out[()] = scipy.signal.resample(tmp, out.shape[axis], axis=axis)
769 return out
770
771 Parameters
772 ----------
773 in : numpy.ndarray (any real or complex type)
774 The input data
775 out : numpy.ndarray (same type as `in`)
776 The output data. Must have the same shape as `in` except for the axis
777 to be convolved
778 axis : integer
779 The axis along which the convolution is carried out.
780 kernel : one-dimensional numpy.ndarray (same type as `in`)
781 The kernel to be used for convolution
782 The length of this array must be equal to in.shape[axis]
783 nthreads : int
784 Number of threads to use. If 0, use the system default (typically the number
785 of hardware threads on the compute node).
786
787 Returns
788 -------
789 numpy.ndarray (identical to `out`)
790 The convolved input
791
792 Notes
793 -----
794 The main purpose of this routine is efficiency: the combination of the above
795 operations can be carried out more quickly than running the individual
796 operations in succession.
797
798 If `in.shape[axis]!=out.shape[axis]`, the appropriate amount of zero-padding or
799 truncation will be carried out after the convolution step.
800
801 `in` and `out` may overlap in memory. If they do, their first elements must
802 be at the same memory location, and all their strides must be equal.
803 )""";
804
805 const char * good_size_DS = R"""(Returns a good length to pad an FFT to.
806
807 Parameters
808 ----------
809 n : int
810 Minimum transform length
811 real : bool, optional
812 True if either input or output of FFT should be fully real.
813
814 Returns
815 -------
816 out : int
817 The smallest fast size >= n
818
819 )""";
820
821 } // unnamed namespace
822
add_fft(py::module_ & msup)823 void add_fft(py::module_ &msup)
824 {
825 using namespace pybind11::literals;
826 auto m = msup.def_submodule("fft");
827 m.doc() = fft_DS;
828 m.def("c2c", c2c, c2c_DS, "a"_a, "axes"_a=None, "forward"_a=true,
829 "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
830 m.def("r2c", r2c, r2c_DS, "a"_a, "axes"_a=None, "forward"_a=true,
831 "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
832 m.def("c2r", c2r, c2r_DS, "a"_a, "axes"_a=None, "lastsize"_a=0,
833 "forward"_a=true, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
834 m.def("r2r_fftpack", r2r_fftpack, r2r_fftpack_DS, "a"_a, "axes"_a,
835 "real2hermitian"_a, "forward"_a, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
836 m.def("r2r_fftw", r2r_fftw, r2r_fftw_DS, "a"_a, "axes"_a,
837 "forward"_a, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
838 m.def("separable_hartley", separable_hartley, separable_hartley_DS, "a"_a,
839 "axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
840 m.def("genuine_hartley", genuine_hartley, genuine_hartley_DS, "a"_a,
841 "axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
842 m.def("dct", dct, dct_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0,
843 "out"_a=None, "nthreads"_a=1);
844 m.def("dst", dst, dst_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0,
845 "out"_a=None, "nthreads"_a=1);
846 m.def("convolve_axis", convolve_axis, convolve_axis_DS, "in"_a, "out"_a,
847 "axis"_a, "kernel"_a, "nthreads"_a=1);
848
849 static PyMethodDef good_size_meth[] =
850 {{"good_size", good_size, METH_VARARGS, good_size_DS}, {0, 0, 0, 0}};
851 PyModule_AddFunctions(m.ptr(), good_size_meth);
852 }
853
854 }
855
856 using detail_pymodule_fft::add_fft;
857
858 }
859