1
2 #ifndef NTL_FFT_impl__H
3 #define NTL_FFT_impl__H
4
5 #include <NTL/tools.h>
6
7 NTL_OPEN_NNS
8
// Compile-time configuration of the FFT rounding granularity.
//
// NTL_FFT_RDUP is the exponent used by FFTRoundUp: lengths are rounded
// up to a multiple of 2^NTL_FFT_RDUP.

#if !defined(NTL_ENABLE_AVX_FFT)

// AVX FFT not enabled.
// Currently, this should be at least 2 to support
// loop unrolling in the FFT implementation.
#define NTL_FFT_RDUP (4)

#else

// The AVX FFT needs either AVX-512F, or AVX2 together with FMA.
#if !(defined(NTL_HAVE_AVX512F) || (defined(NTL_HAVE_AVX2) && defined(NTL_HAVE_FMA)))
#error "NTL_ENABLE_AVX_FFT: not supported on this platform"
#endif

// NTL_LG2_PDSZ is 3 when AVX-512F is available and has not been
// disabled via NTL_AVOID_AVX512; otherwise it is 2.
// NOTE(review): presumably log2 of the number of doubles per vector
// register -- confirm against the AVX FFT implementation.
#if defined(NTL_AVOID_AVX512) || !defined(NTL_HAVE_AVX512F)
#define NTL_LG2_PDSZ (2)
#else
#define NTL_LG2_PDSZ (3)
#endif

#define NTL_PDSZ (1 << NTL_LG2_PDSZ)
#define NTL_FFT_RDUP (NTL_LG2_PDSZ+3)

#endif
31
32 inline
FFTRoundUp(long xn,long k)33 long FFTRoundUp(long xn, long k)
34 // Assumes k >= 0.
35 // Returns an integer m such that 1 <= m <= n = 2^k and
36 // m divsisible my 2^NTL_FFT_RDUP.
37 // Also, if xn <= n, then m >= xn.
38 {
39 long n = 1L << k;
40 if (xn <= 0) xn = 1;
41
42 xn = ((xn+((1L << NTL_FFT_RDUP)-1)) >> NTL_FFT_RDUP) << NTL_FFT_RDUP;
43
44 if (k >= 10) {
45 if (xn > n - (n >> 4)) xn = n;
46 }
47 else {
48 if (xn > n - (n >> 3)) xn = n;
49 }
50 // truncation just a bit below n does not really help
51 // at all, and can sometimes slow things down slightly, so round up
52 // to n. This also takes care of cases where xn > n.
53 // Actually, for smallish n, we should round up sooner,
54 // at n-n/8, and for larger n, we should round up later,
55 // at n-m/16. At least, experimentally, this is what I see.
56
57 return xn;
58 }
59
60
61 NTL_CLOSE_NNS
62
63 #endif
64