1 
2 #ifndef NTL_FFT_impl__H
3 #define NTL_FFT_impl__H
4 
5 #include <NTL/tools.h>
6 
7 NTL_OPEN_NNS
8 
// Compile-time tuning constants for the FFT code.
// NTL_FFT_RDUP is the base-2 logarithm of the granularity that FFTRoundUp
// (defined later in this header) rounds lengths up to.
9 #ifdef NTL_ENABLE_AVX_FFT
10 
// The AVX FFT is only accepted when AVX-512F is available, or when both
// AVX2 and FMA are available; any other configuration is a hard error.
11 #if (!defined(NTL_HAVE_AVX512F) && !(defined(NTL_HAVE_AVX2) && defined(NTL_HAVE_FMA)))
12 #error "NTL_ENABLE_AVX_FFT: not supported on this platform"
13 #endif
14 
// NTL_LG2_PDSZ is 3 when AVX-512F is available and NTL_AVOID_AVX512 is not
// set, and 2 otherwise.
// NOTE(review): presumably log2 of the number of doubles in one SIMD
// vector (8 for 512-bit, 4 for 256-bit) -- confirm against the FFT source.
15 #if (defined(NTL_HAVE_AVX512F) && !defined(NTL_AVOID_AVX512))
16 #define NTL_LG2_PDSZ (3)
17 #else
18 #define NTL_LG2_PDSZ (2)
19 #endif
20 
// With AVX enabled the rounding granularity is 2^(NTL_LG2_PDSZ+3), i.e.
// 64 or 32, and NTL_PDSZ = 2^NTL_LG2_PDSZ, i.e. 8 or 4.
21 #define NTL_FFT_RDUP (NTL_LG2_PDSZ+3)
22 #define NTL_PDSZ (1 << NTL_LG2_PDSZ)
23 
24 #else
25 
// Without AVX the rounding granularity is 2^4 = 16.
26 #define NTL_FFT_RDUP (4)
27 // Currently, this should be at least 2 to support
28 // loop unrolling in the FFT implementation
29 
30 #endif
31 
32 inline
FFTRoundUp(long xn,long k)33 long FFTRoundUp(long xn, long k)
34 // Assumes k >= 0.
35 // Returns an integer m such that 1 <= m <= n = 2^k and
36 // m divsisible my 2^NTL_FFT_RDUP.
37 // Also, if xn <= n, then m >= xn.
38 {
39    long n = 1L << k;
40    if (xn <= 0) xn = 1;
41 
42    xn = ((xn+((1L << NTL_FFT_RDUP)-1)) >> NTL_FFT_RDUP) << NTL_FFT_RDUP;
43 
44    if (k >= 10) {
45       if (xn > n - (n >> 4)) xn = n;
46    }
47    else {
48       if (xn > n - (n >> 3)) xn = n;
49    }
50    // truncation just a bit below n does not really help
51    // at all, and can sometimes slow things down slightly, so round up
52    // to n.  This also takes care of cases where xn > n.
53    // Actually, for smallish n, we should round up sooner,
54    // at n-n/8, and for larger n, we should round up later,
55    // at n-m/16.  At least, experimentally, this is what I see.
56 
57    return xn;
58 }
59 
60 
61 NTL_CLOSE_NNS
62 
63 #endif
64