1 /*
2     Copyright (C) 2009, 2011 William Hart
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <https://www.gnu.org/licenses/>.
10 */
11 
12 #include "gmp.h"
13 #include "flint.h"
14 #include "fft.h"
15 
fft_butterfly_sqrt2(mp_limb_t * s,mp_limb_t * t,mp_limb_t * i1,mp_limb_t * i2,mp_size_t i,mp_size_t limbs,flint_bitcnt_t w,mp_limb_t * temp)16 void fft_butterfly_sqrt2(mp_limb_t * s, mp_limb_t * t,
17                     mp_limb_t * i1, mp_limb_t * i2, mp_size_t i,
18                          mp_size_t limbs, flint_bitcnt_t w, mp_limb_t * temp)
19 {
20    flint_bitcnt_t wn = limbs*FLINT_BITS;
21    mp_limb_t cy = 0;
22    mp_size_t j = i/2, k = w/2;
23    mp_size_t y;
24    flint_bitcnt_t b1;
25    int negate = 0;
26 
27    b1 = j + wn/4 + i*k;
28    if (b1 >= wn)
29    {
30       negate = 1;
31       b1 -= wn;
32    }
33    y  = b1/FLINT_BITS;
34    b1 = b1%FLINT_BITS;
35 
36    /* sumdiff and multiply by 2^{j + wn/4 + i*k} */
37    butterfly_lshB(s, t, i1, i2, limbs, 0, y);
38    mpn_mul_2expmod_2expp1(t, t, limbs, b1);
39 
40    /* multiply by 2^{wn/2} */
41    y = limbs/2;
42 
43    flint_mpn_copyi(temp + y, t, limbs - y);
44    temp[limbs] = 0;
45    if (y) cy = mpn_neg_n(temp, t + limbs - y, y);
46    mpn_addmod_2expp1_1(temp + y, limbs - y, -t[limbs]);
47    mpn_sub_1(temp + y, temp + y, limbs - y + 1, cy);
48 
49    /* shift by an additional half limb (rare) */
50    if (limbs & 1)
51        mpn_mul_2expmod_2expp1(temp, temp, limbs, FLINT_BITS/2);
52 
53    /* subtract */
54    if (negate)
55        mpn_sub_n(t, t, temp, limbs + 1);
56    else
57        mpn_sub_n(t, temp, t, limbs + 1);
58 }
59 
fft_truncate_sqrt2(mp_limb_t ** ii,mp_size_t n,flint_bitcnt_t w,mp_limb_t ** t1,mp_limb_t ** t2,mp_limb_t ** temp,mp_size_t trunc)60 void fft_truncate_sqrt2(mp_limb_t ** ii, mp_size_t n, flint_bitcnt_t w,
61        mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, mp_size_t trunc)
62 {
63     mp_size_t i;
64     mp_size_t limbs = (w*n)/GMP_LIMB_BITS;
65 
66     if ((w & 1) == 0)
67     {
68         fft_truncate(ii, 2*n, w/2, t1, t2, trunc);
69         return;
70     }
71 
72     for (i = 0; i < trunc - 2*n; i++)
73     {
74         fft_butterfly(*t1, *t2, ii[i], ii[2*n+i], i/2, limbs, w);
75 
76         SWAP_PTRS(ii[i],     *t1);
77         SWAP_PTRS(ii[i+2*n], *t2);
78 
79         i++;
80 
81         fft_butterfly_sqrt2(*t1, *t2, ii[i], ii[2*n+i], i, limbs, w, *temp);
82 
83         SWAP_PTRS(ii[i],     *t1);
84         SWAP_PTRS(ii[2*n+i], *t2);
85     }
86 
87     for (i = trunc - 2*n; i < 2*n; i++)
88     {
89         fft_adjust(ii[i+2*n], ii[i], i/2, limbs, w);
90 
91         i++;
92 
93         fft_adjust_sqrt2(ii[i+2*n], ii[i], i, limbs, w, *temp);
94     }
95 
96     fft_radix2(ii, n, w, t1, t2);
97     fft_truncate1(ii + 2*n, n, w, t1, t2, trunc - 2*n);
98 }
99