1 /* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62. 2 3 Contributed to the GNU project by Niels M�ller. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2006, 2007, 2009 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 #include "gmp.h" 27 #include "gmp-impl.h" 28 29 /* Arithmetic right shift, requiring that the shifted out bits are zero. */ 30 static inline void 31 divexact_2exp (mp_ptr rp, mp_srcptr sp, mp_size_t n, unsigned shift) 32 { 33 mp_limb_t sign; 34 sign = LIMB_HIGHBIT_TO_MASK (sp[n-1] << GMP_NAIL_BITS) << (GMP_NUMB_BITS - shift); 35 ASSERT_NOCARRY (mpn_rshift (rp, sp, n, shift)); 36 rp[n-1] |= sign & GMP_NUMB_MASK; 37 } 38 39 /* For odd divisors, mpn_divexact_1 works fine with two's complement. */ 40 #ifndef mpn_divexact_by3 41 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3) 42 #endif 43 #ifndef mpn_divexact_by9 44 #define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9) 45 #endif 46 #ifndef mpn_divexact_by15 47 #define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15) 48 #endif 49 50 /* Interpolation for toom4, using the evaluation points infinity, 2, 51 1, -1, 1/2, -1/2. More precisely, we want to compute 52 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the 53 seven values 54 55 w0 = f(0), 56 w1 = 64 f(-1/2), 57 w2 = 64 f(1/2), 58 w3 = f(-1), 59 w4 = f(1) 60 w5 = f(2) 61 w6 = limit at infinity of f(x) / x^6, 62 63 The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n }, 64 w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n, 65 w6n }. The other values are 2n + 1 limbs each (with most 66 significant limbs small). f(-1) and f(-1/2) may be negative, signs 67 determined by the flag bits. All intermediate results are 68 represented in two's complement. Inputs are destroyed. 69 70 Needs (2*n + 1) limbs of temporary storage. 71 */ 72 73 void 74 mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom4_flags flags, 75 mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5, 76 mp_size_t w6n, mp_ptr tp) 77 { 78 mp_size_t m = 2*n + 1; 79 mp_ptr w2 = rp + 2*n; 80 mp_ptr w6 = rp + 6*n; 81 mp_limb_t cy; 82 83 ASSERT (w6n > 0); 84 ASSERT (w6n <= 2*n); 85 86 /* Using Marco Bodrato's formulas 87 88 W5 = W5 + W2 89 W3 =(W3 + W4)/2 90 W1 = W1 + W2 91 W2 = W2 - W6 - W0*64 92 W2 =(W2*2 - W1)/8 93 W4 = W4 - W3 94 95 W5 = W5 - W4*65 96 W4 = W4 - W6 - W0 97 W5 = W5 + W4*45 98 W2 =(W2 - W4)/3 99 W4 = W4 - W2 100 101 W1 = W1 - W5 102 W5 =(W5 - W3*16)/ 18 103 W3 = W3 - W5 104 W1 =(W1/30 + W5)/ 2 105 W5 = W5 - W1 106 107 where W0 = f(0), W1 = 64 f(-1/2), W2 = 64 f(1/2), W3 = f(-1), 108 W4 = f(1), W5 = f(2), W6 = f(oo), 109 */ 110 111 mpn_add_n (w5, w5, w2, m); 112 if (flags & toom4_w3_neg) 113 mpn_add_n (w3, w3, w4, m); 114 else 115 mpn_sub_n (w3, w4, w3, m); 116 divexact_2exp (w3, w3, m, 1); 117 if (flags & toom4_w1_neg) 118 mpn_add_n (w1, w1, w2, m); 119 else 120 mpn_sub_n (w1, w2, w1, m); 121 mpn_sub (w2, w2, m, w6, w6n); 122 tp[2*n] = mpn_lshift (tp, rp, 2*n, 6); 123 mpn_sub_n (w2, w2, tp, m); 124 mpn_lshift (w2, w2, m, 1); 125 mpn_sub_n (w2, w2, w1, m); 126 divexact_2exp (w2, w2, m, 3); 127 mpn_sub_n (w4, w4, w3, m); 128 129 mpn_submul_1 (w5, w4, m, 65); 130 mpn_sub (w4, w4, m, w6, w6n); 131 mpn_sub (w4, w4, m, rp, 2*n); 132 mpn_addmul_1 (w5, w4, m, 45); 133 mpn_sub_n (w2, w2, w4, m); 134 /* Rely on divexact working with two's complement */ 135 mpn_divexact_by3 (w2, w2, m); 136 mpn_sub_n (w4, w4, w2, m); 137 138 mpn_sub_n (w1, w1, w5, m); 139 mpn_lshift (tp, w3, m, 4); 140 mpn_sub_n (w5, w5, tp, m); 141 divexact_2exp (w5, w5, m, 1); 142 mpn_divexact_by9 (w5, w5, m); 143 mpn_sub_n (w3, w3, w5, m); 144 divexact_2exp (w1, w1, m, 1); 145 mpn_divexact_by15 (w1, w1, m); 146 mpn_add_n (w1, w1, w5, m); 147 divexact_2exp (w1, w1, m, 1); 148 mpn_sub_n (w5, w5, w1, m); 149 150 /* Two's complement coefficients must be non-negative at the end of 151 this procedure. */ 152 ASSERT ( !(w1[2*n] & GMP_LIMB_HIGHBIT)); 153 ASSERT ( !(w2[2*n] & GMP_LIMB_HIGHBIT)); 154 ASSERT ( !(w3[2*n] & GMP_LIMB_HIGHBIT)); 155 ASSERT ( !(w4[2*n] & GMP_LIMB_HIGHBIT)); 156 ASSERT ( !(w5[2*n] & GMP_LIMB_HIGHBIT)); 157 158 /* Addition chain. Note carries and the 2n'th limbs that need to be 159 * added in. 160 * 161 * Special care is needed for w2[2n] and the corresponding carry, 162 * since the "simple" way of adding it all together would overwrite 163 * the limb at wp[2*n] and rp[4*n] (same location) with the sum of 164 * the high half of w3 and the low half of w4. 165 * 166 * 7 6 5 4 3 2 1 0 167 * | | | | | | | | | 168 * ||w3 (2n+1)| 169 * ||w4 (2n+1)| 170 * ||w5 (2n+1)| ||w1 (2n+1)| 171 * + | w6 (w6n)| ||w2 (2n+1)| w0 (2n) | (share storage with r) 172 * ----------------------------------------------- 173 * r | | | | | | | | | 174 * c7 c6 c5 c4 c3 Carries to propagate 175 */ 176 177 cy = mpn_add_n (rp + n, rp + n, w1, 2*n); 178 MPN_INCR_U (w2 + n, n + 1, w1[2*n] + cy); 179 cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n); 180 MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy); 181 cy = mpn_add_n (rp + 4*n, w3 + n, w4, n); 182 MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy); 183 cy = mpn_add_n (rp + 5*n, w4 + n, w5, n); 184 MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy); 185 if (w6n > n + 1) 186 { 187 mp_limb_t c7 = mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, n + 1); 188 MPN_INCR_U (rp + 7*n + 1, w6n - n - 1, c7); 189 } 190 else 191 { 192 ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n)); 193 #if WANT_ASSERT 194 { 195 mp_size_t i; 196 for (i = w6n; i <= n; i++) 197 ASSERT (w5[n + i] == 0); 198 } 199 #endif 200 } 201 } 202