1 /* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2010 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 #include "gmp.h" 27 #include "gmp-impl.h" 28 29 /* For odd divisors, mpn_divexact_1 works fine with two's complement. */ 30 #ifndef mpn_divexact_by3 31 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && MODLIMB_INVERSE_3 32 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,MODLIMB_INVERSE_3,0) 33 #else 34 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3) 35 #endif 36 #endif 37 38 /* Interpolation for Toom-3.5, using the evaluation points: infinity, 39 1, -1, 2, -2. More precisely, we want to compute 40 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the 41 six values 42 43 w5 = f(0), 44 w4 = f(-1), 45 w3 = f(1) 46 w2 = f(-2), 47 w1 = f(2), 48 w0 = limit at infinity of f(x) / x^5, 49 50 The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at 51 {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at 52 {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most 53 significant limbs small). f(-1) and f(-2) may be negative, signs 54 determined by the flag bits. All intermediate results are positive. 55 Inputs are destroyed. 56 57 Interpolation sequence was taken from the paper: "Integer and 58 Polynomial Multiplication: Towards Optimal Toom-Cook Matrices". 59 Some slight variations were introduced: adaptation to "gmp 60 instruction set", and a final saving of an operation by interlacing 61 interpolation and recomposition phases. 62 */ 63 64 void 65 mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags, 66 mp_ptr w4, mp_ptr w2, mp_ptr w1, 67 mp_size_t w0n) 68 { 69 mp_limb_t cy; 70 /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */ 71 mp_limb_t cy4, cy6, embankment; 72 73 ASSERT( n > 0 ); 74 ASSERT( 2*n >= w0n && w0n > 0 ); 75 76 #define w5 pp /* 2n */ 77 #define w3 (pp + 2 * n) /* 2n+1 */ 78 #define w0 (pp + 5 * n) /* w0n */ 79 80 /* Interpolate with sequence: 81 W2 =(W1 - W2)>>2 82 W1 =(W1 - W5)>>1 83 W1 =(W1 - W2)>>1 84 W4 =(W3 - W4)>>1 85 W2 =(W2 - W4)/3 86 W3 = W3 - W4 - W5 87 W1 =(W1 - W3)/3 88 // Last steps are mixed with recomposition... 89 W2 = W2 - W0<<2 90 W4 = W4 - W2 91 W3 = W3 - W1 92 W2 = W2 - W0 93 */ 94 95 /* W2 =(W1 - W2)>>2 */ 96 if (flags & toom6_vm2_neg) 97 mpn_add_n (w2, w1, w2, 2 * n + 1); 98 else 99 mpn_sub_n (w2, w1, w2, 2 * n + 1); 100 mpn_rshift (w2, w2, 2 * n + 1, 2); 101 102 /* W1 =(W1 - W5)>>1 */ 103 w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n); 104 mpn_rshift (w1, w1, 2 * n + 1, 1); 105 106 /* W1 =(W1 - W2)>>1 */ 107 #if HAVE_NATIVE_mpn_rsh1sub_n 108 mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1); 109 #else 110 mpn_sub_n (w1, w1, w2, 2 * n + 1); 111 mpn_rshift (w1, w1, 2 * n + 1, 1); 112 #endif 113 114 /* W4 =(W3 - W4)>>1 */ 115 if (flags & toom6_vm1_neg) 116 { 117 #if HAVE_NATIVE_mpn_rsh1add_n 118 mpn_rsh1add_n (w4, w3, w4, 2 * n + 1); 119 #else 120 mpn_add_n (w4, w3, w4, 2 * n + 1); 121 mpn_rshift (w4, w4, 2 * n + 1, 1); 122 #endif 123 } 124 else 125 { 126 #if HAVE_NATIVE_mpn_rsh1sub_n 127 mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1); 128 #else 129 mpn_sub_n (w4, w3, w4, 2 * n + 1); 130 mpn_rshift (w4, w4, 2 * n + 1, 1); 131 #endif 132 } 133 134 /* W2 =(W2 - W4)/3 */ 135 mpn_sub_n (w2, w2, w4, 2 * n + 1); 136 mpn_divexact_by3 (w2, w2, 2 * n + 1); 137 138 /* W3 = W3 - W4 - W5 */ 139 mpn_sub_n (w3, w3, w4, 2 * n + 1); 140 w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n); 141 142 /* W1 =(W1 - W3)/3 */ 143 mpn_sub_n (w1, w1, w3, 2 * n + 1); 144 mpn_divexact_by3 (w1, w1, 2 * n + 1); 145 146 /* 147 [1 0 0 0 0 0; 148 0 1 0 0 0 0; 149 1 0 1 0 0 0; 150 0 1 0 1 0 0; 151 1 0 1 0 1 0; 152 0 0 0 0 0 1] 153 154 pp[] prior to operations: 155 |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__| 156 157 summation scheme for remaining operations: 158 |______________5|n_____4|n_____3|n_____2|n______|n______|pp 159 |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__| 160 || H w4 | L w4 | 161 || H w2 | L w2 | 162 || H w1 | L w1 | 163 ||-H w1 |-L w1 | 164 |-H w0 |-L w0 ||-H w2 |-L w2 | 165 */ 166 cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1); 167 MPN_INCR_U (pp + 3 * n + 1, n, cy); 168 169 /* W2 -= W0<<2 */ 170 #if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n 171 #if HAVE_NATIVE_mpn_sublsh2_n 172 cy = mpn_sublsh2_n(w2, w2, w0, w0n); 173 #else 174 cy = mpn_sublsh_n(w2, w2, w0, w0n, 2); 175 #endif 176 #else 177 /* {W4,2*n+1} is now free and can be overwritten. */ 178 cy = mpn_lshift(w4, w0, w0n, 2); 179 cy+= mpn_sub_n(w2, w2, w4, w0n); 180 #endif 181 MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy); 182 183 /* W4L = W4L - W2L */ 184 cy = mpn_sub_n (pp + n, pp + n, w2, n); 185 MPN_DECR_U (w3, 2 * n + 1, cy); 186 187 /* W3H = W3H + W2L */ 188 cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n); 189 /* W1L + W2H */ 190 cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n); 191 MPN_INCR_U (w1 + n, n + 1, cy); 192 193 /* W0 = W0 + W1H */ 194 if (LIKELY (w0n > n)) 195 cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n); 196 else 197 cy6 = mpn_add_n (w0, w0, w1 + n, w0n); 198 199 /* 200 summation scheme for the next operation: 201 |...____5|n_____4|n_____3|n_____2|n______|n______|pp 202 |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__| 203 ...-w0___|-w1_w2 | 204 */ 205 /* if(LIKELY(w0n>n)) the two operands below DO overlap! */ 206 cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n); 207 208 /* embankment is a "dirty trick" to avoid carry/borrow propagation 209 beyond allocated memory */ 210 embankment = w0[w0n - 1] - 1; 211 w0[w0n - 1] = 1; 212 if (LIKELY (w0n > n)) { 213 if ( LIKELY(cy4 > cy6) ) 214 MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6); 215 else 216 MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4); 217 MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy); 218 MPN_INCR_U (w0 + n, w0n - n, cy6); 219 } else { 220 MPN_INCR_U (pp + 4 * n, w0n + n, cy4); 221 MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6); 222 } 223 w0[w0n - 1] += embankment; 224 225 #undef w5 226 #undef w3 227 #undef w0 228 229 } 230