1 /* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 #include "gmp.h" 27 #include "gmp-impl.h" 28 29 #define BINVERT_3 MODLIMB_INVERSE_3 30 31 #define BINVERT_15 \ 32 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15) 33 34 #define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK) 35 36 #ifndef mpn_divexact_by3 37 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 38 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0) 39 #else 40 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3) 41 #endif 42 #endif 43 44 #ifndef mpn_divexact_by45 45 #if GMP_NUMB_BITS % 12 == 0 46 #define mpn_divexact_by45(dst,src,size) \ 47 (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45))) 48 #else 49 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 50 #define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0) 51 #else 52 #define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45) 53 #endif 54 #endif 55 #endif 56 57 #if HAVE_NATIVE_mpn_sublsh_n 58 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,src,n,s) 59 #else 60 static mp_limb_t 61 DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws) 62 { 63 #if USE_MUL_1 64 return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s)); 65 #else 66 mp_limb_t __cy; 67 __cy = mpn_lshift (ws,src,n,s); 68 return __cy + mpn_sub_n (dst,dst,ws,n); 69 #endif 70 } 71 #endif 72 73 74 #if HAVE_NATIVE_mpn_subrsh 75 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s) 76 #else 77 /* This is not a correct definition, it assumes no carry */ 78 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \ 79 do { \ 80 mp_limb_t __cy; \ 81 MPN_DECR_U (dst, nd, src[0] >> s); \ 82 __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \ 83 MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \ 84 } while (0) 85 #endif 86 87 /* Interpolation for Toom-4.5 (or Toom-4), using the evaluation 88 points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely, 89 we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of 90 degree 7 (or 6), given the 8 (rsp. 7) values: 91 92 r1 = limit at infinity of f(x) / x^7, 93 r2 = f(4), 94 r3 = f(-4), 95 r4 = f(2), 96 r5 = f(-2), 97 r6 = f(1), 98 r7 = f(-1), 99 r8 = f(0). 100 101 All couples of the form f(n),f(-n) must be already mixed with 102 toom_couple_handling(f(n),...,f(-n),...) 103 104 The result is stored in {pp, spt + 7*n (or 6*n)}. 105 At entry, r8 is stored at {pp, 2n}, 106 r5 is stored at {pp + 3n, 3n + 1}. 107 108 The other values are 2n+... limbs each (with most significant limbs small). 109 110 All intermediate results are positive. 111 Inputs are destroyed. 112 */ 113 114 void 115 mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n, 116 mp_ptr r3, mp_ptr r7, 117 mp_size_t spt, mp_ptr ws) 118 { 119 mp_limb_signed_t cy; 120 mp_ptr r5, r1; 121 r5 = (pp + 3 * n); /* 3n+1 */ 122 r1 = (pp + 7 * n); /* spt */ 123 124 /******************************* interpolation *****************************/ 125 126 DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws); 127 cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws); 128 MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy); 129 130 DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws); 131 cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws); 132 MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy); 133 134 r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n); 135 cy = mpn_sub_n (r7, r7, r1, spt); 136 MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy); 137 138 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1)); 139 ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2)); 140 141 ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1)); 142 143 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1)); 144 145 mpn_divexact_by45 (r3, r3, 3 * n + 1); 146 147 ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1)); 148 149 ASSERT_NOCARRY(DO_mpn_sublsh_n (r5, r3, 3 * n + 1, 2, ws)); 150 151 /* last interpolation steps... */ 152 /* ... are mixed with recomposition */ 153 154 /***************************** recomposition *******************************/ 155 /* 156 pp[] prior to operations: 157 |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp 158 159 summation scheme for remaining operations: 160 |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp 161 |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp 162 ||_H r3|_M r3|_L*r3| 163 ||_H_r7|_M_r7|_L_r7| 164 ||-H r3|-M r3|-L*r3| 165 ||-H*r5|-M_r5|-L_r5| 166 */ 167 168 cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */ 169 cy-= mpn_sub_n (pp + n, pp + n, r5, n); 170 if (0 > cy) 171 MPN_DECR_U (r7 + n, 2*n + 1, 1); 172 else 173 MPN_INCR_U (r7 + n, 2*n + 1, cy); 174 175 cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */ 176 MPN_DECR_U (r7 + 2*n, n + 1, cy); 177 178 cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */ 179 r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */ 180 cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */ 181 if (UNLIKELY(0 > cy)) 182 MPN_DECR_U (r5 + n + 1, 2*n, 1); 183 else 184 MPN_INCR_U (r5 + n + 1, 2*n, cy); 185 186 ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */ 187 188 cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]); 189 MPN_INCR_U (r3 + 2*n, n + 1, cy); 190 cy = r3[3*n] + mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n); 191 MPN_INCR_U (pp + 8*n, spt - n, cy); 192 } 193