1 /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42. 2 3 Contributed to the GNU project by Robert Harley. 4 Improvements by Paul Zimmermann and Marco Bodrato. 5 6 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 7 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 9 10 Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2009 Free Software 11 Foundation, Inc. 12 13 This file is part of the GNU MP Library. 14 15 The GNU MP Library is free software; you can redistribute it and/or modify it 16 under the terms of the GNU Lesser General Public License as published by the 17 Free Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 The GNU MP Library is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 22 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 23 for more details. 24 25 You should have received a copy of the GNU Lesser General Public License 26 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 27 28 #include "gmp.h" 29 #include "gmp-impl.h" 30 31 void 32 mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1, 33 mp_size_t k, mp_size_t twor, int sa, 34 mp_limb_t vinf0) 35 { 36 mp_limb_t cy, saved; 37 mp_size_t twok; 38 mp_size_t kk1; 39 mp_ptr c1, v1, c3, vinf; 40 41 twok = k + k; 42 kk1 = twok + 1; 43 44 c1 = c + k; 45 v1 = c1 + k; 46 c3 = v1 + k; 47 vinf = c3 + k; 48 49 #define v0 (c) 50 /* (1) v2 <- v2-vm1 < v2+|vm1|, (16 8 4 2 1) - (1 -1 1 -1 1) = 51 thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k) (15 9 3 3 0) 52 */ 53 if (sa) 54 ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1)); 55 else 56 ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1)); 57 58 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 59 v0 v1 hi(vinf) |vm1| v2-vm1 EMPTY */ 60 61 ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1)); /* v2 <- v2 / 3 */ 62 /* (5 3 1 1 0)*/ 63 64 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 65 v0 v1 hi(vinf) |vm1| (v2-vm1)/3 EMPTY */ 66 67 /* (2) vm1 <- tm1 := (v1 - vm1) / 2 [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 = 68 tm1 >= 0 (0 1 0 1 0) 69 No carry comes out from {v1, kk1} +/- {vm1, kk1}, 70 and the division by two is exact. 71 If (sa!=0) the sign of vm1 is negative */ 72 if (sa) 73 { 74 #ifdef HAVE_NATIVE_mpn_rsh1add_n 75 mpn_rsh1add_n (vm1, v1, vm1, kk1); 76 #else 77 ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1)); 78 ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1)); 79 #endif 80 } 81 else 82 { 83 #ifdef HAVE_NATIVE_mpn_rsh1sub_n 84 mpn_rsh1sub_n (vm1, v1, vm1, kk1); 85 #else 86 ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1)); 87 ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1)); 88 #endif 89 } 90 91 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 92 v0 v1 hi(vinf) tm1 (v2-vm1)/3 EMPTY */ 93 94 /* (3) v1 <- t1 := v1 - v0 (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0) 95 t1 >= 0 96 */ 97 vinf[0] -= mpn_sub_n (v1, v1, c, twok); 98 99 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 100 v0 v1-v0 hi(vinf) tm1 (v2-vm1)/3 EMPTY */ 101 102 /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6 103 t2 >= 0 [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0) 104 */ 105 #ifdef HAVE_NATIVE_mpn_rsh1sub_n 106 mpn_rsh1sub_n (v2, v2, v1, kk1); 107 #else 108 ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1)); 109 ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1)); 110 #endif 111 112 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 113 v0 v1-v0 hi(vinf) tm1 (v2-vm1-3t1)/6 EMPTY */ 114 115 /* (5) v1 <- t1-tm1 (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0) 116 result is v1 >= 0 117 */ 118 ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1)); 119 120 /* We do not need to read the value in vm1, so we add it in {c+k, ...} */ 121 cy = mpn_add_n (c1, c1, vm1, kk1); 122 MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */ 123 /* Memory allocated for vm1 is now free, it can be recycled ...*/ 124 125 /* (6) v2 <- v2 - 2*vinf, (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0) 126 result is v2 >= 0 */ 127 saved = vinf[0]; /* Remember v1's highest byte (will be overwritten). */ 128 vinf[0] = vinf0; /* Set the right value for vinf0 */ 129 #ifdef HAVE_NATIVE_mpn_sublsh1_n 130 cy = mpn_sublsh1_n (v2, v2, vinf, twor); 131 #else 132 /* Overwrite unused vm1 */ 133 cy = mpn_lshift (vm1, vinf, twor, 1); 134 cy += mpn_sub_n (v2, v2, vm1, twor); 135 #endif 136 MPN_DECR_U (v2 + twor, kk1 - twor, cy); 137 138 /* Current matrix is 139 [1 0 0 0 0; vinf 140 0 1 0 0 0; v2 141 1 0 1 0 0; v1 142 0 1 0 1 0; vm1 143 0 0 0 0 1] v0 144 Some vaues already are in-place (we added vm1 in the correct position) 145 | vinf| v1 | v0 | 146 | vm1 | 147 One still is in a separated area 148 | +v2 | 149 We have to compute v1-=vinf; vm1 -= v2, 150 |-vinf| 151 | -v2 | 152 Carefully reordering operations we can avoid to compute twice the sum 153 of the high half of v2 plus the low half of vinf. 154 */ 155 156 /* Add the high half of t2 in {vinf} */ 157 if ( LIKELY(twor > k + 1) ) { /* This is the expected flow */ 158 cy = mpn_add_n (vinf, vinf, v2 + k, k + 1); 159 MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */ 160 } else { /* triggered only by very unbalanced cases like 161 (k+k+(k-2))x(k+k+1) , should be handled by toom32 */ 162 ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor)); 163 } 164 /* (7) v1 <- v1 - vinf, (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0) 165 result is >= 0 */ 166 /* Side effect: we also subtracted (high half) vm1 -= v2 */ 167 cy = mpn_sub_n (v1, v1, vinf, twor); /* vinf is at most twor long. */ 168 vinf0 = vinf[0]; /* Save again the right value for vinf0 */ 169 vinf[0] = saved; 170 MPN_DECR_U (v1 + twor, kk1 - twor, cy); /* Treat the last bytes. */ 171 172 /* (8) vm1 <- vm1-v2 (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0) 173 Operate only on the low half. 174 */ 175 cy = mpn_sub_n (c1, c1, v2, k); 176 MPN_DECR_U (v1, kk1, cy); 177 178 /********************* Beginning the final phase **********************/ 179 180 /* Most of the recomposition was done */ 181 182 /* add t2 in {c+3k, ...}, but only the low half */ 183 cy = mpn_add_n (c3, c3, v2, k); 184 vinf[0] += cy; 185 ASSERT(vinf[0] >= cy); /* No carry */ 186 MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */ 187 188 #undef v0 189 } 190