1 /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42. 2 3 Contributed to the GNU project by Robert Harley. 4 Improvements by Paul Zimmermann and Marco Bodrato. 5 6 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 7 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 9 10 Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007 Free Software Foundation, 11 Inc. 12 13 This file is part of the GNU MP Library. 14 15 The GNU MP Library is free software; you can redistribute it and/or modify it 16 under the terms of the GNU Lesser General Public License as published by the 17 Free Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 The GNU MP Library is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 22 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 23 for more details. 24 25 You should have received a copy of the GNU Lesser General Public License 26 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 27 28 #include "gmp.h" 29 #include "gmp-impl.h" 30 31 void 32 mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1, 33 mp_size_t k, mp_size_t twor, int sa, 34 mp_limb_t vinf0, mp_ptr ws) 35 { 36 mp_limb_t cy, saved; 37 mp_size_t twok = k + k; 38 mp_size_t kk1 = twok + 1; 39 mp_ptr c1, v1, c3, vinf, c5; 40 mp_limb_t cout; /* final carry, should be zero at the end */ 41 42 c1 = c + k; 43 v1 = c1 + k; 44 c3 = v1 + k; 45 vinf = c3 + k; 46 c5 = vinf + k; 47 48 #define v0 (c) 49 /* (1) v2 <- v2-vm1 < v2+|vm1|, (16 8 4 2 1) - (1 -1 1 -1 1) = 50 thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k) (15 9 3 3 0) 51 */ 52 if (sa <= 0) 53 mpn_add_n (v2, v2, vm1, kk1); 54 else 55 mpn_sub_n (v2, v2, vm1, kk1); 56 57 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 58 v0 v1 hi(vinf) |vm1| v2-vm1 EMPTY */ 59 60 ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1)); /* v2 <- v2 / 3 */ 61 /* (5 3 1 1 0)*/ 62 63 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 64 v0 v1 hi(vinf) |vm1| (v2-vm1)/3 EMPTY */ 65 66 /* (2) vm1 <- tm1 := (v1 - sa*vm1) / 2 [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 = 67 tm1 >= 0 (0 1 0 1 0) 68 No carry comes out from {v1, kk1} +/- {vm1, kk1}, 69 and the division by two is exact */ 70 if (sa <= 0) 71 { 72 #ifdef HAVE_NATIVE_mpn_rsh1add_n 73 mpn_rsh1add_n (vm1, v1, vm1, kk1); 74 #else 75 mpn_add_n (vm1, v1, vm1, kk1); 76 mpn_rshift (vm1, vm1, kk1, 1); 77 #endif 78 } 79 else 80 { 81 #ifdef HAVE_NATIVE_mpn_rsh1sub_n 82 mpn_rsh1sub_n (vm1, v1, vm1, kk1); 83 #else 84 mpn_sub_n (vm1, v1, vm1, kk1); 85 mpn_rshift (vm1, vm1, kk1, 1); 86 #endif 87 } 88 89 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 90 v0 v1 hi(vinf) tm1 (v2-vm1)/3 EMPTY */ 91 92 /* (3) v1 <- t1 := v1 - v0 (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0) 93 t1 >= 0 94 */ 95 vinf[0] -= mpn_sub_n (v1, v1, c, twok); 96 97 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 98 v0 v1-v0 hi(vinf) tm1 (v2-vm1)/3 EMPTY */ 99 100 /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6 101 t2 >= 0 [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0) 102 */ 103 #ifdef HAVE_NATIVE_mpn_rsh1sub_n 104 mpn_rsh1sub_n (v2, v2, v1, kk1); 105 #else 106 mpn_sub_n (v2, v2, v1, kk1); 107 mpn_rshift (v2, v2, kk1, 1); 108 #endif 109 110 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 111 v0 v1-v0 hi(vinf) tm1 (v2-vm1-3t1)/6 EMPTY */ 112 113 /* (5) v1 <- t1-tm1 (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0) 114 result is v1 >= 0 115 */ 116 mpn_sub_n (v1, v1, vm1, kk1); 117 118 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 119 v0 v1-v0-tm1 hi(vinf) tm1 (v2-vm1-3t1)/6 EMPTY */ 120 121 /* (6) v2 <- v2 - 2*vinf, (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0) 122 result is v2 >= 0 */ 123 saved = vinf[0]; /* Remember v1's highest byte (will be overwritten). */ 124 vinf[0] = vinf0; /* Set the right value for vinf0 */ 125 #ifdef HAVE_NATIVE_mpn_sublsh1_n 126 cy = mpn_sublsh1_n (v2, v2, vinf, twor); 127 #else 128 cy = mpn_lshift (ws, vinf, twor, 1); 129 cy += mpn_sub_n (v2, v2, ws, twor); 130 #endif 131 MPN_DECR_U (v2 + twor, kk1 - twor, cy); 132 133 /* (7) v1 <- v1 - vinf, (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0) 134 result is >= 0 */ 135 cy = mpn_sub_n (v1, v1, vinf, twor); /* vinf is at most twor long. */ 136 vinf[0] = saved; 137 MPN_DECR_U (v1 + twor, kk1 - twor, cy); /* Treat the last bytes. */ 138 __GMPN_ADD_1 (cout, vinf, vinf, twor, vinf0); /* Add vinf0, propagate carry. */ 139 140 /* (8) vm1 <- vm1-t2 (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0) 141 vm1 >= 0 142 */ 143 mpn_sub_n (vm1, vm1, v2, kk1); /* No overlapping here. */ 144 145 /********************* Beginning the final phase **********************/ 146 147 /* {c,2k} {c+2k,2k } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 148 v0 t1 hi(t1)+vinf tm1 (v2-vm1-3t1)/6 EMPTY */ 149 150 /* (9) add t2 in {c+3k, ...} */ 151 cy = mpn_add_n (c3, c3, v2, kk1); 152 __GMPN_ADD_1 (cout, c5 + 1, c5 + 1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */ 153 154 /* {c,2k} {c+2k,2k } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 155 v0 t1 hi(t1)+vinf tm1 (v2-vm1-3t1)/6 EMPTY */ 156 /* c c+k c+2k c+3k c+4k t t+2k+1 t+4k+2 157 v0 t1 vinf tm1 t2 158 +t2 */ 159 160 /* add vm1 in {c+k, ...} */ 161 cy = mpn_add_n (c1, c1, vm1, kk1); 162 __GMPN_ADD_1 (cout, c3 + 1, c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */ 163 164 /* c c+k c+2k c+3k c+4k t t+2k+1 t+4k+2 165 v0 t1 vinf tm1 t2 166 +tm1 +t2 */ 167 168 #undef v0 169 #undef t2 170 } 171