1 /* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in 2 size. Or more accurately, bn <= an < (3/2)bn. 3 4 Contributed to the GNU project by Torbjorn Granlund. 5 6 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 7 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 9 10 Copyright 2006, 2007, 2008 Free Software Foundation, Inc. 11 12 This file is part of the GNU MP Library. 13 14 The GNU MP Library is free software; you can redistribute it and/or modify 15 it under the terms of the GNU Lesser General Public License as published by 16 the Free Software Foundation; either version 3 of the License, or (at your 17 option) any later version. 18 19 The GNU MP Library is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 22 License for more details. 23 24 You should have received a copy of the GNU Lesser General Public License 25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 26 27 28 /* 29 Things to work on: 30 31 1. Trim allocation. The allocations for as1, asm1, bs1, and bsm1 could be 32 avoided by instead reusing the pp area and the scratch area. 33 2. Use new toom functions for the recursive calls. 34 */ 35 36 #include "gmp.h" 37 #include "gmp-impl.h" 38 39 /* Evaluate in: -1, 0, +1, +2, +inf 40 41 <-s-><--n--><--n--><--n--> 42 ___ ______ ______ ______ 43 |a3_|___a2_|___a1_|___a0_| 44 |_b1_|___b0_| 45 <-t--><--n--> 46 47 v0 = a0 * b0 # A(0)*B(0) 48 v1 = (a0+ a1+ a2)*(b0+ b1+ b2) # A(1)*B(1) ah <= 2 bh <= 2 49 vm1 = (a0- a1+ a2)*(b0- b1+ b2) # A(-1)*B(-1) |ah| <= 1 bh <= 1 50 v2 = (a0+2a1+4a2)*(b0+2b1+4b2) # A(2)*B(2) ah <= 6 bh <= 6 51 vinf= a2 * b2 # A(inf)*B(inf) 52 */ 53 54 #if TUNE_PROGRAM_BUILD 55 #define MAYBE_mul_basecase 1 56 #define MAYBE_mul_toom33 1 57 #else 58 #define MAYBE_mul_basecase \ 59 (MUL_TOOM33_THRESHOLD < 3 * MUL_KARATSUBA_THRESHOLD) 60 #define MAYBE_mul_toom33 \ 61 (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD) 62 #endif 63 64 #define TOOM33_MUL_N_REC(p, a, b, n, ws) \ 65 do { \ 66 if (MAYBE_mul_basecase \ 67 && BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD)) \ 68 mpn_mul_basecase (p, a, n, b, n); \ 69 else if (! MAYBE_mul_toom33 \ 70 || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \ 71 mpn_kara_mul_n (p, a, b, n, ws); \ 72 else \ 73 mpn_toom3_mul_n (p, a, b, n, ws); \ 74 } while (0) 75 76 void 77 mpn_toom33_mul (mp_ptr pp, 78 mp_srcptr ap, mp_size_t an, 79 mp_srcptr bp, mp_size_t bn, 80 mp_ptr scratch) 81 { 82 mp_size_t n, s, t; 83 int vm1_neg; 84 mp_limb_t cy, vinf0; 85 mp_ptr gp; 86 mp_ptr as1, asm1, as2; 87 mp_ptr bs1, bsm1, bs2; 88 TMP_DECL; 89 90 #define a0 ap 91 #define a1 (ap + n) 92 #define a2 (ap + 2*n) 93 #define b0 bp 94 #define b1 (bp + n) 95 #define b2 (bp + 2*n) 96 97 n = (an + 2) / (size_t) 3; 98 99 s = an - 2 * n; 100 t = bn - 2 * n; 101 102 ASSERT (an >= bn); 103 104 ASSERT (0 < s && s <= n); 105 ASSERT (0 < t && t <= n); 106 107 TMP_MARK; 108 109 as1 = TMP_SALLOC_LIMBS (n + 1); 110 asm1 = TMP_SALLOC_LIMBS (n + 1); 111 as2 = TMP_SALLOC_LIMBS (n + 1); 112 113 bs1 = TMP_SALLOC_LIMBS (n + 1); 114 bsm1 = TMP_SALLOC_LIMBS (n + 1); 115 bs2 = TMP_SALLOC_LIMBS (n + 1); 116 117 gp = pp; 118 119 vm1_neg = 0; 120 121 /* Compute as1 and asm1. */ 122 cy = mpn_add (gp, a0, n, a2, s); 123 #if HAVE_NATIVE_mpn_addsub_n 124 if (cy == 0 && mpn_cmp (gp, a1, n) < 0) 125 { 126 cy = mpn_addsub_n (as1, asm1, a1, gp, n); 127 as1[n] = 0; 128 asm1[n] = 0; 129 vm1_neg = 1; 130 } 131 else 132 { 133 cy2 = mpn_addsub_n (as1, asm1, gp, a1, n); 134 as1[n] = cy + (cy2 >> 1); 135 asm1[n] = cy - (cy & 1); 136 } 137 #else 138 as1[n] = cy + mpn_add_n (as1, gp, a1, n); 139 if (cy == 0 && mpn_cmp (gp, a1, n) < 0) 140 { 141 mpn_sub_n (asm1, a1, gp, n); 142 asm1[n] = 0; 143 vm1_neg = 1; 144 } 145 else 146 { 147 cy -= mpn_sub_n (asm1, gp, a1, n); 148 asm1[n] = cy; 149 } 150 #endif 151 152 /* Compute as2. */ 153 #if HAVE_NATIVE_mpn_addlsh1_n 154 cy = mpn_addlsh1_n (as2, a1, a2, s); 155 if (s != n) 156 cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy); 157 cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n); 158 #else 159 cy = mpn_lshift (as2, a2, s, 1); 160 cy += mpn_add_n (as2, a1, as2, s); 161 if (s != n) 162 cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy); 163 cy = 2 * cy + mpn_lshift (as2, as2, n, 1); 164 cy += mpn_add_n (as2, a0, as2, n); 165 #endif 166 as2[n] = cy; 167 168 /* Compute bs1 and bsm1. */ 169 cy = mpn_add (gp, b0, n, b2, t); 170 #if HAVE_NATIVE_mpn_addsub_n 171 if (cy == 0 && mpn_cmp (gp, b1, n) < 0) 172 { 173 cy = mpn_addsub_n (bs1, bsm1, b1, gp, n); 174 bs1[n] = 0; 175 bsm1[n] = 0; 176 vm1_neg ^= 1; 177 } 178 else 179 { 180 cy2 = mpn_addsub_n (bs1, bsm1, gp, b1, n); 181 bs1[n] = cy + (cy2 >> 1); 182 bsm1[n] = cy - (cy & 1); 183 } 184 #else 185 bs1[n] = cy + mpn_add_n (bs1, gp, b1, n); 186 if (cy == 0 && mpn_cmp (gp, b1, n) < 0) 187 { 188 mpn_sub_n (bsm1, b1, gp, n); 189 bsm1[n] = 0; 190 vm1_neg ^= 1; 191 } 192 else 193 { 194 cy -= mpn_sub_n (bsm1, gp, b1, n); 195 bsm1[n] = cy; 196 } 197 #endif 198 199 /* Compute bs2. */ 200 #if HAVE_NATIVE_mpn_addlsh1_n 201 cy = mpn_addlsh1_n (bs2, b1, b2, t); 202 if (t != n) 203 cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy); 204 cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n); 205 #else 206 cy = mpn_lshift (bs2, b2, t, 1); 207 cy += mpn_add_n (bs2, b1, bs2, t); 208 if (t != n) 209 cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy); 210 cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1); 211 cy += mpn_add_n (bs2, b0, bs2, n); 212 #endif 213 bs2[n] = cy; 214 215 ASSERT (as1[n] <= 2); 216 ASSERT (bs1[n] <= 2); 217 ASSERT (asm1[n] <= 1); 218 ASSERT (bsm1[n] <= 1); 219 ASSERT (as2[n] <= 6); 220 ASSERT (bs2[n] <= 6); 221 222 #define v0 pp /* 2n */ 223 #define v1 (pp + 2 * n) /* 2n+1 */ 224 #define vinf (pp + 4 * n) /* s+t */ 225 #define vm1 scratch /* 2n+1 */ 226 #define v2 (scratch + 2 * n + 1) /* 2n+2 */ 227 #define scratch_out (scratch + 4 * n + 4) 228 229 /* vm1, 2n+1 limbs */ 230 #ifdef SMALLER_RECURSION 231 TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out); 232 cy = 0; 233 if (asm1[n] != 0) 234 cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n); 235 if (bsm1[n] != 0) 236 cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n); 237 vm1[2 * n] = cy; 238 #else 239 TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + 1, scratch_out); 240 #endif 241 242 TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out); /* v2, 2n+1 limbs */ 243 244 /* vinf, s+t limbs */ 245 if (s > t) mpn_mul (vinf, a2, s, b2, t); 246 else TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out); 247 248 vinf0 = vinf[0]; /* v1 overlaps with this */ 249 250 #ifdef SMALLER_RECURSION 251 /* v1, 2n+1 limbs */ 252 TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out); 253 if (as1[n] == 1) 254 { 255 cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n); 256 } 257 else if (as1[n] != 0) 258 { 259 #if HAVE_NATIVE_mpn_addlsh1_n 260 cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n); 261 #else 262 cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2)); 263 #endif 264 } 265 else 266 cy = 0; 267 if (bs1[n] == 1) 268 { 269 cy += mpn_add_n (v1 + n, v1 + n, as1, n); 270 } 271 else if (bs1[n] != 0) 272 { 273 #if HAVE_NATIVE_mpn_addlsh1_n 274 cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n); 275 #else 276 cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2)); 277 #endif 278 } 279 v1[2 * n] = cy; 280 #else 281 cy = vinf[1]; 282 TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out); 283 vinf[1] = cy; 284 #endif 285 286 TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out); /* v0, 2n limbs */ 287 288 mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, 1^vm1_neg, vinf0, scratch_out); 289 290 TMP_FREE; 291 } 292