1 /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2010 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 27 #include "gmp.h" 28 #include "gmp-impl.h" 29 30 31 #if GMP_NUMB_BITS < 29 32 #error Not implemented. 33 #endif 34 35 #if GMP_NUMB_BITS < 43 36 #define BIT_CORRECTION 1 37 #define CORRECTION_BITS GMP_NUMB_BITS 38 #else 39 #define BIT_CORRECTION 0 40 #define CORRECTION_BITS 0 41 #endif 42 43 44 #if TUNE_PROGRAM_BUILD 45 #define MAYBE_mul_basecase 1 46 #define MAYBE_mul_toom22 1 47 #define MAYBE_mul_toom33 1 48 #define MAYBE_mul_toom44 1 49 #define MAYBE_mul_toom8h 1 50 #else 51 #define MAYBE_mul_basecase \ 52 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD) 53 #define MAYBE_mul_toom22 \ 54 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD) 55 #define MAYBE_mul_toom33 \ 56 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD) 57 #define MAYBE_mul_toom44 \ 58 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD) 59 #define MAYBE_mul_toom8h \ 60 (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD) 61 #endif 62 63 #define TOOM8H_MUL_N_REC(p, a, b, n, ws) \ 64 do { \ 65 if (MAYBE_mul_basecase \ 66 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \ 67 mpn_mul_basecase (p, a, n, b, n); \ 68 else if (MAYBE_mul_toom22 \ 69 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \ 70 mpn_toom22_mul (p, a, n, b, n, ws); \ 71 else if (MAYBE_mul_toom33 \ 72 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) \ 73 mpn_toom33_mul (p, a, n, b, n, ws); \ 74 else if (MAYBE_mul_toom44 \ 75 && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) \ 76 mpn_toom44_mul (p, a, n, b, n, ws); \ 77 else if (! MAYBE_mul_toom8h \ 78 || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) \ 79 mpn_toom6h_mul (p, a, n, b, n, ws); \ 80 else \ 81 mpn_toom8h_mul (p, a, n, b, n, ws); \ 82 } while (0) 83 84 #define TOOM8H_MUL_REC(p, a, na, b, nb, ws) \ 85 do { mpn_mul (p, a, na, b, nb); \ 86 } while (0) 87 88 /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn} 89 With: an >= bn >= 86, an*5 < bn * 11. 90 It _may_ work with bn<=?? and bn*?? < an*? < bn*?? 91 92 Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0. 93 */ 94 /* Estimate on needed scratch: 95 S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8), 96 since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6 97 */ 98 99 void 100 mpn_toom8h_mul (mp_ptr pp, 101 mp_srcptr ap, mp_size_t an, 102 mp_srcptr bp, mp_size_t bn, mp_ptr scratch) 103 { 104 mp_size_t n, s, t; 105 int p, q, half; 106 int sign; 107 108 /***************************** decomposition *******************************/ 109 110 ASSERT (an >= bn); 111 /* Can not handle too small operands */ 112 ASSERT (bn >= 86); 113 /* Can not handle too much unbalancement */ 114 ASSERT (an*4 <= bn*13); 115 ASSERT (GMP_NUMB_BITS > 12*3 || an*4 <= bn*12); 116 ASSERT (GMP_NUMB_BITS > 11*3 || an*5 <= bn*11); 117 ASSERT (GMP_NUMB_BITS > 10*3 || an*6 <= bn*10); 118 ASSERT (GMP_NUMB_BITS > 9*3 || an*7 <= bn* 9); 119 120 /* Limit num/den is a rational number between 121 (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1)) */ 122 #define LIMIT_numerator (21) 123 #define LIMIT_denominat (20) 124 125 if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */ 126 { 127 half = 0; 128 n = 1 + ((an - 1)>>3); 129 p = q = 7; 130 s = an - p * n; 131 t = bn - q * n; 132 } 133 else 134 { 135 if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */ 136 { p = 9; q = 8; } 137 else if (GMP_NUMB_BITS <= 9*3 || 138 an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1)) 139 { p = 9; q = 7; } 140 else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */ 141 { p =10; q = 7; } 142 else if (GMP_NUMB_BITS <= 10*3 || 143 an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn) 144 { p =10; q = 6; } 145 else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/ 146 { p =11; q = 6; } 147 else if (GMP_NUMB_BITS <= 11*3 || 148 an * 4 < 9 * bn) 149 { p =11; q = 5; } 150 else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn ) /* is 4*... <12*... */ 151 { p =12; q = 5; } 152 else if (GMP_NUMB_BITS <= 12*3 || 153 an * 9 < 28 * bn ) /* is 4*... <12*... */ 154 { p =12; q = 4; } 155 else 156 { p =13; q = 4; } 157 158 half = (p+q)&1; 159 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q); 160 p--; q--; 161 162 s = an - p * n; 163 t = bn - q * n; 164 165 if(half) { /* Recover from badly chosen splitting */ 166 if (s<1) {p--; s+=n; half=0;} 167 else if (t<1) {q--; t+=n; half=0;} 168 } 169 } 170 #undef LIMIT_numerator 171 #undef LIMIT_denominat 172 173 ASSERT (0 < s && s <= n); 174 ASSERT (0 < t && t <= n); 175 ASSERT (half || s + t > 3); 176 ASSERT (n > 2); 177 178 #define r6 (pp + 3 * n) /* 3n+1 */ 179 #define r4 (pp + 7 * n) /* 3n+1 */ 180 #define r2 (pp +11 * n) /* 3n+1 */ 181 #define r0 (pp +15 * n) /* s+t <= 2*n */ 182 #define r7 (scratch) /* 3n+1 */ 183 #define r5 (scratch + 3 * n + 1) /* 3n+1 */ 184 #define r3 (scratch + 6 * n + 2) /* 3n+1 */ 185 #define r1 (scratch + 9 * n + 3) /* 3n+1 */ 186 #define v0 (pp +11 * n) /* n+1 */ 187 #define v1 (pp +12 * n+1) /* n+1 */ 188 #define v2 (pp +13 * n+2) /* n+1 */ 189 #define v3 (scratch +12 * n + 4) /* n+1 */ 190 #define wsi (scratch +12 * n + 4) /* 3n+1 */ 191 #define wse (scratch +13 * n + 5) /* 2n+1 */ 192 193 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may 194 need all of them */ 195 /* if (scratch == NULL) */ 196 /* scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */ 197 ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn)); 198 ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8)); 199 200 /********************** evaluation and recursive calls *********************/ 201 202 /* $\pm1/8$ */ 203 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^ 204 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp); 205 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/8)*B(-1/8)*8^. */ 206 TOOM8H_MUL_N_REC(r7, v2, v3, n + 1, wse); /* A(+1/8)*B(+1/8)*8^. */ 207 mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half)); 208 209 /* $\pm1/4$ */ 210 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^ 211 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp); 212 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */ 213 TOOM8H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */ 214 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half)); 215 216 /* $\pm2$ */ 217 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^ 218 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp); 219 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */ 220 TOOM8H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(+2)*B(+2) */ 221 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2); 222 223 /* $\pm8$ */ 224 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^ 225 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp); 226 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-8)*B(-8) */ 227 TOOM8H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+8)*B(+8) */ 228 mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6); 229 230 /* $\pm1/2$ */ 231 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^ 232 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp); 233 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */ 234 TOOM8H_MUL_N_REC(r6, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */ 235 mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half); 236 237 /* $\pm1$ */ 238 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp); 239 if (q == 3) 240 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp); 241 else 242 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp); 243 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */ 244 TOOM8H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(1)*B(1) */ 245 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0); 246 247 /* $\pm4$ */ 248 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^ 249 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp); 250 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */ 251 TOOM8H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+4)*B(+4) */ 252 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4); 253 254 #undef v0 255 #undef v1 256 #undef v2 257 #undef v3 258 #undef wse 259 260 /* A(0)*B(0) */ 261 TOOM8H_MUL_N_REC(pp, ap, bp, n, wsi); 262 263 /* Infinity */ 264 if( half != 0) { 265 if(s>t) { 266 TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi); 267 } else { 268 TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi); 269 }; 270 }; 271 272 mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi); 273 274 #undef r0 275 #undef r1 276 #undef r2 277 #undef r3 278 #undef r4 279 #undef r5 280 #undef r6 281 #undef wsi 282 } 283 284 #undef TOOM8H_MUL_N_REC 285 #undef TOOM8H_MUL_REC 286 #undef MAYBE_mul_basecase 287 #undef MAYBE_mul_toom22 288 #undef MAYBE_mul_toom33 289 #undef MAYBE_mul_toom44 290 #undef MAYBE_mul_toom8h 291