1 /* Implementation of the multiplication algorithm for Toom-Cook 6.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2010 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 27 #include "gmp.h" 28 #include "gmp-impl.h" 29 30 31 #if GMP_NUMB_BITS < 21 32 #error Not implemented. 33 #endif 34 35 #if TUNE_PROGRAM_BUILD 36 #define MAYBE_mul_basecase 1 37 #define MAYBE_mul_toom22 1 38 #define MAYBE_mul_toom33 1 39 #define MAYBE_mul_toom6h 1 40 #else 41 #define MAYBE_mul_basecase \ 42 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD) 43 #define MAYBE_mul_toom22 \ 44 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD) 45 #define MAYBE_mul_toom33 \ 46 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD) 47 #define MAYBE_mul_toom6h \ 48 (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD) 49 #endif 50 51 #define TOOM6H_MUL_N_REC(p, a, b, n, ws) \ 52 do { \ 53 if (MAYBE_mul_basecase \ 54 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \ 55 mpn_mul_basecase (p, a, n, b, n); \ 56 else if (MAYBE_mul_toom22 \ 57 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \ 58 mpn_toom22_mul (p, a, n, b, n, ws); \ 59 else if (MAYBE_mul_toom33 \ 60 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) \ 61 mpn_toom33_mul (p, a, n, b, n, ws); \ 62 else if (! MAYBE_mul_toom6h \ 63 || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) \ 64 mpn_toom44_mul (p, a, n, b, n, ws); \ 65 else \ 66 mpn_toom6h_mul (p, a, n, b, n, ws); \ 67 } while (0) 68 69 #define TOOM6H_MUL_REC(p, a, na, b, nb, ws) \ 70 do { mpn_mul (p, a, na, b, nb); \ 71 } while (0) 72 73 /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn} 74 With: an >= bn >= 46, an*6 < bn * 17. 75 It _may_ work with bn<=46 and bn*17 < an*6 < bn*18 76 77 Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0. 78 */ 79 /* Estimate on needed scratch: 80 S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6), 81 since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6 82 */ 83 84 void 85 mpn_toom6h_mul (mp_ptr pp, 86 mp_srcptr ap, mp_size_t an, 87 mp_srcptr bp, mp_size_t bn, mp_ptr scratch) 88 { 89 mp_size_t n, s, t; 90 int p, q, half; 91 int sign; 92 93 /***************************** decomposition *******************************/ 94 95 ASSERT( an >= bn); 96 /* Can not handle too much unbalancement */ 97 ASSERT( bn >= 42 ); 98 /* Can not handle too much unbalancement */ 99 ASSERT((an*3 < bn * 8) || ( bn >= 46 && an*6 < bn * 17 )); 100 101 /* Limit num/den is a rational number between 102 (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1)) */ 103 #define LIMIT_numerator (18) 104 #define LIMIT_denominat (17) 105 106 if( an * LIMIT_denominat < LIMIT_numerator * bn ) /* is 6*... < 6*... */ 107 { p = q = 6; } 108 else if( an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn ) 109 { p = 7; q = 6; } 110 else if( an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn ) 111 { p = 7; q = 5; } 112 else if( an * LIMIT_numerator < LIMIT_denominat * 2 * bn ) /* is 4*... < 8*... */ 113 { p = 8; q = 5; } 114 else if( an * LIMIT_denominat < LIMIT_numerator * 2 * bn ) /* is 4*... < 8*... */ 115 { p = 8; q = 4; } 116 else 117 { p = 9; q = 4; } 118 119 half = (p ^ q) & 1; 120 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q); 121 p--; q--; 122 123 s = an - p * n; 124 t = bn - q * n; 125 126 /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/ 127 if (half) { /* Recover from badly chosen splitting */ 128 if (s<1) {p--; s+=n; half=0;} 129 else if (t<1) {q--; t+=n; half=0;} 130 } 131 #undef LIMIT_numerator 132 #undef LIMIT_denominat 133 134 ASSERT (0 < s && s <= n); 135 ASSERT (0 < t && t <= n); 136 ASSERT (half || s + t > 3); 137 ASSERT (n > 2); 138 139 #define r4 (pp + 3 * n) /* 3n+1 */ 140 #define r2 (pp + 7 * n) /* 3n+1 */ 141 #define r0 (pp +11 * n) /* s+t <= 2*n */ 142 #define r5 (scratch) /* 3n+1 */ 143 #define r3 (scratch + 3 * n + 1) /* 3n+1 */ 144 #define r1 (scratch + 6 * n + 2) /* 3n+1 */ 145 #define v0 (pp + 7 * n) /* n+1 */ 146 #define v1 (pp + 8 * n+1) /* n+1 */ 147 #define v2 (pp + 9 * n+2) /* n+1 */ 148 #define v3 (scratch + 9 * n + 3) /* n+1 */ 149 #define wsi (scratch + 9 * n + 3) /* 3n+1 */ 150 #define wse (scratch +10 * n + 4) /* 2n+1 */ 151 152 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may 153 need all of them */ 154 /* if (scratch == NULL) */ 155 /* scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */ 156 ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn)); 157 ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6)); 158 159 /********************** evaluation and recursive calls *********************/ 160 /* $\pm1/2$ */ 161 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^ 162 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp); 163 TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */ 164 TOOM6H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */ 165 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half); 166 167 /* $\pm1$ */ 168 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp); 169 if (q == 3) 170 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp); 171 else 172 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp); 173 TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */ 174 TOOM6H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(1)*B(1) */ 175 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0); 176 177 /* $\pm4$ */ 178 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^ 179 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp); 180 TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */ 181 TOOM6H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */ 182 mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4); 183 184 /* $\pm1/4$ */ 185 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^ 186 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp); 187 TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */ 188 TOOM6H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */ 189 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half)); 190 191 /* $\pm2$ */ 192 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^ 193 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp); 194 TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */ 195 TOOM6H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+2)*B(+2) */ 196 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2); 197 198 #undef v0 199 #undef v1 200 #undef v2 201 #undef v3 202 #undef wse 203 204 /* A(0)*B(0) */ 205 TOOM6H_MUL_N_REC(pp, ap, bp, n, wsi); 206 207 /* Infinity */ 208 if( half != 0) { 209 if(s>t) { 210 TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi); 211 } else { 212 TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi); 213 }; 214 }; 215 216 mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi); 217 218 #undef r0 219 #undef r1 220 #undef r2 221 #undef r3 222 #undef r4 223 #undef r5 224 #undef wsi 225 } 226 227 #undef TOOM6H_MUL_N_REC 228 #undef TOOM6H_MUL_REC 229 #undef MAYBE_mul_basecase 230 #undef MAYBE_mul_toom22 231 #undef MAYBE_mul_toom33 232 #undef MAYBE_mul_toom6h 233