1 /* Implementation of the squaring algorithm with Toom-Cook 6.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 27 #include "gmp.h" 28 #include "gmp-impl.h" 29 30 31 #if GMP_NUMB_BITS < 21 32 #error Not implemented. 33 #endif 34 35 36 #if TUNE_PROGRAM_BUILD 37 #define MAYBE_sqr_basecase 1 38 #define MAYBE_sqr_above_basecase 1 39 #define MAYBE_sqr_toom2 1 40 #define MAYBE_sqr_above_toom2 1 41 #define MAYBE_sqr_toom3 1 42 #define MAYBE_sqr_above_toom3 1 43 #define MAYBE_sqr_above_toom4 1 44 #else 45 #ifdef SQR_TOOM8_THRESHOLD 46 #define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6) 47 #else 48 #define SQR_TOOM6_MAX \ 49 ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ? \ 50 ((SQR_FFT_THRESHOLD+6*2-1+5)/6) \ 51 : MP_SIZE_T_MAX ) 52 #endif 53 #define MAYBE_sqr_basecase \ 54 (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD) 55 #define MAYBE_sqr_above_basecase \ 56 (SQR_TOOM6_MAX >= SQR_TOOM2_THRESHOLD) 57 #define MAYBE_sqr_toom2 \ 58 (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD) 59 #define MAYBE_sqr_above_toom2 \ 60 (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD) 61 #define MAYBE_sqr_toom3 \ 62 (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD) 63 #define MAYBE_sqr_above_toom3 \ 64 (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD) 65 #define MAYBE_sqr_above_toom4 \ 66 (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD) 67 #endif 68 69 #define TOOM6_SQR_REC(p, a, n, ws) \ 70 do { \ 71 if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \ 72 || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) \ 73 mpn_sqr_basecase (p, a, n); \ 74 else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \ 75 || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) \ 76 mpn_toom2_sqr (p, a, n, ws); \ 77 else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \ 78 || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) \ 79 mpn_toom3_sqr (p, a, n, ws); \ 80 else if (! MAYBE_sqr_above_toom4 \ 81 || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD)) \ 82 mpn_toom4_sqr (p, a, n, ws); \ 83 else \ 84 mpn_toom6_sqr (p, a, n, ws); \ 85 } while (0) 86 87 void 88 mpn_toom6_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch) 89 { 90 mp_size_t n, s; 91 92 /***************************** decomposition *******************************/ 93 94 ASSERT( an >= 18 ); 95 96 n = 1 + (an - 1) / (size_t) 6; 97 98 s = an - 5 * n; 99 100 ASSERT (0 < s && s <= n); 101 102 #define r4 (pp + 3 * n) /* 3n+1 */ 103 #define r2 (pp + 7 * n) /* 3n+1 */ 104 #define r0 (pp +11 * n) /* s+t <= 2*n */ 105 #define r5 (scratch) /* 3n+1 */ 106 #define r3 (scratch + 3 * n + 1) /* 3n+1 */ 107 #define r1 (scratch + 6 * n + 2) /* 3n+1 */ 108 #define v0 (pp + 7 * n) /* n+1 */ 109 #define v2 (pp + 9 * n+2) /* n+1 */ 110 #define wse (scratch + 9 * n + 3) /* 3n+1 */ 111 112 /* Alloc also 3n+1 limbs for ws... toom_interpolate_12pts may 113 need all of them, when DO_mpn_sublsh_n usea a scratch */ 114 /* if (scratch== NULL) */ 115 /* scratch = TMP_SALLOC_LIMBS (12 * n + 6); */ 116 117 /********************** evaluation and recursive calls *********************/ 118 /* $\pm1/2$ */ 119 mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp); 120 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */ 121 TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */ 122 mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0); 123 124 /* $\pm1$ */ 125 mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp); 126 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */ 127 TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)*B(1) */ 128 mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0); 129 130 /* $\pm4$ */ 131 mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp); 132 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */ 133 TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)*B(+4) */ 134 mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4); 135 136 /* $\pm1/4$ */ 137 mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp); 138 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */ 139 TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */ 140 mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0); 141 142 /* $\pm2$ */ 143 mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp); 144 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */ 145 TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)*B(+2) */ 146 mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2); 147 148 #undef v0 149 #undef v2 150 151 /* A(0)*B(0) */ 152 TOOM6_SQR_REC(pp, ap, n, wse); 153 154 mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse); 155 156 #undef r0 157 #undef r1 158 #undef r2 159 #undef r3 160 #undef r4 161 #undef r5 162 163 } 164 #undef TOOM6_SQR_REC 165 #undef MAYBE_sqr_basecase 166 #undef MAYBE_sqr_above_basecase 167 #undef MAYBE_sqr_toom2 168 #undef MAYBE_sqr_above_toom2 169 #undef MAYBE_sqr_toom3 170 #undef MAYBE_sqr_above_toom3 171 #undef MAYBE_sqr_above_toom4 172