1 /* Implementation of the squaring algorithm with Toom-Cook 8.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 27 #include "gmp.h" 28 #include "gmp-impl.h" 29 30 #if GMP_NUMB_BITS < 29 31 #error Not implemented. 32 #endif 33 34 #if GMP_NUMB_BITS < 43 35 #define BIT_CORRECTION 1 36 #define CORRECTION_BITS GMP_NUMB_BITS 37 #else 38 #define BIT_CORRECTION 0 39 #define CORRECTION_BITS 0 40 #endif 41 42 #ifndef SQR_TOOM8_THRESHOLD 43 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD 44 #endif 45 46 #ifndef SQR_TOOM6_THRESHOLD 47 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD 48 #endif 49 50 #if TUNE_PROGRAM_BUILD 51 #define MAYBE_sqr_basecase 1 52 #define MAYBE_sqr_above_basecase 1 53 #define MAYBE_sqr_toom2 1 54 #define MAYBE_sqr_above_toom2 1 55 #define MAYBE_sqr_toom3 1 56 #define MAYBE_sqr_above_toom3 1 57 #define MAYBE_sqr_toom4 1 58 #define MAYBE_sqr_above_toom4 1 59 #define MAYBE_sqr_above_toom6 1 60 #else 61 #define SQR_TOOM8_MAX \ 62 ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ? \ 63 ((SQR_FFT_THRESHOLD+8*2-1+7)/8) \ 64 : MP_SIZE_T_MAX ) 65 #define MAYBE_sqr_basecase \ 66 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD) 67 #define MAYBE_sqr_above_basecase \ 68 (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD) 69 #define MAYBE_sqr_toom2 \ 70 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD) 71 #define MAYBE_sqr_above_toom2 \ 72 (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD) 73 #define MAYBE_sqr_toom3 \ 74 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD) 75 #define MAYBE_sqr_above_toom3 \ 76 (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD) 77 #define MAYBE_sqr_toom4 \ 78 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD) 79 #define MAYBE_sqr_above_toom4 \ 80 (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD) 81 #define MAYBE_sqr_above_toom6 \ 82 (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD) 83 #endif 84 85 #define TOOM8_SQR_REC(p, a, n, ws) \ 86 do { \ 87 if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \ 88 || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) \ 89 mpn_sqr_basecase (p, a, n); \ 90 else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \ 91 || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) \ 92 mpn_toom2_sqr (p, a, n, ws); \ 93 else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \ 94 || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) \ 95 mpn_toom3_sqr (p, a, n, ws); \ 96 else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4 \ 97 || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) \ 98 mpn_toom4_sqr (p, a, n, ws); \ 99 else if (! MAYBE_sqr_above_toom6 \ 100 || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) \ 101 mpn_toom6_sqr (p, a, n, ws); \ 102 else \ 103 mpn_toom8_sqr (p, a, n, ws); \ 104 } while (0) 105 106 void 107 mpn_toom8_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch) 108 { 109 mp_size_t n, s; 110 111 /***************************** decomposition *******************************/ 112 113 ASSERT ( an >= 40 ); 114 115 n = 1 + ((an - 1)>>3); 116 117 s = an - 7 * n; 118 119 ASSERT (0 < s && s <= n); 120 ASSERT ( s + s > 3 ); 121 122 #define r6 (pp + 3 * n) /* 3n+1 */ 123 #define r4 (pp + 7 * n) /* 3n+1 */ 124 #define r2 (pp +11 * n) /* 3n+1 */ 125 #define r0 (pp +15 * n) /* s+t <= 2*n */ 126 #define r7 (scratch) /* 3n+1 */ 127 #define r5 (scratch + 3 * n + 1) /* 3n+1 */ 128 #define r3 (scratch + 6 * n + 2) /* 3n+1 */ 129 #define r1 (scratch + 9 * n + 3) /* 3n+1 */ 130 #define v0 (pp +11 * n) /* n+1 */ 131 #define v2 (pp +13 * n+2) /* n+1 */ 132 #define wse (scratch +12 * n + 4) /* 3n+1 */ 133 134 /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may 135 need all of them, when DO_mpn_sublsh_n usea a scratch */ 136 /* if (scratch == NULL) */ 137 /* scratch = TMP_SALLOC_LIMBS (30 * n + 6); */ 138 139 /********************** evaluation and recursive calls *********************/ 140 /* $\pm1/8$ */ 141 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp); 142 TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/8)*B(-1/8)*8^. */ 143 TOOM8_SQR_REC(r7, v2, n + 1, wse); /* A(+1/8)*B(+1/8)*8^. */ 144 mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0); 145 146 /* $\pm1/4$ */ 147 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp); 148 TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */ 149 TOOM8_SQR_REC(r5, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */ 150 mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0); 151 152 /* $\pm2$ */ 153 mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp); 154 TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */ 155 TOOM8_SQR_REC(r3, v2, n + 1, wse); /* A(+2)*B(+2) */ 156 mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2); 157 158 /* $\pm8$ */ 159 mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp); 160 TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-8)*B(-8) */ 161 TOOM8_SQR_REC(r1, v2, n + 1, wse); /* A(+8)*B(+8) */ 162 mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6); 163 164 /* $\pm1/2$ */ 165 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp); 166 TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */ 167 TOOM8_SQR_REC(r6, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */ 168 mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0); 169 170 /* $\pm1$ */ 171 mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s, pp); 172 TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */ 173 TOOM8_SQR_REC(r4, v2, n + 1, wse); /* A(1)*B(1) */ 174 mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0); 175 176 /* $\pm4$ */ 177 mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp); 178 TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */ 179 TOOM8_SQR_REC(r2, v2, n + 1, wse); /* A(+4)*B(+4) */ 180 mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4); 181 182 #undef v0 183 #undef v2 184 185 /* A(0)*B(0) */ 186 TOOM8_SQR_REC(pp, ap, n, wse); 187 188 mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse); 189 190 #undef r0 191 #undef r1 192 #undef r2 193 #undef r3 194 #undef r4 195 #undef r5 196 #undef r6 197 #undef wse 198 199 } 200 201 #undef TOOM8_SQR_REC 202 #undef MAYBE_sqr_basecase 203 #undef MAYBE_sqr_above_basecase 204 #undef MAYBE_sqr_toom2 205 #undef MAYBE_sqr_above_toom2 206 #undef MAYBE_sqr_toom3 207 #undef MAYBE_sqr_above_toom3 208 #undef MAYBE_sqr_above_toom4 209