1 /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37
38 #include "gmp.h"
39 #include "gmp-impl.h"
40
41
42 #if GMP_NUMB_BITS < 29
43 #error Not implemented.
44 #endif
45
46 #if GMP_NUMB_BITS < 43
47 #define BIT_CORRECTION 1
48 #define CORRECTION_BITS GMP_NUMB_BITS
49 #else
50 #define BIT_CORRECTION 0
51 #define CORRECTION_BITS 0
52 #endif
53
54
55 #if TUNE_PROGRAM_BUILD
56 #define MAYBE_mul_basecase 1
57 #define MAYBE_mul_toom22 1
58 #define MAYBE_mul_toom33 1
59 #define MAYBE_mul_toom44 1
60 #define MAYBE_mul_toom8h 1
61 #else
62 #define MAYBE_mul_basecase \
63 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
64 #define MAYBE_mul_toom22 \
65 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
66 #define MAYBE_mul_toom33 \
67 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
68 #define MAYBE_mul_toom44 \
69 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
70 #define MAYBE_mul_toom8h \
71 (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
72 #endif
73
74 #define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \
75 do { \
76 if (MAYBE_mul_basecase \
77 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \
78 mpn_mul_basecase (p, a, n, b, n); \
79 if (f) mpn_mul_basecase (p2, a2, n, b2, n); \
80 } else if (MAYBE_mul_toom22 \
81 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \
82 mpn_toom22_mul (p, a, n, b, n, ws); \
83 if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws); \
84 } else if (MAYBE_mul_toom33 \
85 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \
86 mpn_toom33_mul (p, a, n, b, n, ws); \
87 if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws); \
88 } else if (MAYBE_mul_toom44 \
89 && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \
90 mpn_toom44_mul (p, a, n, b, n, ws); \
91 if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws); \
92 } else if (! MAYBE_mul_toom8h \
93 || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) { \
94 mpn_toom6h_mul (p, a, n, b, n, ws); \
95 if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws); \
96 } else { \
97 mpn_toom8h_mul (p, a, n, b, n, ws); \
98 if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws); \
99 } \
100 } while (0)
101
102 #define TOOM8H_MUL_REC(p, a, na, b, nb, ws) \
103 do { mpn_mul (p, a, na, b, nb); } while (0)
104
105 /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
106 With: an >= bn >= 86, an*5 < bn * 11.
107 It _may_ work with bn<=?? and bn*?? < an*? < bn*??
108
109 Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
110 */
111 /* Estimate on needed scratch:
112 S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
113 since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
114 */
115
116 void
mpn_toom8h_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)117 mpn_toom8h_mul (mp_ptr pp,
118 mp_srcptr ap, mp_size_t an,
119 mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
120 {
121 mp_size_t n, s, t;
122 int p, q, half;
123 int sign;
124
125 /***************************** decomposition *******************************/
126
127 ASSERT (an >= bn);
128 /* Can not handle too small operands */
129 ASSERT (bn >= 86);
130 /* Can not handle too much unbalancement */
131 ASSERT (an <= bn*4);
132 ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
133 ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
134 ASSERT (GMP_NUMB_BITS > 9*3 || an*2 <= bn* 3);
135
136 /* Limit num/den is a rational number between
137 (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1)) */
138 #define LIMIT_numerator (21)
139 #define LIMIT_denominat (20)
140
141 if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
142 {
143 half = 0;
144 n = 1 + ((an - 1)>>3);
145 p = q = 7;
146 s = an - 7 * n;
147 t = bn - 7 * n;
148 }
149 else
150 {
151 if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
152 { p = 9; q = 8; }
153 else if (GMP_NUMB_BITS <= 9*3 ||
154 an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
155 { p = 9; q = 7; }
156 else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
157 { p =10; q = 7; }
158 else if (GMP_NUMB_BITS <= 10*3 ||
159 an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
160 { p =10; q = 6; }
161 else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
162 { p =11; q = 6; }
163 else if (GMP_NUMB_BITS <= 11*3 ||
164 an * 4 < 9 * bn)
165 { p =11; q = 5; }
166 else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn) /* is 4*... <12*... */
167 { p =12; q = 5; }
168 else if (GMP_NUMB_BITS <= 12*3 ||
169 an * 9 < 28 * bn ) /* is 4*... <12*... */
170 { p =12; q = 4; }
171 else
172 { p =13; q = 4; }
173
174 half = (p+q)&1;
175 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
176 p--; q--;
177
178 s = an - p * n;
179 t = bn - q * n;
180
181 if(half) { /* Recover from badly chosen splitting */
182 if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
183 else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
184 }
185 }
186 #undef LIMIT_numerator
187 #undef LIMIT_denominat
188
189 ASSERT (0 < s && s <= n);
190 ASSERT (0 < t && t <= n);
191 ASSERT (half || s + t > 3);
192 ASSERT (n > 2);
193
194 #define r6 (pp + 3 * n) /* 3n+1 */
195 #define r4 (pp + 7 * n) /* 3n+1 */
196 #define r2 (pp +11 * n) /* 3n+1 */
197 #define r0 (pp +15 * n) /* s+t <= 2*n */
198 #define r7 (scratch) /* 3n+1 */
199 #define r5 (scratch + 3 * n + 1) /* 3n+1 */
200 #define r3 (scratch + 6 * n + 2) /* 3n+1 */
201 #define r1 (scratch + 9 * n + 3) /* 3n+1 */
202 #define v0 (pp +11 * n) /* n+1 */
203 #define v1 (pp +12 * n+1) /* n+1 */
204 #define v2 (pp +13 * n+2) /* n+1 */
205 #define v3 (scratch +12 * n + 4) /* n+1 */
206 #define wsi (scratch +12 * n + 4) /* 3n+1 */
207 #define wse (scratch +13 * n + 5) /* 2n+1 */
208
209 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
210 need all of them */
211 /* if (scratch == NULL) */
212 /* scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
213 ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
214 ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
215
216 /********************** evaluation and recursive calls *********************/
217
218 /* $\pm1/8$ */
219 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
220 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
221 /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
222 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
223 mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
224
225 /* $\pm1/4$ */
226 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
227 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
228 /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
229 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
230 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
231
232 /* $\pm2$ */
233 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
234 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
235 /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
236 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
237 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
238
239 /* $\pm8$ */
240 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
241 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
242 /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
243 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
244 mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
245
246 /* $\pm1/2$ */
247 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
248 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
249 /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
250 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
251 mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
252
253 /* $\pm1$ */
254 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);
255 if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
256 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
257 else
258 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp);
259 /* A(-1)*B(-1) */ /* A(1)*B(1) */
260 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
261 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
262
263 /* $\pm4$ */
264 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
265 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
266 /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
267 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
268 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
269
270 #undef v0
271 #undef v1
272 #undef v2
273 #undef v3
274 #undef wse
275
276 /* A(0)*B(0) */
277 TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
278
279 /* Infinity */
280 if (UNLIKELY (half != 0)) {
281 if (s > t) {
282 TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
283 } else {
284 TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
285 };
286 };
287
288 mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
289
290 #undef r0
291 #undef r1
292 #undef r2
293 #undef r3
294 #undef r4
295 #undef r5
296 #undef r6
297 #undef wsi
298 }
299
300 #undef TOOM8H_MUL_N_REC
301 #undef TOOM8H_MUL_REC
302 #undef MAYBE_mul_basecase
303 #undef MAYBE_mul_toom22
304 #undef MAYBE_mul_toom33
305 #undef MAYBE_mul_toom44
306 #undef MAYBE_mul_toom8h
307