1 /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37
38 #include "gmp-impl.h"
39
40
41 #if GMP_NUMB_BITS < 29
42 #error Not implemented.
43 #endif
44
45 #if GMP_NUMB_BITS < 43
46 #define BIT_CORRECTION 1
47 #define CORRECTION_BITS GMP_NUMB_BITS
48 #else
49 #define BIT_CORRECTION 0
50 #define CORRECTION_BITS 0
51 #endif
52
53
54 #if TUNE_PROGRAM_BUILD
55 #define MAYBE_mul_basecase 1
56 #define MAYBE_mul_toom22 1
57 #define MAYBE_mul_toom33 1
58 #define MAYBE_mul_toom44 1
59 #define MAYBE_mul_toom8h 1
60 #else
61 #define MAYBE_mul_basecase \
62 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
63 #define MAYBE_mul_toom22 \
64 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
65 #define MAYBE_mul_toom33 \
66 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
67 #define MAYBE_mul_toom44 \
68 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
69 #define MAYBE_mul_toom8h \
70 (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
71 #endif
72
73 #define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \
74 do { \
75 if (MAYBE_mul_basecase \
76 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \
77 mpn_mul_basecase (p, a, n, b, n); \
78 if (f) mpn_mul_basecase (p2, a2, n, b2, n); \
79 } else if (MAYBE_mul_toom22 \
80 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \
81 mpn_toom22_mul (p, a, n, b, n, ws); \
82 if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws); \
83 } else if (MAYBE_mul_toom33 \
84 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \
85 mpn_toom33_mul (p, a, n, b, n, ws); \
86 if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws); \
87 } else if (MAYBE_mul_toom44 \
88 && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \
89 mpn_toom44_mul (p, a, n, b, n, ws); \
90 if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws); \
91 } else if (! MAYBE_mul_toom8h \
92 || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) { \
93 mpn_toom6h_mul (p, a, n, b, n, ws); \
94 if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws); \
95 } else { \
96 mpn_toom8h_mul (p, a, n, b, n, ws); \
97 if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws); \
98 } \
99 } while (0)
100
101 #define TOOM8H_MUL_REC(p, a, na, b, nb, ws) \
102 do { mpn_mul (p, a, na, b, nb); } while (0)
103
104 /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
105 With: an >= bn >= 86, an*5 < bn * 11.
106 It _may_ work with bn<=?? and bn*?? < an*? < bn*??
107
108 Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
109 */
110 /* Estimate on needed scratch:
111 S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
112 since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
113 */
114
115 void
mpn_toom8h_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)116 mpn_toom8h_mul (mp_ptr pp,
117 mp_srcptr ap, mp_size_t an,
118 mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
119 {
120 mp_size_t n, s, t;
121 int p, q, half;
122 int sign;
123
124 /***************************** decomposition *******************************/
125
126 ASSERT (an >= bn);
127 /* Can not handle too small operands */
128 ASSERT (bn >= 86);
129 /* Can not handle too much unbalancement */
130 ASSERT (an <= bn*4);
131 ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
132 ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
133 ASSERT (GMP_NUMB_BITS > 9*3 || an*2 <= bn* 3);
134
135 /* Limit num/den is a rational number between
136 (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1)) */
137 #define LIMIT_numerator (21)
138 #define LIMIT_denominat (20)
139
140 if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
141 {
142 half = 0;
143 n = 1 + ((an - 1)>>3);
144 p = q = 7;
145 s = an - 7 * n;
146 t = bn - 7 * n;
147 }
148 else
149 {
150 if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
151 { p = 9; q = 8; }
152 else if (GMP_NUMB_BITS <= 9*3 ||
153 an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
154 { p = 9; q = 7; }
155 else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
156 { p =10; q = 7; }
157 else if (GMP_NUMB_BITS <= 10*3 ||
158 an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
159 { p =10; q = 6; }
160 else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
161 { p =11; q = 6; }
162 else if (GMP_NUMB_BITS <= 11*3 ||
163 an * 4 < 9 * bn)
164 { p =11; q = 5; }
165 else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn) /* is 4*... <12*... */
166 { p =12; q = 5; }
167 else if (GMP_NUMB_BITS <= 12*3 ||
168 an * 9 < 28 * bn ) /* is 4*... <12*... */
169 { p =12; q = 4; }
170 else
171 { p =13; q = 4; }
172
173 half = (p+q)&1;
174 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
175 p--; q--;
176
177 s = an - p * n;
178 t = bn - q * n;
179
180 if(half) { /* Recover from badly chosen splitting */
181 if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
182 else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
183 }
184 }
185 #undef LIMIT_numerator
186 #undef LIMIT_denominat
187
188 ASSERT (0 < s && s <= n);
189 ASSERT (0 < t && t <= n);
190 ASSERT (half || s + t > 3);
191 ASSERT (n > 2);
192
193 #define r6 (pp + 3 * n) /* 3n+1 */
194 #define r4 (pp + 7 * n) /* 3n+1 */
195 #define r2 (pp +11 * n) /* 3n+1 */
196 #define r0 (pp +15 * n) /* s+t <= 2*n */
197 #define r7 (scratch) /* 3n+1 */
198 #define r5 (scratch + 3 * n + 1) /* 3n+1 */
199 #define r3 (scratch + 6 * n + 2) /* 3n+1 */
200 #define r1 (scratch + 9 * n + 3) /* 3n+1 */
201 #define v0 (pp +11 * n) /* n+1 */
202 #define v1 (pp +12 * n+1) /* n+1 */
203 #define v2 (pp +13 * n+2) /* n+1 */
204 #define v3 (scratch +12 * n + 4) /* n+1 */
205 #define wsi (scratch +12 * n + 4) /* 3n+1 */
206 #define wse (scratch +13 * n + 5) /* 2n+1 */
207
208 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
209 need all of them */
210 /* if (scratch == NULL) */
211 /* scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
212 ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
213 ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
214
215 /********************** evaluation and recursive calls *********************/
216
217 /* $\pm1/8$ */
218 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
219 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
220 /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
221 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
222 mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
223
224 /* $\pm1/4$ */
225 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
226 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
227 /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
228 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
229 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
230
231 /* $\pm2$ */
232 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
233 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
234 /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
235 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
236 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
237
238 /* $\pm8$ */
239 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
240 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
241 /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
242 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
243 mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
244
245 /* $\pm1/2$ */
246 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
247 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
248 /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
249 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
250 mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
251
252 /* $\pm1$ */
253 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);
254 if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
255 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
256 else
257 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp);
258 /* A(-1)*B(-1) */ /* A(1)*B(1) */
259 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
260 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
261
262 /* $\pm4$ */
263 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
264 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
265 /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
266 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
267 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
268
269 #undef v0
270 #undef v1
271 #undef v2
272 #undef v3
273 #undef wse
274
275 /* A(0)*B(0) */
276 TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
277
278 /* Infinity */
279 if (UNLIKELY (half != 0)) {
280 if (s > t) {
281 TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
282 } else {
283 TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
284 };
285 };
286
287 mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
288
289 #undef r0
290 #undef r1
291 #undef r2
292 #undef r3
293 #undef r4
294 #undef r5
295 #undef r6
296 #undef wsi
297 }
298
299 #undef TOOM8H_MUL_N_REC
300 #undef TOOM8H_MUL_REC
301 #undef MAYBE_mul_basecase
302 #undef MAYBE_mul_toom22
303 #undef MAYBE_mul_toom33
304 #undef MAYBE_mul_toom44
305 #undef MAYBE_mul_toom8h
306