1 /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009, 2010 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
22
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25
26
27 #include "gmp.h"
28 #include "gmp-impl.h"
29
30
31 #if GMP_NUMB_BITS < 29
32 #error Not implemented.
33 #endif
34
35 #if GMP_NUMB_BITS < 43
36 #define BIT_CORRECTION 1
37 #define CORRECTION_BITS GMP_NUMB_BITS
38 #else
39 #define BIT_CORRECTION 0
40 #define CORRECTION_BITS 0
41 #endif
42
43
44 #if TUNE_PROGRAM_BUILD
45 #define MAYBE_mul_basecase 1
46 #define MAYBE_mul_toom22 1
47 #define MAYBE_mul_toom33 1
48 #define MAYBE_mul_toom44 1
49 #define MAYBE_mul_toom8h 1
50 #else
51 #define MAYBE_mul_basecase \
52 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
53 #define MAYBE_mul_toom22 \
54 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
55 #define MAYBE_mul_toom33 \
56 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
57 #define MAYBE_mul_toom44 \
58 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
59 #define MAYBE_mul_toom8h \
60 (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
61 #endif
62
63 #define TOOM8H_MUL_N_REC(p, a, b, n, ws) \
64 do { \
65 if (MAYBE_mul_basecase \
66 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
67 mpn_mul_basecase (p, a, n, b, n); \
68 else if (MAYBE_mul_toom22 \
69 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \
70 mpn_toom22_mul (p, a, n, b, n, ws); \
71 else if (MAYBE_mul_toom33 \
72 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) \
73 mpn_toom33_mul (p, a, n, b, n, ws); \
74 else if (MAYBE_mul_toom44 \
75 && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) \
76 mpn_toom44_mul (p, a, n, b, n, ws); \
77 else if (! MAYBE_mul_toom8h \
78 || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) \
79 mpn_toom6h_mul (p, a, n, b, n, ws); \
80 else \
81 mpn_toom8h_mul (p, a, n, b, n, ws); \
82 } while (0)
83
84 #define TOOM8H_MUL_REC(p, a, na, b, nb, ws) \
85 do { mpn_mul (p, a, na, b, nb); \
86 } while (0)
87
88 /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
89 With: an >= bn >= 86, an*5 < bn * 11.
90 It _may_ work with bn<=?? and bn*?? < an*? < bn*??
91
92 Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
93 */
94 /* Estimate on needed scratch:
95 S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
96 since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
97 */
98
99 void
mpn_toom8h_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)100 mpn_toom8h_mul (mp_ptr pp,
101 mp_srcptr ap, mp_size_t an,
102 mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
103 {
104 mp_size_t n, s, t;
105 int p, q, half;
106 int sign;
107
108 /***************************** decomposition *******************************/
109
110 ASSERT (an >= bn);
111 /* Can not handle too small operands */
112 ASSERT (bn >= 86);
113 /* Can not handle too much unbalancement */
114 ASSERT (an*4 <= bn*13);
115 ASSERT (GMP_NUMB_BITS > 12*3 || an*4 <= bn*12);
116 ASSERT (GMP_NUMB_BITS > 11*3 || an*5 <= bn*11);
117 ASSERT (GMP_NUMB_BITS > 10*3 || an*6 <= bn*10);
118 ASSERT (GMP_NUMB_BITS > 9*3 || an*7 <= bn* 9);
119
120 /* Limit num/den is a rational number between
121 (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1)) */
122 #define LIMIT_numerator (21)
123 #define LIMIT_denominat (20)
124
125 if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
126 {
127 half = 0;
128 n = 1 + ((an - 1)>>3);
129 p = q = 7;
130 s = an - p * n;
131 t = bn - q * n;
132 }
133 else
134 {
135 if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
136 { p = 9; q = 8; }
137 else if (GMP_NUMB_BITS <= 9*3 ||
138 an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
139 { p = 9; q = 7; }
140 else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
141 { p =10; q = 7; }
142 else if (GMP_NUMB_BITS <= 10*3 ||
143 an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
144 { p =10; q = 6; }
145 else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
146 { p =11; q = 6; }
147 else if (GMP_NUMB_BITS <= 11*3 ||
148 an * 4 < 9 * bn)
149 { p =11; q = 5; }
150 else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn ) /* is 4*... <12*... */
151 { p =12; q = 5; }
152 else if (GMP_NUMB_BITS <= 12*3 ||
153 an * 9 < 28 * bn ) /* is 4*... <12*... */
154 { p =12; q = 4; }
155 else
156 { p =13; q = 4; }
157
158 half = (p+q)&1;
159 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
160 p--; q--;
161
162 s = an - p * n;
163 t = bn - q * n;
164
165 if(half) { /* Recover from badly chosen splitting */
166 if (s<1) {p--; s+=n; half=0;}
167 else if (t<1) {q--; t+=n; half=0;}
168 }
169 }
170 #undef LIMIT_numerator
171 #undef LIMIT_denominat
172
173 ASSERT (0 < s && s <= n);
174 ASSERT (0 < t && t <= n);
175 ASSERT (half || s + t > 3);
176 ASSERT (n > 2);
177
178 #define r6 (pp + 3 * n) /* 3n+1 */
179 #define r4 (pp + 7 * n) /* 3n+1 */
180 #define r2 (pp +11 * n) /* 3n+1 */
181 #define r0 (pp +15 * n) /* s+t <= 2*n */
182 #define r7 (scratch) /* 3n+1 */
183 #define r5 (scratch + 3 * n + 1) /* 3n+1 */
184 #define r3 (scratch + 6 * n + 2) /* 3n+1 */
185 #define r1 (scratch + 9 * n + 3) /* 3n+1 */
186 #define v0 (pp +11 * n) /* n+1 */
187 #define v1 (pp +12 * n+1) /* n+1 */
188 #define v2 (pp +13 * n+2) /* n+1 */
189 #define v3 (scratch +12 * n + 4) /* n+1 */
190 #define wsi (scratch +12 * n + 4) /* 3n+1 */
191 #define wse (scratch +13 * n + 5) /* 2n+1 */
192
193 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
194 need all of them */
195 /* if (scratch == NULL) */
196 /* scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
197 ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
198 ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
199
200 /********************** evaluation and recursive calls *********************/
201
202 /* $\pm1/8$ */
203 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
204 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
205 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/8)*B(-1/8)*8^. */
206 TOOM8H_MUL_N_REC(r7, v2, v3, n + 1, wse); /* A(+1/8)*B(+1/8)*8^. */
207 mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
208
209 /* $\pm1/4$ */
210 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
211 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
212 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
213 TOOM8H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
214 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
215
216 /* $\pm2$ */
217 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
218 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
219 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */
220 TOOM8H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(+2)*B(+2) */
221 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
222
223 /* $\pm8$ */
224 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
225 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
226 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-8)*B(-8) */
227 TOOM8H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+8)*B(+8) */
228 mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
229
230 /* $\pm1/2$ */
231 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
232 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
233 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
234 TOOM8H_MUL_N_REC(r6, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
235 mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
236
237 /* $\pm1$ */
238 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);
239 if (q == 3)
240 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
241 else
242 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp);
243 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */
244 TOOM8H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(1)*B(1) */
245 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
246
247 /* $\pm4$ */
248 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
249 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
250 TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */
251 TOOM8H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
252 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
253
254 #undef v0
255 #undef v1
256 #undef v2
257 #undef v3
258 #undef wse
259
260 /* A(0)*B(0) */
261 TOOM8H_MUL_N_REC(pp, ap, bp, n, wsi);
262
263 /* Infinity */
264 if( half != 0) {
265 if(s>t) {
266 TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
267 } else {
268 TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
269 };
270 };
271
272 mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
273
274 #undef r0
275 #undef r1
276 #undef r2
277 #undef r3
278 #undef r4
279 #undef r5
280 #undef r6
281 #undef wsi
282 }
283
284 #undef TOOM8H_MUL_N_REC
285 #undef TOOM8H_MUL_REC
286 #undef MAYBE_mul_basecase
287 #undef MAYBE_mul_toom22
288 #undef MAYBE_mul_toom33
289 #undef MAYBE_mul_toom44
290 #undef MAYBE_mul_toom8h
291