1 /* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37
38 #include "gmp-impl.h"
39
40
41 #if GMP_NUMB_BITS < 21
42 #error Not implemented.
43 #endif
44
45 #if TUNE_PROGRAM_BUILD
46 #define MAYBE_mul_basecase 1
47 #define MAYBE_mul_toom22 1
48 #define MAYBE_mul_toom33 1
49 #define MAYBE_mul_toom6h 1
50 #else
51 #define MAYBE_mul_basecase \
52 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
53 #define MAYBE_mul_toom22 \
54 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
55 #define MAYBE_mul_toom33 \
56 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
57 #define MAYBE_mul_toom6h \
58 (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
59 #endif
60
61 #define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \
62 do { \
63 if (MAYBE_mul_basecase \
64 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \
65 mpn_mul_basecase (p, a, n, b, n); \
66 if (f) \
67 mpn_mul_basecase (p2, a2, n, b2, n); \
68 } else if (MAYBE_mul_toom22 \
69 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \
70 mpn_toom22_mul (p, a, n, b, n, ws); \
71 if (f) \
72 mpn_toom22_mul (p2, a2, n, b2, n, ws); \
73 } else if (MAYBE_mul_toom33 \
74 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \
75 mpn_toom33_mul (p, a, n, b, n, ws); \
76 if (f) \
77 mpn_toom33_mul (p2, a2, n, b2, n, ws); \
78 } else if (! MAYBE_mul_toom6h \
79 || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \
80 mpn_toom44_mul (p, a, n, b, n, ws); \
81 if (f) \
82 mpn_toom44_mul (p2, a2, n, b2, n, ws); \
83 } else { \
84 mpn_toom6h_mul (p, a, n, b, n, ws); \
85 if (f) \
86 mpn_toom6h_mul (p2, a2, n, b2, n, ws); \
87 } \
88 } while (0)
89
90 #define TOOM6H_MUL_REC(p, a, na, b, nb, ws) \
91 do { mpn_mul (p, a, na, b, nb); \
92 } while (0)
93
94 /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
95 With: an >= bn >= 46, an*6 < bn * 17.
96 It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
97
98 Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
99 */
100 /* Estimate on needed scratch:
101 S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
102 since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
103 */
104
105 void
mpn_toom6h_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)106 mpn_toom6h_mul (mp_ptr pp,
107 mp_srcptr ap, mp_size_t an,
108 mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
109 {
110 mp_size_t n, s, t;
111 int p, q, half;
112 int sign;
113
114 /***************************** decomposition *******************************/
115
116 ASSERT (an >= bn);
117 /* Can not handle too much unbalancement */
118 ASSERT (bn >= 42);
119 /* Can not handle too much unbalancement */
120 ASSERT ((an*3 < bn * 8) || (bn >= 46 && an * 6 < bn * 17));
121
122 /* Limit num/den is a rational number between
123 (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1)) */
124 #define LIMIT_numerator (18)
125 #define LIMIT_denominat (17)
126
127 if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
128 {
129 n = 1 + (an - 1) / (size_t) 6;
130 p = q = 5;
131 half = 0;
132
133 s = an - 5 * n;
134 t = bn - 5 * n;
135 }
136 else {
137 if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
138 { p = 7; q = 6; }
139 else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
140 { p = 7; q = 5; }
141 else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn) /* is 4*... < 8*... */
142 { p = 8; q = 5; }
143 else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn) /* is 4*... < 8*... */
144 { p = 8; q = 4; }
145 else
146 { p = 9; q = 4; }
147
148 half = (p ^ q) & 1;
149 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
150 p--; q--;
151
152 s = an - p * n;
153 t = bn - q * n;
154
155 /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
156 if (half) { /* Recover from badly chosen splitting */
157 if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
158 else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
159 }
160 }
161 #undef LIMIT_numerator
162 #undef LIMIT_denominat
163
164 ASSERT (0 < s && s <= n);
165 ASSERT (0 < t && t <= n);
166 ASSERT (half || s + t > 3);
167 ASSERT (n > 2);
168
169 #define r4 (pp + 3 * n) /* 3n+1 */
170 #define r2 (pp + 7 * n) /* 3n+1 */
171 #define r0 (pp +11 * n) /* s+t <= 2*n */
172 #define r5 (scratch) /* 3n+1 */
173 #define r3 (scratch + 3 * n + 1) /* 3n+1 */
174 #define r1 (scratch + 6 * n + 2) /* 3n+1 */
175 #define v0 (pp + 7 * n) /* n+1 */
176 #define v1 (pp + 8 * n+1) /* n+1 */
177 #define v2 (pp + 9 * n+2) /* n+1 */
178 #define v3 (scratch + 9 * n + 3) /* n+1 */
179 #define wsi (scratch + 9 * n + 3) /* 3n+1 */
180 #define wse (scratch +10 * n + 4) /* 2n+1 */
181
182 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
183 need all of them */
184 /* if (scratch == NULL) */
185 /* scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
186 ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
187 ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
188
189 /********************** evaluation and recursive calls *********************/
190 /* $\pm1/2$ */
191 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
192 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
193 /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
194 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
195 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
196
197 /* $\pm1$ */
198 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);
199 if (UNLIKELY (q == 3))
200 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
201 else
202 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp);
203 /* A(-1)*B(-1) */ /* A(1)*B(1) */
204 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
205 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
206
207 /* $\pm4$ */
208 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
209 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
210 /* A(-4)*B(-4) */
211 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
212 mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
213
214 /* $\pm1/4$ */
215 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
216 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
217 /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
218 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
219 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
220
221 /* $\pm2$ */
222 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
223 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
224 /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
225 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
226 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
227
228 #undef v0
229 #undef v1
230 #undef v2
231 #undef v3
232 #undef wse
233
234 /* A(0)*B(0) */
235 TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
236
237 /* Infinity */
238 if (UNLIKELY (half != 0)) {
239 if (s > t) {
240 TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
241 } else {
242 TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
243 };
244 };
245
246 mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
247
248 #undef r0
249 #undef r1
250 #undef r2
251 #undef r3
252 #undef r4
253 #undef r5
254 #undef wsi
255 }
256
257 #undef TOOM6H_MUL_N_REC
258 #undef TOOM6H_MUL_REC
259 #undef MAYBE_mul_basecase
260 #undef MAYBE_mul_toom22
261 #undef MAYBE_mul_toom33
262 #undef MAYBE_mul_toom6h
263