1 /* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009, 2010 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
22
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25
26
27 #include "gmp.h"
28 #include "gmp-impl.h"
29
30
31 #if GMP_NUMB_BITS < 21
32 #error Not implemented.
33 #endif
34
35 #if TUNE_PROGRAM_BUILD
36 #define MAYBE_mul_basecase 1
37 #define MAYBE_mul_toom22 1
38 #define MAYBE_mul_toom33 1
39 #define MAYBE_mul_toom6h 1
40 #else
41 #define MAYBE_mul_basecase \
42 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
43 #define MAYBE_mul_toom22 \
44 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
45 #define MAYBE_mul_toom33 \
46 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
47 #define MAYBE_mul_toom6h \
48 (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
49 #endif
50
51 #define TOOM6H_MUL_N_REC(p, a, b, n, ws) \
52 do { \
53 if (MAYBE_mul_basecase \
54 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
55 mpn_mul_basecase (p, a, n, b, n); \
56 else if (MAYBE_mul_toom22 \
57 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \
58 mpn_toom22_mul (p, a, n, b, n, ws); \
59 else if (MAYBE_mul_toom33 \
60 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) \
61 mpn_toom33_mul (p, a, n, b, n, ws); \
62 else if (! MAYBE_mul_toom6h \
63 || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) \
64 mpn_toom44_mul (p, a, n, b, n, ws); \
65 else \
66 mpn_toom6h_mul (p, a, n, b, n, ws); \
67 } while (0)
68
69 #define TOOM6H_MUL_REC(p, a, na, b, nb, ws) \
70 do { mpn_mul (p, a, na, b, nb); \
71 } while (0)
72
73 /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
74 With: an >= bn >= 46, an*6 < bn * 17.
75 It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
76
77 Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
78 */
79 /* Estimate on needed scratch:
80 S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
81 since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
82 */
83
84 void
mpn_toom6h_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)85 mpn_toom6h_mul (mp_ptr pp,
86 mp_srcptr ap, mp_size_t an,
87 mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
88 {
89 mp_size_t n, s, t;
90 int p, q, half;
91 int sign;
92
93 /***************************** decomposition *******************************/
94
95 ASSERT( an >= bn);
96 /* Can not handle too much unbalancement */
97 ASSERT( bn >= 42 );
98 /* Can not handle too much unbalancement */
99 ASSERT((an*3 < bn * 8) || ( bn >= 46 && an*6 < bn * 17 ));
100
101 /* Limit num/den is a rational number between
102 (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1)) */
103 #define LIMIT_numerator (18)
104 #define LIMIT_denominat (17)
105
106 if( an * LIMIT_denominat < LIMIT_numerator * bn ) /* is 6*... < 6*... */
107 { p = q = 6; }
108 else if( an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn )
109 { p = 7; q = 6; }
110 else if( an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn )
111 { p = 7; q = 5; }
112 else if( an * LIMIT_numerator < LIMIT_denominat * 2 * bn ) /* is 4*... < 8*... */
113 { p = 8; q = 5; }
114 else if( an * LIMIT_denominat < LIMIT_numerator * 2 * bn ) /* is 4*... < 8*... */
115 { p = 8; q = 4; }
116 else
117 { p = 9; q = 4; }
118
119 half = (p ^ q) & 1;
120 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
121 p--; q--;
122
123 s = an - p * n;
124 t = bn - q * n;
125
126 /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
127 if (half) { /* Recover from badly chosen splitting */
128 if (s<1) {p--; s+=n; half=0;}
129 else if (t<1) {q--; t+=n; half=0;}
130 }
131 #undef LIMIT_numerator
132 #undef LIMIT_denominat
133
134 ASSERT (0 < s && s <= n);
135 ASSERT (0 < t && t <= n);
136 ASSERT (half || s + t > 3);
137 ASSERT (n > 2);
138
139 #define r4 (pp + 3 * n) /* 3n+1 */
140 #define r2 (pp + 7 * n) /* 3n+1 */
141 #define r0 (pp +11 * n) /* s+t <= 2*n */
142 #define r5 (scratch) /* 3n+1 */
143 #define r3 (scratch + 3 * n + 1) /* 3n+1 */
144 #define r1 (scratch + 6 * n + 2) /* 3n+1 */
145 #define v0 (pp + 7 * n) /* n+1 */
146 #define v1 (pp + 8 * n+1) /* n+1 */
147 #define v2 (pp + 9 * n+2) /* n+1 */
148 #define v3 (scratch + 9 * n + 3) /* n+1 */
149 #define wsi (scratch + 9 * n + 3) /* 3n+1 */
150 #define wse (scratch +10 * n + 4) /* 2n+1 */
151
152 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
153 need all of them */
154 /* if (scratch == NULL) */
155 /* scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
156 ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
157 ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
158
159 /********************** evaluation and recursive calls *********************/
160 /* $\pm1/2$ */
161 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
162 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
163 TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
164 TOOM6H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
165 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
166
167 /* $\pm1$ */
168 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);
169 if (q == 3)
170 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
171 else
172 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp);
173 TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */
174 TOOM6H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(1)*B(1) */
175 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
176
177 /* $\pm4$ */
178 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
179 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
180 TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */
181 TOOM6H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
182 mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
183
184 /* $\pm1/4$ */
185 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
186 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
187 TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
188 TOOM6H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
189 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
190
191 /* $\pm2$ */
192 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
193 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
194 TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */
195 TOOM6H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+2)*B(+2) */
196 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
197
198 #undef v0
199 #undef v1
200 #undef v2
201 #undef v3
202 #undef wse
203
204 /* A(0)*B(0) */
205 TOOM6H_MUL_N_REC(pp, ap, bp, n, wsi);
206
207 /* Infinity */
208 if( half != 0) {
209 if(s>t) {
210 TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
211 } else {
212 TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
213 };
214 };
215
216 mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
217
218 #undef r0
219 #undef r1
220 #undef r2
221 #undef r3
222 #undef r4
223 #undef r5
224 #undef wsi
225 }
226
227 #undef TOOM6H_MUL_N_REC
228 #undef TOOM6H_MUL_REC
229 #undef MAYBE_mul_basecase
230 #undef MAYBE_mul_toom22
231 #undef MAYBE_mul_toom33
232 #undef MAYBE_mul_toom6h
233