1 /* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 
38 #include "gmp.h"
39 #include "gmp-impl.h"
40 
41 
42 #if GMP_NUMB_BITS < 21
43 #error Not implemented.
44 #endif
45 
46 #if TUNE_PROGRAM_BUILD
47 #define MAYBE_mul_basecase 1
48 #define MAYBE_mul_toom22   1
49 #define MAYBE_mul_toom33   1
50 #define MAYBE_mul_toom6h   1
51 #else
52 #define MAYBE_mul_basecase						\
53   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
54 #define MAYBE_mul_toom22						\
55   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
56 #define MAYBE_mul_toom33						\
57   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
58 #define MAYBE_mul_toom6h						\
59   (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
60 #endif
61 
62 #define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
63   do {									\
64     if (MAYBE_mul_basecase						\
65 	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
66       mpn_mul_basecase (p, a, n, b, n);					\
67       if (f)								\
68 	mpn_mul_basecase (p2, a2, n, b2, n);				\
69     } else if (MAYBE_mul_toom22						\
70 	       && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
71       mpn_toom22_mul (p, a, n, b, n, ws);				\
72       if (f)								\
73 	mpn_toom22_mul (p2, a2, n, b2, n, ws);				\
74     } else if (MAYBE_mul_toom33						\
75 	       && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
76       mpn_toom33_mul (p, a, n, b, n, ws);				\
77       if (f)								\
78 	mpn_toom33_mul (p2, a2, n, b2, n, ws);				\
79     } else if (! MAYBE_mul_toom6h					\
80 	       || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
81       mpn_toom44_mul (p, a, n, b, n, ws);				\
82       if (f)								\
83 	mpn_toom44_mul (p2, a2, n, b2, n, ws);				\
84     } else {								\
85       mpn_toom6h_mul (p, a, n, b, n, ws);				\
86       if (f)								\
87 	mpn_toom6h_mul (p2, a2, n, b2, n, ws);				\
88     }									\
89   } while (0)
90 
91 #define TOOM6H_MUL_REC(p, a, na, b, nb, ws)		\
92   do { mpn_mul (p, a, na, b, nb);			\
93   } while (0)
94 
95 /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
96    With: an >= bn >= 46, an*6 <  bn * 17.
97    It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
98 
99    Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
100 */
101 /* Estimate on needed scratch:
102    S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
103    since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
104  */
105 
106 void
mpn_toom6h_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)107 mpn_toom6h_mul   (mp_ptr pp,
108 		  mp_srcptr ap, mp_size_t an,
109 		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
110 {
111   mp_size_t n, s, t;
112   int p, q, half;
113   int sign;
114 
115   /***************************** decomposition *******************************/
116 
117   ASSERT (an >= bn);
118   /* Can not handle too much unbalancement */
119   ASSERT (bn >= 42);
120   /* Can not handle too much unbalancement */
121   ASSERT ((an*3 <  bn * 8) || (bn >= 46 && an * 6 <  bn * 17));
122 
123   /* Limit num/den is a rational number between
124      (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
125 #define LIMIT_numerator (18)
126 #define LIMIT_denominat (17)
127 
128   if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
129     {
130       n = 1 + (an - 1) / (size_t) 6;
131       p = q = 5;
132       half = 0;
133 
134       s = an - 5 * n;
135       t = bn - 5 * n;
136     }
137   else {
138     if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
139       { p = 7; q = 6; }
140     else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
141       { p = 7; q = 5; }
142     else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn)  /* is 4*... < 8*... */
143       { p = 8; q = 5; }
144     else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn)  /* is 4*... < 8*... */
145       { p = 8; q = 4; }
146     else
147       { p = 9; q = 4; }
148 
149     half = (p ^ q) & 1;
150     n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
151     p--; q--;
152 
153     s = an - p * n;
154     t = bn - q * n;
155 
156     /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
157     if (half) { /* Recover from badly chosen splitting */
158       if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
159       else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
160     }
161   }
162 #undef LIMIT_numerator
163 #undef LIMIT_denominat
164 
165   ASSERT (0 < s && s <= n);
166   ASSERT (0 < t && t <= n);
167   ASSERT (half || s + t > 3);
168   ASSERT (n > 2);
169 
170 #define   r4    (pp + 3 * n)			/* 3n+1 */
171 #define   r2    (pp + 7 * n)			/* 3n+1 */
172 #define   r0    (pp +11 * n)			/* s+t <= 2*n */
173 #define   r5    (scratch)			/* 3n+1 */
174 #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
175 #define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
176 #define   v0    (pp + 7 * n)			/* n+1 */
177 #define   v1    (pp + 8 * n+1)			/* n+1 */
178 #define   v2    (pp + 9 * n+2)			/* n+1 */
179 #define   v3    (scratch + 9 * n + 3)		/* n+1 */
180 #define   wsi   (scratch + 9 * n + 3)		/* 3n+1 */
181 #define   wse   (scratch +10 * n + 4)		/* 2n+1 */
182 
183   /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
184      need all of them  */
185 /*   if (scratch == NULL) */
186 /*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
187   ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
188   ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
189 
190   /********************** evaluation and recursive calls *********************/
191   /* $\pm1/2$ */
192   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
193 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
194   /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
195   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
196   mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
197 
198   /* $\pm1$ */
199   sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
200   if (UNLIKELY (q == 3))
201     sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
202   else
203     sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
204   /* A(-1)*B(-1) */ /* A(1)*B(1) */
205   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
206   mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
207 
208   /* $\pm4$ */
209   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
210 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
211   /* A(-4)*B(-4) */
212   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
213   mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
214 
215   /* $\pm1/4$ */
216   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
217 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
218   /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
219   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
220   mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
221 
222   /* $\pm2$ */
223   sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
224 	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
225   /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
226   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
227   mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
228 
229 #undef v0
230 #undef v1
231 #undef v2
232 #undef v3
233 #undef wse
234 
235   /* A(0)*B(0) */
236   TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
237 
238   /* Infinity */
239   if (UNLIKELY (half != 0)) {
240     if (s > t) {
241       TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
242     } else {
243       TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
244     };
245   };
246 
247   mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
248 
249 #undef r0
250 #undef r1
251 #undef r2
252 #undef r3
253 #undef r4
254 #undef r5
255 #undef wsi
256 }
257 
258 #undef TOOM6H_MUL_N_REC
259 #undef TOOM6H_MUL_REC
260 #undef MAYBE_mul_basecase
261 #undef MAYBE_mul_toom22
262 #undef MAYBE_mul_toom33
263 #undef MAYBE_mul_toom6h
264