1 /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 
38 #include "gmp.h"
39 #include "gmp-impl.h"
40 
41 
42 #if GMP_NUMB_BITS < 29
43 #error Not implemented.
44 #endif
45 
46 #if GMP_NUMB_BITS < 43
47 #define BIT_CORRECTION 1
48 #define CORRECTION_BITS GMP_NUMB_BITS
49 #else
50 #define BIT_CORRECTION 0
51 #define CORRECTION_BITS 0
52 #endif
53 
54 
55 #if TUNE_PROGRAM_BUILD
56 #define MAYBE_mul_basecase 1
57 #define MAYBE_mul_toom22   1
58 #define MAYBE_mul_toom33   1
59 #define MAYBE_mul_toom44   1
60 #define MAYBE_mul_toom8h   1
61 #else
62 #define MAYBE_mul_basecase						\
63   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
64 #define MAYBE_mul_toom22						\
65   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
66 #define MAYBE_mul_toom33						\
67   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
68 #define MAYBE_mul_toom44						\
69   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
70 #define MAYBE_mul_toom8h						\
71   (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
72 #endif
73 
74 #define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
75   do {									\
76     if (MAYBE_mul_basecase						\
77 	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
78       mpn_mul_basecase (p, a, n, b, n);					\
79       if (f) mpn_mul_basecase (p2, a2, n, b2, n);			\
80     } else if (MAYBE_mul_toom22						\
81 	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
82       mpn_toom22_mul (p, a, n, b, n, ws);				\
83       if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);			\
84     } else if (MAYBE_mul_toom33						\
85 	     && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
86       mpn_toom33_mul (p, a, n, b, n, ws);				\
87       if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);			\
88     } else if (MAYBE_mul_toom44						\
89 	     && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
90       mpn_toom44_mul (p, a, n, b, n, ws);				\
91       if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);			\
92     } else if (! MAYBE_mul_toom8h					\
93 	     || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {		\
94       mpn_toom6h_mul (p, a, n, b, n, ws);				\
95       if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);			\
96     } else {								\
97       mpn_toom8h_mul (p, a, n, b, n, ws);				\
98       if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);			\
99     }									\
100   } while (0)
101 
102 #define TOOM8H_MUL_REC(p, a, na, b, nb, ws)		\
103   do { mpn_mul (p, a, na, b, nb); } while (0)
104 
105 /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
106    With: an >= bn >= 86, an*5 <  bn * 11.
107    It _may_ work with bn<=?? and bn*?? < an*? < bn*??
108 
109    Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
110 */
111 /* Estimate on needed scratch:
112    S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
113    since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
114  */
115 
116 void
mpn_toom8h_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)117 mpn_toom8h_mul   (mp_ptr pp,
118 		  mp_srcptr ap, mp_size_t an,
119 		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
120 {
121   mp_size_t n, s, t;
122   int p, q, half;
123   int sign;
124 
125   /***************************** decomposition *******************************/
126 
127   ASSERT (an >= bn);
128   /* Can not handle too small operands */
129   ASSERT (bn >= 86);
130   /* Can not handle too much unbalancement */
131   ASSERT (an <= bn*4);
132   ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
133   ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
134   ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
135 
136   /* Limit num/den is a rational number between
137      (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
138 #define LIMIT_numerator (21)
139 #define LIMIT_denominat (20)
140 
141   if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
142     {
143       half = 0;
144       n = 1 + ((an - 1)>>3);
145       p = q = 7;
146       s = an - 7 * n;
147       t = bn - 7 * n;
148     }
149   else
150     {
151       if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
152 	{ p = 9; q = 8; }
153       else if (GMP_NUMB_BITS <= 9*3 ||
154 	       an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
155 	{ p = 9; q = 7; }
156       else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
157 	{ p =10; q = 7; }
158       else if (GMP_NUMB_BITS <= 10*3 ||
159 	       an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
160 	{ p =10; q = 6; }
161       else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
162 	{ p =11; q = 6; }
163       else if (GMP_NUMB_BITS <= 11*3 ||
164 	       an * 4 < 9 * bn)
165 	{ p =11; q = 5; }
166       else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
167 	{ p =12; q = 5; }
168       else if (GMP_NUMB_BITS <= 12*3 ||
169 	       an * 9 < 28 * bn )  /* is 4*... <12*... */
170 	{ p =12; q = 4; }
171       else
172 	{ p =13; q = 4; }
173 
174       half = (p+q)&1;
175       n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
176       p--; q--;
177 
178       s = an - p * n;
179       t = bn - q * n;
180 
181       if(half) { /* Recover from badly chosen splitting */
182 	if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
183 	else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
184       }
185     }
186 #undef LIMIT_numerator
187 #undef LIMIT_denominat
188 
189   ASSERT (0 < s && s <= n);
190   ASSERT (0 < t && t <= n);
191   ASSERT (half || s + t > 3);
192   ASSERT (n > 2);
193 
194 #define   r6    (pp + 3 * n)			/* 3n+1 */
195 #define   r4    (pp + 7 * n)			/* 3n+1 */
196 #define   r2    (pp +11 * n)			/* 3n+1 */
197 #define   r0    (pp +15 * n)			/* s+t <= 2*n */
198 #define   r7    (scratch)			/* 3n+1 */
199 #define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
200 #define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
201 #define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
202 #define   v0    (pp +11 * n)			/* n+1 */
203 #define   v1    (pp +12 * n+1)			/* n+1 */
204 #define   v2    (pp +13 * n+2)			/* n+1 */
205 #define   v3    (scratch +12 * n + 4)		/* n+1 */
206 #define   wsi   (scratch +12 * n + 4)		/* 3n+1 */
207 #define   wse   (scratch +13 * n + 5)		/* 2n+1 */
208 
209   /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
210      need all of them  */
211 /*   if (scratch == NULL) */
212 /*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
213   ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
214   ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
215 
216   /********************** evaluation and recursive calls *********************/
217 
218   /* $\pm1/8$ */
219   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
220 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
221   /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
222   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
223   mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
224 
225   /* $\pm1/4$ */
226   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
227 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
228   /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
229   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
230   mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
231 
232   /* $\pm2$ */
233   sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
234 	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
235   /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
236   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
237   mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
238 
239   /* $\pm8$ */
240   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
241 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
242   /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
243   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
244   mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
245 
246   /* $\pm1/2$ */
247   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
248 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
249   /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
250   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
251   mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
252 
253   /* $\pm1$ */
254   sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
255   if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
256     sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
257   else
258     sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
259   /* A(-1)*B(-1) */ /* A(1)*B(1) */
260   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
261   mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
262 
263   /* $\pm4$ */
264   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
265 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
266   /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
267   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
268   mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
269 
270 #undef v0
271 #undef v1
272 #undef v2
273 #undef v3
274 #undef wse
275 
276   /* A(0)*B(0) */
277   TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
278 
279   /* Infinity */
280   if (UNLIKELY (half != 0)) {
281     if (s > t) {
282       TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
283     } else {
284       TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
285     };
286   };
287 
288   mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
289 
290 #undef r0
291 #undef r1
292 #undef r2
293 #undef r3
294 #undef r4
295 #undef r5
296 #undef r6
297 #undef wsi
298 }
299 
300 #undef TOOM8H_MUL_N_REC
301 #undef TOOM8H_MUL_REC
302 #undef MAYBE_mul_basecase
303 #undef MAYBE_mul_toom22
304 #undef MAYBE_mul_toom33
305 #undef MAYBE_mul_toom44
306 #undef MAYBE_mul_toom8h
307