1 /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 
38 #include "gmp-impl.h"
39 
40 
41 #if GMP_NUMB_BITS < 29
42 #error Not implemented.
43 #endif
44 
45 #if GMP_NUMB_BITS < 43
46 #define BIT_CORRECTION 1
47 #define CORRECTION_BITS GMP_NUMB_BITS
48 #else
49 #define BIT_CORRECTION 0
50 #define CORRECTION_BITS 0
51 #endif
52 
53 
54 #if TUNE_PROGRAM_BUILD
55 #define MAYBE_mul_basecase 1
56 #define MAYBE_mul_toom22   1
57 #define MAYBE_mul_toom33   1
58 #define MAYBE_mul_toom44   1
59 #define MAYBE_mul_toom8h   1
60 #else
61 #define MAYBE_mul_basecase						\
62   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
63 #define MAYBE_mul_toom22						\
64   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
65 #define MAYBE_mul_toom33						\
66   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
67 #define MAYBE_mul_toom44						\
68   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
69 #define MAYBE_mul_toom8h						\
70   (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
71 #endif
72 
73 #define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
74   do {									\
75     if (MAYBE_mul_basecase						\
76 	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
77       mpn_mul_basecase (p, a, n, b, n);					\
78       if (f) mpn_mul_basecase (p2, a2, n, b2, n);			\
79     } else if (MAYBE_mul_toom22						\
80 	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
81       mpn_toom22_mul (p, a, n, b, n, ws);				\
82       if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);			\
83     } else if (MAYBE_mul_toom33						\
84 	     && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
85       mpn_toom33_mul (p, a, n, b, n, ws);				\
86       if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);			\
87     } else if (MAYBE_mul_toom44						\
88 	     && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
89       mpn_toom44_mul (p, a, n, b, n, ws);				\
90       if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);			\
91     } else if (! MAYBE_mul_toom8h					\
92 	     || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {		\
93       mpn_toom6h_mul (p, a, n, b, n, ws);				\
94       if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);			\
95     } else {								\
96       mpn_toom8h_mul (p, a, n, b, n, ws);				\
97       if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);			\
98     }									\
99   } while (0)
100 
101 #define TOOM8H_MUL_REC(p, a, na, b, nb, ws)		\
102   do { mpn_mul (p, a, na, b, nb); } while (0)
103 
104 /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
105    With: an >= bn >= 86, an*5 <  bn * 11.
106    It _may_ work with bn<=?? and bn*?? < an*? < bn*??
107 
108    Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
109 */
110 /* Estimate on needed scratch:
111    S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
112    since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
113  */
114 
115 void
mpn_toom8h_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)116 mpn_toom8h_mul   (mp_ptr pp,
117 		  mp_srcptr ap, mp_size_t an,
118 		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
119 {
120   mp_size_t n, s, t;
121   int p, q, half;
122   int sign;
123 
124   /***************************** decomposition *******************************/
125 
126   ASSERT (an >= bn);
127   /* Can not handle too small operands */
128   ASSERT (bn >= 86);
129   /* Can not handle too much unbalancement */
130   ASSERT (an <= bn*4);
131   ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
132   ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
133   ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
134 
135   /* Limit num/den is a rational number between
136      (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
137 #define LIMIT_numerator (21)
138 #define LIMIT_denominat (20)
139 
140   if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
141     {
142       half = 0;
143       n = 1 + ((an - 1)>>3);
144       p = q = 7;
145       s = an - 7 * n;
146       t = bn - 7 * n;
147     }
148   else
149     {
150       if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
151 	{ p = 9; q = 8; }
152       else if (GMP_NUMB_BITS <= 9*3 ||
153 	       an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
154 	{ p = 9; q = 7; }
155       else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
156 	{ p =10; q = 7; }
157       else if (GMP_NUMB_BITS <= 10*3 ||
158 	       an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
159 	{ p =10; q = 6; }
160       else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
161 	{ p =11; q = 6; }
162       else if (GMP_NUMB_BITS <= 11*3 ||
163 	       an * 4 < 9 * bn)
164 	{ p =11; q = 5; }
165       else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
166 	{ p =12; q = 5; }
167       else if (GMP_NUMB_BITS <= 12*3 ||
168 	       an * 9 < 28 * bn )  /* is 4*... <12*... */
169 	{ p =12; q = 4; }
170       else
171 	{ p =13; q = 4; }
172 
173       half = (p+q)&1;
174       n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
175       p--; q--;
176 
177       s = an - p * n;
178       t = bn - q * n;
179 
180       if(half) { /* Recover from badly chosen splitting */
181 	if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
182 	else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
183       }
184     }
185 #undef LIMIT_numerator
186 #undef LIMIT_denominat
187 
188   ASSERT (0 < s && s <= n);
189   ASSERT (0 < t && t <= n);
190   ASSERT (half || s + t > 3);
191   ASSERT (n > 2);
192 
193 #define   r6    (pp + 3 * n)			/* 3n+1 */
194 #define   r4    (pp + 7 * n)			/* 3n+1 */
195 #define   r2    (pp +11 * n)			/* 3n+1 */
196 #define   r0    (pp +15 * n)			/* s+t <= 2*n */
197 #define   r7    (scratch)			/* 3n+1 */
198 #define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
199 #define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
200 #define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
201 #define   v0    (pp +11 * n)			/* n+1 */
202 #define   v1    (pp +12 * n+1)			/* n+1 */
203 #define   v2    (pp +13 * n+2)			/* n+1 */
204 #define   v3    (scratch +12 * n + 4)		/* n+1 */
205 #define   wsi   (scratch +12 * n + 4)		/* 3n+1 */
206 #define   wse   (scratch +13 * n + 5)		/* 2n+1 */
207 
208   /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
209      need all of them  */
210 /*   if (scratch == NULL) */
211 /*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
212   ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
213   ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
214 
215   /********************** evaluation and recursive calls *********************/
216 
217   /* $\pm1/8$ */
218   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
219 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
220   /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
221   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
222   mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
223 
224   /* $\pm1/4$ */
225   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
226 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
227   /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
228   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
229   mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
230 
231   /* $\pm2$ */
232   sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
233 	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
234   /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
235   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
236   mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
237 
238   /* $\pm8$ */
239   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
240 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
241   /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
242   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
243   mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
244 
245   /* $\pm1/2$ */
246   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
247 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
248   /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
249   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
250   mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
251 
252   /* $\pm1$ */
253   sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
254   if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
255     sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
256   else
257     sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
258   /* A(-1)*B(-1) */ /* A(1)*B(1) */
259   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
260   mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
261 
262   /* $\pm4$ */
263   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
264 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
265   /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
266   TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
267   mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
268 
269 #undef v0
270 #undef v1
271 #undef v2
272 #undef v3
273 #undef wse
274 
275   /* A(0)*B(0) */
276   TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
277 
278   /* Infinity */
279   if (UNLIKELY (half != 0)) {
280     if (s > t) {
281       TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
282     } else {
283       TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
284     };
285   };
286 
287   mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
288 
289 #undef r0
290 #undef r1
291 #undef r2
292 #undef r3
293 #undef r4
294 #undef r5
295 #undef r6
296 #undef wsi
297 }
298 
299 #undef TOOM8H_MUL_N_REC
300 #undef TOOM8H_MUL_REC
301 #undef MAYBE_mul_basecase
302 #undef MAYBE_mul_toom22
303 #undef MAYBE_mul_toom33
304 #undef MAYBE_mul_toom44
305 #undef MAYBE_mul_toom8h
306