xref: /dragonfly/contrib/gmp/mpn/generic/toom8h_mul.c (revision 926deccb)
1 /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2010 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17 
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21 License for more details.
22 
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25 
26 
27 #include "gmp.h"
28 #include "gmp-impl.h"
29 
30 
31 #if GMP_NUMB_BITS < 29
32 #error Not implemented.
33 #endif
34 
35 #if GMP_NUMB_BITS < 43
36 #define BIT_CORRECTION 1
37 #define CORRECTION_BITS GMP_NUMB_BITS
38 #else
39 #define BIT_CORRECTION 0
40 #define CORRECTION_BITS 0
41 #endif
42 
43 
44 #if TUNE_PROGRAM_BUILD
45 #define MAYBE_mul_basecase 1
46 #define MAYBE_mul_toom22   1
47 #define MAYBE_mul_toom33   1
48 #define MAYBE_mul_toom44   1
49 #define MAYBE_mul_toom8h   1
50 #else
51 #define MAYBE_mul_basecase						\
52   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
53 #define MAYBE_mul_toom22						\
54   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
55 #define MAYBE_mul_toom33						\
56   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
57 #define MAYBE_mul_toom44						\
58   (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
59 #define MAYBE_mul_toom8h						\
60   (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
61 #endif
62 
63 #define TOOM8H_MUL_N_REC(p, a, b, n, ws)				\
64   do {									\
65     if (MAYBE_mul_basecase						\
66 	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
67       mpn_mul_basecase (p, a, n, b, n);					\
68     else if (MAYBE_mul_toom22						\
69 	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))		\
70       mpn_toom22_mul (p, a, n, b, n, ws);				\
71     else if (MAYBE_mul_toom33						\
72 	     && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))		\
73       mpn_toom33_mul (p, a, n, b, n, ws);				\
74     else if (MAYBE_mul_toom44						\
75 	     && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))		\
76       mpn_toom44_mul (p, a, n, b, n, ws);				\
77     else if (! MAYBE_mul_toom8h						\
78 	     || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))		\
79       mpn_toom6h_mul (p, a, n, b, n, ws);				\
80     else								\
81       mpn_toom8h_mul (p, a, n, b, n, ws);				\
82   } while (0)
83 
84 #define TOOM8H_MUL_REC(p, a, na, b, nb, ws)		\
85   do {	mpn_mul (p, a, na, b, nb);			\
86   } while (0)
87 
88 /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
89    With: an >= bn >= 86, an*5 <  bn * 11.
90    It _may_ work with bn<=?? and bn*?? < an*? < bn*??
91 
92    Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
93 */
94 /* Estimate on needed scratch:
95    S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
96    since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
97  */
98 
99 void
100 mpn_toom8h_mul   (mp_ptr pp,
101 		  mp_srcptr ap, mp_size_t an,
102 		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
103 {
104   mp_size_t n, s, t;
105   int p, q, half;
106   int sign;
107 
108   /***************************** decomposition *******************************/
109 
110   ASSERT (an >= bn);
111   /* Can not handle too small operands */
112   ASSERT (bn >= 86);
113   /* Can not handle too much unbalancement */
114   ASSERT (an*4 <= bn*13);
115   ASSERT (GMP_NUMB_BITS > 12*3 || an*4 <= bn*12);
116   ASSERT (GMP_NUMB_BITS > 11*3 || an*5 <= bn*11);
117   ASSERT (GMP_NUMB_BITS > 10*3 || an*6 <= bn*10);
118   ASSERT (GMP_NUMB_BITS >  9*3 || an*7 <= bn* 9);
119 
120   /* Limit num/den is a rational number between
121      (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
122 #define LIMIT_numerator (21)
123 #define LIMIT_denominat (20)
124 
125   if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
126     {
127       half = 0;
128       n = 1 + ((an - 1)>>3);
129       p = q = 7;
130       s = an - p * n;
131       t = bn - q * n;
132     }
133   else
134     {
135       if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
136 	{ p = 9; q = 8; }
137       else if (GMP_NUMB_BITS <= 9*3 ||
138 	       an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
139 	{ p = 9; q = 7; }
140       else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
141 	{ p =10; q = 7; }
142       else if (GMP_NUMB_BITS <= 10*3 ||
143 	       an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
144 	{ p =10; q = 6; }
145       else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
146 	{ p =11; q = 6; }
147       else if (GMP_NUMB_BITS <= 11*3 ||
148 	       an * 4 < 9 * bn)
149 	{ p =11; q = 5; }
150       else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn )  /* is 4*... <12*... */
151 	{ p =12; q = 5; }
152       else if (GMP_NUMB_BITS <= 12*3 ||
153 	       an * 9 < 28 * bn )  /* is 4*... <12*... */
154 	{ p =12; q = 4; }
155       else
156 	{ p =13; q = 4; }
157 
158       half = (p+q)&1;
159       n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
160       p--; q--;
161 
162       s = an - p * n;
163       t = bn - q * n;
164 
165       if(half) { /* Recover from badly chosen splitting */
166 	if (s<1) {p--; s+=n; half=0;}
167 	else if (t<1) {q--; t+=n; half=0;}
168       }
169     }
170 #undef LIMIT_numerator
171 #undef LIMIT_denominat
172 
173   ASSERT (0 < s && s <= n);
174   ASSERT (0 < t && t <= n);
175   ASSERT (half || s + t > 3);
176   ASSERT (n > 2);
177 
178 #define   r6    (pp + 3 * n)			/* 3n+1 */
179 #define   r4    (pp + 7 * n)			/* 3n+1 */
180 #define   r2    (pp +11 * n)			/* 3n+1 */
181 #define   r0    (pp +15 * n)			/* s+t <= 2*n */
182 #define   r7    (scratch)			/* 3n+1 */
183 #define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
184 #define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
185 #define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
186 #define   v0    (pp +11 * n)			/* n+1 */
187 #define   v1    (pp +12 * n+1)			/* n+1 */
188 #define   v2    (pp +13 * n+2)			/* n+1 */
189 #define   v3    (scratch +12 * n + 4)		/* n+1 */
190 #define   wsi   (scratch +12 * n + 4)		/* 3n+1 */
191 #define   wse   (scratch +13 * n + 5)		/* 2n+1 */
192 
193   /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
194      need all of them  */
195 /*   if (scratch == NULL) */
196 /*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
197   ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
198   ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
199 
200   /********************** evaluation and recursive calls *********************/
201 
202   /* $\pm1/8$ */
203   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
204 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
205   TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/8)*B(-1/8)*8^. */
206   TOOM8H_MUL_N_REC(r7, v2, v3, n + 1, wse); /* A(+1/8)*B(+1/8)*8^. */
207   mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
208 
209   /* $\pm1/4$ */
210   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
211 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
212   TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
213   TOOM8H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
214   mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
215 
216   /* $\pm2$ */
217   sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
218 	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
219   TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */
220   TOOM8H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(+2)*B(+2) */
221   mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
222 
223   /* $\pm8$ */
224   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
225 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
226   TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-8)*B(-8) */
227   TOOM8H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+8)*B(+8) */
228   mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
229 
230   /* $\pm1/2$ */
231   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
232 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
233   TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
234   TOOM8H_MUL_N_REC(r6, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
235   mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
236 
237   /* $\pm1$ */
238   sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
239   if (q == 3)
240     sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
241   else
242     sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
243   TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */
244   TOOM8H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(1)*B(1) */
245   mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
246 
247   /* $\pm4$ */
248   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
249 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
250   TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */
251   TOOM8H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
252   mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
253 
254 #undef v0
255 #undef v1
256 #undef v2
257 #undef v3
258 #undef wse
259 
260   /* A(0)*B(0) */
261   TOOM8H_MUL_N_REC(pp, ap, bp, n, wsi);
262 
263   /* Infinity */
264   if( half != 0) {
265     if(s>t) {
266       TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
267     } else {
268       TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
269     };
270   };
271 
272   mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
273 
274 #undef r0
275 #undef r1
276 #undef r2
277 #undef r3
278 #undef r4
279 #undef r5
280 #undef r6
281 #undef wsi
282 }
283 
284 #undef TOOM8H_MUL_N_REC
285 #undef TOOM8H_MUL_REC
286 #undef MAYBE_mul_basecase
287 #undef MAYBE_mul_toom22
288 #undef MAYBE_mul_toom33
289 #undef MAYBE_mul_toom44
290 #undef MAYBE_mul_toom8h
291