xref: /dragonfly/contrib/gmp/mpn/generic/toom6h_mul.c (revision 479ab7f0)
1 /* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2010 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17 
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21 License for more details.
22 
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25 
26 
27 #include "gmp.h"
28 #include "gmp-impl.h"
29 
30 
31 #if GMP_NUMB_BITS < 21
32 #error Not implemented.
33 #endif
34 
35 #if TUNE_PROGRAM_BUILD
36 #define MAYBE_mul_basecase 1
37 #define MAYBE_mul_toom22   1
38 #define MAYBE_mul_toom33   1
39 #define MAYBE_mul_toom6h   1
40 #else
41 #define MAYBE_mul_basecase						\
42   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
43 #define MAYBE_mul_toom22						\
44   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
45 #define MAYBE_mul_toom33						\
46   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
47 #define MAYBE_mul_toom6h						\
48   (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
49 #endif
50 
51 #define TOOM6H_MUL_N_REC(p, a, b, n, ws)				\
52   do {									\
53     if (MAYBE_mul_basecase						\
54 	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
55       mpn_mul_basecase (p, a, n, b, n);					\
56     else if (MAYBE_mul_toom22						\
57 	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))		\
58       mpn_toom22_mul (p, a, n, b, n, ws);				\
59     else if (MAYBE_mul_toom33						\
60 	     && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))		\
61       mpn_toom33_mul (p, a, n, b, n, ws);				\
62     else if (! MAYBE_mul_toom6h						\
63 	     || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))		\
64       mpn_toom44_mul (p, a, n, b, n, ws);				\
65     else								\
66       mpn_toom6h_mul (p, a, n, b, n, ws);				\
67   } while (0)
68 
69 #define TOOM6H_MUL_REC(p, a, na, b, nb, ws)		\
70   do {	mpn_mul (p, a, na, b, nb);			\
71   } while (0)
72 
73 /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
74    With: an >= bn >= 46, an*6 <  bn * 17.
75    It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
76 
77    Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
78 */
79 /* Estimate on needed scratch:
80    S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
81    since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
82  */
83 
84 void
85 mpn_toom6h_mul   (mp_ptr pp,
86 		  mp_srcptr ap, mp_size_t an,
87 		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
88 {
89   mp_size_t n, s, t;
90   int p, q, half;
91   int sign;
92 
93   /***************************** decomposition *******************************/
94 
95   ASSERT( an >= bn);
96   /* Can not handle too much unbalancement */
97   ASSERT( bn >= 42 );
98   /* Can not handle too much unbalancement */
99   ASSERT((an*3 <  bn * 8) || ( bn >= 46 && an*6 <  bn * 17 ));
100 
101   /* Limit num/den is a rational number between
102      (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
103 #define LIMIT_numerator (18)
104 #define LIMIT_denominat (17)
105 
106   if( an * LIMIT_denominat < LIMIT_numerator * bn ) /* is 6*... < 6*... */
107     { p = q = 6; }
108   else if( an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn )
109     { p = 7; q = 6; }
110   else if( an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn )
111     { p = 7; q = 5; }
112   else if( an * LIMIT_numerator < LIMIT_denominat * 2 * bn )  /* is 4*... < 8*... */
113     { p = 8; q = 5; }
114   else if( an * LIMIT_denominat < LIMIT_numerator * 2 * bn )  /* is 4*... < 8*... */
115     { p = 8; q = 4; }
116   else
117     { p = 9; q = 4; }
118 
119   half = (p ^ q) & 1;
120   n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
121   p--; q--;
122 
123   s = an - p * n;
124   t = bn - q * n;
125 
126   /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
127   if (half) { /* Recover from badly chosen splitting */
128     if (s<1) {p--; s+=n; half=0;}
129     else if (t<1) {q--; t+=n; half=0;}
130   }
131 #undef LIMIT_numerator
132 #undef LIMIT_denominat
133 
134   ASSERT (0 < s && s <= n);
135   ASSERT (0 < t && t <= n);
136   ASSERT (half || s + t > 3);
137   ASSERT (n > 2);
138 
139 #define   r4    (pp + 3 * n)			/* 3n+1 */
140 #define   r2    (pp + 7 * n)			/* 3n+1 */
141 #define   r0    (pp +11 * n)			/* s+t <= 2*n */
142 #define   r5    (scratch)			/* 3n+1 */
143 #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
144 #define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
145 #define   v0    (pp + 7 * n)			/* n+1 */
146 #define   v1    (pp + 8 * n+1)			/* n+1 */
147 #define   v2    (pp + 9 * n+2)			/* n+1 */
148 #define   v3    (scratch + 9 * n + 3)		/* n+1 */
149 #define   wsi   (scratch + 9 * n + 3)		/* 3n+1 */
150 #define   wse   (scratch +10 * n + 4)		/* 2n+1 */
151 
152   /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
153      need all of them  */
154 /*   if (scratch == NULL) */
155 /*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
156   ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
157   ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
158 
159   /********************** evaluation and recursive calls *********************/
160   /* $\pm1/2$ */
161   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
162 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
163   TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
164   TOOM6H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
165   mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
166 
167   /* $\pm1$ */
168   sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
169   if (q == 3)
170     sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
171   else
172     sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
173   TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */
174   TOOM6H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(1)*B(1) */
175   mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
176 
177   /* $\pm4$ */
178   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
179 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
180   TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */
181   TOOM6H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
182   mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
183 
184   /* $\pm1/4$ */
185   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
186 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
187   TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
188   TOOM6H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
189   mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
190 
191   /* $\pm2$ */
192   sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
193 	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
194   TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */
195   TOOM6H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+2)*B(+2) */
196   mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
197 
198 #undef v0
199 #undef v1
200 #undef v2
201 #undef v3
202 #undef wse
203 
204   /* A(0)*B(0) */
205   TOOM6H_MUL_N_REC(pp, ap, bp, n, wsi);
206 
207   /* Infinity */
208   if( half != 0) {
209     if(s>t) {
210       TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
211     } else {
212       TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
213     };
214   };
215 
216   mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
217 
218 #undef r0
219 #undef r1
220 #undef r2
221 #undef r3
222 #undef r4
223 #undef r5
224 #undef wsi
225 }
226 
227 #undef TOOM6H_MUL_N_REC
228 #undef TOOM6H_MUL_REC
229 #undef MAYBE_mul_basecase
230 #undef MAYBE_mul_toom22
231 #undef MAYBE_mul_toom33
232 #undef MAYBE_mul_toom6h
233