1 /* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 
38 #include "gmp-impl.h"
39 
40 
41 #if GMP_NUMB_BITS < 21
42 #error Not implemented.
43 #endif
44 
45 #if TUNE_PROGRAM_BUILD
46 #define MAYBE_mul_basecase 1
47 #define MAYBE_mul_toom22   1
48 #define MAYBE_mul_toom33   1
49 #define MAYBE_mul_toom6h   1
50 #else
51 #define MAYBE_mul_basecase						\
52   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
53 #define MAYBE_mul_toom22						\
54   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
55 #define MAYBE_mul_toom33						\
56   (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
57 #define MAYBE_mul_toom6h						\
58   (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
59 #endif
60 
61 #define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
62   do {									\
63     if (MAYBE_mul_basecase						\
64 	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
65       mpn_mul_basecase (p, a, n, b, n);					\
66       if (f)								\
67 	mpn_mul_basecase (p2, a2, n, b2, n);				\
68     } else if (MAYBE_mul_toom22						\
69 	       && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
70       mpn_toom22_mul (p, a, n, b, n, ws);				\
71       if (f)								\
72 	mpn_toom22_mul (p2, a2, n, b2, n, ws);				\
73     } else if (MAYBE_mul_toom33						\
74 	       && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
75       mpn_toom33_mul (p, a, n, b, n, ws);				\
76       if (f)								\
77 	mpn_toom33_mul (p2, a2, n, b2, n, ws);				\
78     } else if (! MAYBE_mul_toom6h					\
79 	       || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
80       mpn_toom44_mul (p, a, n, b, n, ws);				\
81       if (f)								\
82 	mpn_toom44_mul (p2, a2, n, b2, n, ws);				\
83     } else {								\
84       mpn_toom6h_mul (p, a, n, b, n, ws);				\
85       if (f)								\
86 	mpn_toom6h_mul (p2, a2, n, b2, n, ws);				\
87     }									\
88   } while (0)
89 
90 #define TOOM6H_MUL_REC(p, a, na, b, nb, ws)		\
91   do { mpn_mul (p, a, na, b, nb);			\
92   } while (0)
93 
94 /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
95    With: an >= bn >= 46, an*6 <  bn * 17.
96    It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
97 
98    Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
99 */
100 /* Estimate on needed scratch:
101    S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
102    since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
103  */
104 
105 void
mpn_toom6h_mul(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn,mp_ptr scratch)106 mpn_toom6h_mul   (mp_ptr pp,
107 		  mp_srcptr ap, mp_size_t an,
108 		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
109 {
110   mp_size_t n, s, t;
111   int p, q, half;
112   int sign;
113 
114   /***************************** decomposition *******************************/
115 
116   ASSERT (an >= bn);
117   /* Can not handle too much unbalancement */
118   ASSERT (bn >= 42);
119   /* Can not handle too much unbalancement */
120   ASSERT ((an*3 <  bn * 8) || (bn >= 46 && an * 6 <  bn * 17));
121 
122   /* Limit num/den is a rational number between
123      (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
124 #define LIMIT_numerator (18)
125 #define LIMIT_denominat (17)
126 
127   if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
128     {
129       n = 1 + (an - 1) / (size_t) 6;
130       p = q = 5;
131       half = 0;
132 
133       s = an - 5 * n;
134       t = bn - 5 * n;
135     }
136   else {
137     if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
138       { p = 7; q = 6; }
139     else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
140       { p = 7; q = 5; }
141     else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn)  /* is 4*... < 8*... */
142       { p = 8; q = 5; }
143     else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn)  /* is 4*... < 8*... */
144       { p = 8; q = 4; }
145     else
146       { p = 9; q = 4; }
147 
148     half = (p ^ q) & 1;
149     n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
150     p--; q--;
151 
152     s = an - p * n;
153     t = bn - q * n;
154 
155     /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
156     if (half) { /* Recover from badly chosen splitting */
157       if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
158       else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
159     }
160   }
161 #undef LIMIT_numerator
162 #undef LIMIT_denominat
163 
164   ASSERT (0 < s && s <= n);
165   ASSERT (0 < t && t <= n);
166   ASSERT (half || s + t > 3);
167   ASSERT (n > 2);
168 
169 #define   r4    (pp + 3 * n)			/* 3n+1 */
170 #define   r2    (pp + 7 * n)			/* 3n+1 */
171 #define   r0    (pp +11 * n)			/* s+t <= 2*n */
172 #define   r5    (scratch)			/* 3n+1 */
173 #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
174 #define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
175 #define   v0    (pp + 7 * n)			/* n+1 */
176 #define   v1    (pp + 8 * n+1)			/* n+1 */
177 #define   v2    (pp + 9 * n+2)			/* n+1 */
178 #define   v3    (scratch + 9 * n + 3)		/* n+1 */
179 #define   wsi   (scratch + 9 * n + 3)		/* 3n+1 */
180 #define   wse   (scratch +10 * n + 4)		/* 2n+1 */
181 
182   /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
183      need all of them  */
184 /*   if (scratch == NULL) */
185 /*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
186   ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
187   ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
188 
189   /********************** evaluation and recursive calls *********************/
190   /* $\pm1/2$ */
191   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
192 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
193   /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
194   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
195   mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
196 
197   /* $\pm1$ */
198   sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
199   if (UNLIKELY (q == 3))
200     sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
201   else
202     sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
203   /* A(-1)*B(-1) */ /* A(1)*B(1) */
204   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
205   mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
206 
207   /* $\pm4$ */
208   sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
209 	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
210   /* A(-4)*B(-4) */
211   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
212   mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
213 
214   /* $\pm1/4$ */
215   sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
216 	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
217   /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
218   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
219   mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
220 
221   /* $\pm2$ */
222   sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
223 	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
224   /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
225   TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
226   mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
227 
228 #undef v0
229 #undef v1
230 #undef v2
231 #undef v3
232 #undef wse
233 
234   /* A(0)*B(0) */
235   TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
236 
237   /* Infinity */
238   if (UNLIKELY (half != 0)) {
239     if (s > t) {
240       TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
241     } else {
242       TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
243     };
244   };
245 
246   mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
247 
248 #undef r0
249 #undef r1
250 #undef r2
251 #undef r3
252 #undef r4
253 #undef r5
254 #undef wsi
255 }
256 
257 #undef TOOM6H_MUL_N_REC
258 #undef TOOM6H_MUL_REC
259 #undef MAYBE_mul_basecase
260 #undef MAYBE_mul_toom22
261 #undef MAYBE_mul_toom33
262 #undef MAYBE_mul_toom6h
263