1 /* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2012 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 
38 #include "gmp.h"
39 #include "gmp-impl.h"
40 
41 #if GMP_NUMB_BITS < 29
42 #error Not implemented.
43 #endif
44 
45 #if GMP_NUMB_BITS < 43
46 #define BIT_CORRECTION 1
47 #define CORRECTION_BITS GMP_NUMB_BITS
48 #else
49 #define BIT_CORRECTION 0
50 #define CORRECTION_BITS 0
51 #endif
52 
53 #ifndef SQR_TOOM8_THRESHOLD
54 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
55 #endif
56 
57 #ifndef SQR_TOOM6_THRESHOLD
58 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
59 #endif
60 
61 #if TUNE_PROGRAM_BUILD
62 #define MAYBE_sqr_basecase 1
63 #define MAYBE_sqr_above_basecase   1
64 #define MAYBE_sqr_toom2   1
65 #define MAYBE_sqr_above_toom2   1
66 #define MAYBE_sqr_toom3   1
67 #define MAYBE_sqr_above_toom3   1
68 #define MAYBE_sqr_toom4   1
69 #define MAYBE_sqr_above_toom4   1
70 #define MAYBE_sqr_above_toom6   1
71 #else
72 #define SQR_TOOM8_MAX					\
73   ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ?	\
74    ((SQR_FFT_THRESHOLD+8*2-1+7)/8)			\
75    : MP_SIZE_T_MAX )
76 #define MAYBE_sqr_basecase					\
77   (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
78 #define MAYBE_sqr_above_basecase				\
79   (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
80 #define MAYBE_sqr_toom2						\
81   (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
82 #define MAYBE_sqr_above_toom2					\
83   (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
84 #define MAYBE_sqr_toom3						\
85   (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
86 #define MAYBE_sqr_above_toom3					\
87   (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
88 #define MAYBE_sqr_toom4						\
89   (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
90 #define MAYBE_sqr_above_toom4					\
91   (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
92 #define MAYBE_sqr_above_toom6					\
93   (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
94 #endif
95 
96 #define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws)				\
97   do {									\
98     if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
99 	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) {			\
100       mpn_sqr_basecase (p, a, n);					\
101       if (f) mpn_sqr_basecase (p2, a2, n);				\
102     } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
103 	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) {		\
104       mpn_toom2_sqr (p, a, n, ws);					\
105       if (f) mpn_toom2_sqr (p2, a2, n, ws);				\
106     } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
107 	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) {		\
108       mpn_toom3_sqr (p, a, n, ws);					\
109       if (f) mpn_toom3_sqr (p2, a2, n, ws);				\
110     } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4		\
111 	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) {		\
112       mpn_toom4_sqr (p, a, n, ws);					\
113       if (f) mpn_toom4_sqr (p2, a2, n, ws);				\
114     } else if (! MAYBE_sqr_above_toom6					\
115 	     || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) {		\
116       mpn_toom6_sqr (p, a, n, ws);					\
117       if (f) mpn_toom6_sqr (p2, a2, n, ws);				\
118     } else {								\
119       mpn_toom8_sqr (p, a, n, ws);					\
120       if (f) mpn_toom8_sqr (p2, a2, n, ws);				\
121     }									\
122   } while (0)
123 
124 void
mpn_toom8_sqr(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_ptr scratch)125 mpn_toom8_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
126 {
127   mp_size_t n, s;
128 
129   /***************************** decomposition *******************************/
130 
131   ASSERT ( an >= 40 );
132 
133   n = 1 + ((an - 1)>>3);
134 
135   s = an - 7 * n;
136 
137   ASSERT (0 < s && s <= n);
138   ASSERT ( s + s > 3 );
139 
140 #define   r6    (pp + 3 * n)			/* 3n+1 */
141 #define   r4    (pp + 7 * n)			/* 3n+1 */
142 #define   r2    (pp +11 * n)			/* 3n+1 */
143 #define   r0    (pp +15 * n)			/* s+t <= 2*n */
144 #define   r7    (scratch)			/* 3n+1 */
145 #define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
146 #define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
147 #define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
148 #define   v0    (pp +11 * n)			/* n+1 */
149 #define   v2    (pp +13 * n+2)			/* n+1 */
150 #define   wse   (scratch +12 * n + 4)		/* 3n+1 */
151 
152   /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may
153      need all of them, when DO_mpn_sublsh_n usea a scratch  */
154 /*   if (scratch == NULL) */
155 /*     scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
156 
157   /********************** evaluation and recursive calls *********************/
158   /* $\pm1/8$ */
159   mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
160   /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
161   TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
162   mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
163 
164   /* $\pm1/4$ */
165   mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
166   /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
167   TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
168   mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
169 
170   /* $\pm2$ */
171   mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
172   /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
173   TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
174   mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
175 
176   /* $\pm8$ */
177   mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
178   /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
179   TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
180   mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
181 
182   /* $\pm1/2$ */
183   mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
184   /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
185   TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
186   mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
187 
188   /* $\pm1$ */
189   mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s,    pp);
190   /* A(-1)*B(-1) */ /* A(1)*B(1) */
191   TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
192   mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
193 
194   /* $\pm4$ */
195   mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
196   /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
197   TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
198   mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
199 
200 #undef v0
201 #undef v2
202 
203   /* A(0)*B(0) */
204   TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
205 
206   mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
207 
208 #undef r0
209 #undef r1
210 #undef r2
211 #undef r3
212 #undef r4
213 #undef r5
214 #undef r6
215 #undef wse
216 
217 }
218 
219 #undef TOOM8_SQR_REC
220 #undef MAYBE_sqr_basecase
221 #undef MAYBE_sqr_above_basecase
222 #undef MAYBE_sqr_toom2
223 #undef MAYBE_sqr_above_toom2
224 #undef MAYBE_sqr_toom3
225 #undef MAYBE_sqr_above_toom3
226 #undef MAYBE_sqr_above_toom4
227