1 /* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009, 2012 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37
38 #include "gmp.h"
39 #include "gmp-impl.h"
40
41 #if GMP_NUMB_BITS < 29
42 #error Not implemented.
43 #endif
44
45 #if GMP_NUMB_BITS < 43
46 #define BIT_CORRECTION 1
47 #define CORRECTION_BITS GMP_NUMB_BITS
48 #else
49 #define BIT_CORRECTION 0
50 #define CORRECTION_BITS 0
51 #endif
52
53 #ifndef SQR_TOOM8_THRESHOLD
54 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
55 #endif
56
57 #ifndef SQR_TOOM6_THRESHOLD
58 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
59 #endif
60
61 #if TUNE_PROGRAM_BUILD
62 #define MAYBE_sqr_basecase 1
63 #define MAYBE_sqr_above_basecase 1
64 #define MAYBE_sqr_toom2 1
65 #define MAYBE_sqr_above_toom2 1
66 #define MAYBE_sqr_toom3 1
67 #define MAYBE_sqr_above_toom3 1
68 #define MAYBE_sqr_toom4 1
69 #define MAYBE_sqr_above_toom4 1
70 #define MAYBE_sqr_above_toom6 1
71 #else
72 #define SQR_TOOM8_MAX \
73 ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ? \
74 ((SQR_FFT_THRESHOLD+8*2-1+7)/8) \
75 : MP_SIZE_T_MAX )
76 #define MAYBE_sqr_basecase \
77 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
78 #define MAYBE_sqr_above_basecase \
79 (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
80 #define MAYBE_sqr_toom2 \
81 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
82 #define MAYBE_sqr_above_toom2 \
83 (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
84 #define MAYBE_sqr_toom3 \
85 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
86 #define MAYBE_sqr_above_toom3 \
87 (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
88 #define MAYBE_sqr_toom4 \
89 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
90 #define MAYBE_sqr_above_toom4 \
91 (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
92 #define MAYBE_sqr_above_toom6 \
93 (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
94 #endif
95
96 #define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws) \
97 do { \
98 if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \
99 || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) { \
100 mpn_sqr_basecase (p, a, n); \
101 if (f) mpn_sqr_basecase (p2, a2, n); \
102 } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \
103 || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) { \
104 mpn_toom2_sqr (p, a, n, ws); \
105 if (f) mpn_toom2_sqr (p2, a2, n, ws); \
106 } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \
107 || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) { \
108 mpn_toom3_sqr (p, a, n, ws); \
109 if (f) mpn_toom3_sqr (p2, a2, n, ws); \
110 } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4 \
111 || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) { \
112 mpn_toom4_sqr (p, a, n, ws); \
113 if (f) mpn_toom4_sqr (p2, a2, n, ws); \
114 } else if (! MAYBE_sqr_above_toom6 \
115 || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) { \
116 mpn_toom6_sqr (p, a, n, ws); \
117 if (f) mpn_toom6_sqr (p2, a2, n, ws); \
118 } else { \
119 mpn_toom8_sqr (p, a, n, ws); \
120 if (f) mpn_toom8_sqr (p2, a2, n, ws); \
121 } \
122 } while (0)
123
124 void
mpn_toom8_sqr(mp_ptr pp,mp_srcptr ap,mp_size_t an,mp_ptr scratch)125 mpn_toom8_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
126 {
127 mp_size_t n, s;
128
129 /***************************** decomposition *******************************/
130
131 ASSERT ( an >= 40 );
132
133 n = 1 + ((an - 1)>>3);
134
135 s = an - 7 * n;
136
137 ASSERT (0 < s && s <= n);
138 ASSERT ( s + s > 3 );
139
140 #define r6 (pp + 3 * n) /* 3n+1 */
141 #define r4 (pp + 7 * n) /* 3n+1 */
142 #define r2 (pp +11 * n) /* 3n+1 */
143 #define r0 (pp +15 * n) /* s+t <= 2*n */
144 #define r7 (scratch) /* 3n+1 */
145 #define r5 (scratch + 3 * n + 1) /* 3n+1 */
146 #define r3 (scratch + 6 * n + 2) /* 3n+1 */
147 #define r1 (scratch + 9 * n + 3) /* 3n+1 */
148 #define v0 (pp +11 * n) /* n+1 */
149 #define v2 (pp +13 * n+2) /* n+1 */
150 #define wse (scratch +12 * n + 4) /* 3n+1 */
151
152 /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may
153 need all of them, when DO_mpn_sublsh_n usea a scratch */
154 /* if (scratch == NULL) */
155 /* scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
156
157 /********************** evaluation and recursive calls *********************/
158 /* $\pm1/8$ */
159 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
160 /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
161 TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
162 mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
163
164 /* $\pm1/4$ */
165 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
166 /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
167 TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
168 mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
169
170 /* $\pm2$ */
171 mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
172 /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
173 TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
174 mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
175
176 /* $\pm8$ */
177 mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
178 /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
179 TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
180 mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
181
182 /* $\pm1/2$ */
183 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
184 /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
185 TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
186 mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
187
188 /* $\pm1$ */
189 mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s, pp);
190 /* A(-1)*B(-1) */ /* A(1)*B(1) */
191 TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
192 mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
193
194 /* $\pm4$ */
195 mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
196 /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
197 TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
198 mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
199
200 #undef v0
201 #undef v2
202
203 /* A(0)*B(0) */
204 TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
205
206 mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
207
208 #undef r0
209 #undef r1
210 #undef r2
211 #undef r3
212 #undef r4
213 #undef r5
214 #undef r6
215 #undef wse
216
217 }
218
219 #undef TOOM8_SQR_REC
220 #undef MAYBE_sqr_basecase
221 #undef MAYBE_sqr_above_basecase
222 #undef MAYBE_sqr_toom2
223 #undef MAYBE_sqr_above_toom2
224 #undef MAYBE_sqr_toom3
225 #undef MAYBE_sqr_above_toom3
226 #undef MAYBE_sqr_above_toom4
227