xref: /dragonfly/contrib/gmp/mpn/generic/toom8_sqr.c (revision 25a2db75)
1 /* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17 
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21 License for more details.
22 
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25 
26 
27 #include "gmp.h"
28 #include "gmp-impl.h"
29 
30 #if GMP_NUMB_BITS < 29
31 #error Not implemented.
32 #endif
33 
34 #if GMP_NUMB_BITS < 43
35 #define BIT_CORRECTION 1
36 #define CORRECTION_BITS GMP_NUMB_BITS
37 #else
38 #define BIT_CORRECTION 0
39 #define CORRECTION_BITS 0
40 #endif
41 
42 #ifndef SQR_TOOM8_THRESHOLD
43 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
44 #endif
45 
46 #ifndef SQR_TOOM6_THRESHOLD
47 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
48 #endif
49 
50 #if TUNE_PROGRAM_BUILD
51 #define MAYBE_sqr_basecase 1
52 #define MAYBE_sqr_above_basecase   1
53 #define MAYBE_sqr_toom2   1
54 #define MAYBE_sqr_above_toom2   1
55 #define MAYBE_sqr_toom3   1
56 #define MAYBE_sqr_above_toom3   1
57 #define MAYBE_sqr_toom4   1
58 #define MAYBE_sqr_above_toom4   1
59 #define MAYBE_sqr_above_toom6   1
60 #else
61 #define SQR_TOOM8_MAX					\
62   ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ?	\
63    ((SQR_FFT_THRESHOLD+8*2-1+7)/8)			\
64    : MP_SIZE_T_MAX )
65 #define MAYBE_sqr_basecase					\
66   (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
67 #define MAYBE_sqr_above_basecase				\
68   (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
69 #define MAYBE_sqr_toom2						\
70   (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
71 #define MAYBE_sqr_above_toom2					\
72   (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
73 #define MAYBE_sqr_toom3						\
74   (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
75 #define MAYBE_sqr_above_toom3					\
76   (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
77 #define MAYBE_sqr_toom4						\
78   (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
79 #define MAYBE_sqr_above_toom4					\
80   (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
81 #define MAYBE_sqr_above_toom6					\
82   (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
83 #endif
84 
85 #define TOOM8_SQR_REC(p, a, n, ws)					\
86   do {									\
87     if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
88 	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))			\
89       mpn_sqr_basecase (p, a, n);					\
90     else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
91 	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))		\
92       mpn_toom2_sqr (p, a, n, ws);					\
93     else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
94 	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))		\
95       mpn_toom3_sqr (p, a, n, ws);					\
96     else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4		\
97 	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD)))		\
98       mpn_toom4_sqr (p, a, n, ws);					\
99     else if (! MAYBE_sqr_above_toom6					\
100 	     || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))		\
101       mpn_toom6_sqr (p, a, n, ws);					\
102     else								\
103       mpn_toom8_sqr (p, a, n, ws);					\
104   } while (0)
105 
106 void
107 mpn_toom8_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
108 {
109   mp_size_t n, s;
110 
111   /***************************** decomposition *******************************/
112 
113   ASSERT ( an >= 40 );
114 
115   n = 1 + ((an - 1)>>3);
116 
117   s = an - 7 * n;
118 
119   ASSERT (0 < s && s <= n);
120   ASSERT ( s + s > 3 );
121 
122 #define   r6    (pp + 3 * n)			/* 3n+1 */
123 #define   r4    (pp + 7 * n)			/* 3n+1 */
124 #define   r2    (pp +11 * n)			/* 3n+1 */
125 #define   r0    (pp +15 * n)			/* s+t <= 2*n */
126 #define   r7    (scratch)			/* 3n+1 */
127 #define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
128 #define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
129 #define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
130 #define   v0    (pp +11 * n)			/* n+1 */
131 #define   v2    (pp +13 * n+2)			/* n+1 */
132 #define   wse   (scratch +12 * n + 4)		/* 3n+1 */
133 
134   /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may
135      need all of them, when DO_mpn_sublsh_n usea a scratch  */
136 /*   if (scratch == NULL) */
137 /*     scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
138 
139   /********************** evaluation and recursive calls *********************/
140   /* $\pm1/8$ */
141   mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
142   TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/8)*B(-1/8)*8^. */
143   TOOM8_SQR_REC(r7, v2, n + 1, wse); /* A(+1/8)*B(+1/8)*8^. */
144   mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
145 
146   /* $\pm1/4$ */
147   mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
148   TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
149   TOOM8_SQR_REC(r5, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
150   mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
151 
152   /* $\pm2$ */
153   mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
154   TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
155   TOOM8_SQR_REC(r3, v2, n + 1, wse); /* A(+2)*B(+2) */
156   mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
157 
158   /* $\pm8$ */
159   mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
160   TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-8)*B(-8) */
161   TOOM8_SQR_REC(r1, v2, n + 1, wse); /* A(+8)*B(+8) */
162   mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
163 
164   /* $\pm1/2$ */
165   mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
166   TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
167   TOOM8_SQR_REC(r6, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
168   mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
169 
170   /* $\pm1$ */
171   mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s,    pp);
172   TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
173   TOOM8_SQR_REC(r4, v2, n + 1, wse); /* A(1)*B(1) */
174   mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
175 
176   /* $\pm4$ */
177   mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
178   TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
179   TOOM8_SQR_REC(r2, v2, n + 1, wse); /* A(+4)*B(+4) */
180   mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
181 
182 #undef v0
183 #undef v2
184 
185   /* A(0)*B(0) */
186   TOOM8_SQR_REC(pp, ap, n, wse);
187 
188   mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
189 
190 #undef r0
191 #undef r1
192 #undef r2
193 #undef r3
194 #undef r4
195 #undef r5
196 #undef r6
197 #undef wse
198 
199 }
200 
201 #undef TOOM8_SQR_REC
202 #undef MAYBE_sqr_basecase
203 #undef MAYBE_sqr_above_basecase
204 #undef MAYBE_sqr_toom2
205 #undef MAYBE_sqr_above_toom2
206 #undef MAYBE_sqr_toom3
207 #undef MAYBE_sqr_above_toom3
208 #undef MAYBE_sqr_above_toom4
209