xref: /dragonfly/contrib/gmp/mpn/generic/toom6_sqr.c (revision 650094e1)
1 /* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17 
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21 License for more details.
22 
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25 
26 
27 #include "gmp.h"
28 #include "gmp-impl.h"
29 
30 
31 #if GMP_NUMB_BITS < 21
32 #error Not implemented.
33 #endif
34 
35 
36 #if TUNE_PROGRAM_BUILD
37 #define MAYBE_sqr_basecase 1
38 #define MAYBE_sqr_above_basecase   1
39 #define MAYBE_sqr_toom2   1
40 #define MAYBE_sqr_above_toom2   1
41 #define MAYBE_sqr_toom3   1
42 #define MAYBE_sqr_above_toom3   1
43 #define MAYBE_sqr_above_toom4   1
44 #else
45 #ifdef  SQR_TOOM8_THRESHOLD
46 #define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6)
47 #else
48 #define SQR_TOOM6_MAX					\
49   ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ?	\
50    ((SQR_FFT_THRESHOLD+6*2-1+5)/6)			\
51    : MP_SIZE_T_MAX )
52 #endif
53 #define MAYBE_sqr_basecase					\
54   (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
55 #define MAYBE_sqr_above_basecase				\
56   (SQR_TOOM6_MAX >=  SQR_TOOM2_THRESHOLD)
57 #define MAYBE_sqr_toom2						\
58   (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
59 #define MAYBE_sqr_above_toom2					\
60   (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD)
61 #define MAYBE_sqr_toom3						\
62   (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
63 #define MAYBE_sqr_above_toom3					\
64   (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD)
65 #define MAYBE_sqr_above_toom4					\
66   (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD)
67 #endif
68 
69 #define TOOM6_SQR_REC(p, a, n, ws)					\
70   do {									\
71     if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
72 	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))			\
73       mpn_sqr_basecase (p, a, n);					\
74     else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
75 	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))		\
76       mpn_toom2_sqr (p, a, n, ws);					\
77     else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
78 	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))		\
79       mpn_toom3_sqr (p, a, n, ws);					\
80     else if (! MAYBE_sqr_above_toom4					\
81 	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))		\
82       mpn_toom4_sqr (p, a, n, ws);					\
83     else								\
84       mpn_toom6_sqr (p, a, n, ws);					\
85   } while (0)
86 
87 void
88 mpn_toom6_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
89 {
90   mp_size_t n, s;
91 
92   /***************************** decomposition *******************************/
93 
94   ASSERT( an >= 18 );
95 
96   n = 1 + (an - 1) / (size_t) 6;
97 
98   s = an - 5 * n;
99 
100   ASSERT (0 < s && s <= n);
101 
102 #define   r4    (pp + 3 * n)			/* 3n+1 */
103 #define   r2    (pp + 7 * n)			/* 3n+1 */
104 #define   r0    (pp +11 * n)			/* s+t <= 2*n */
105 #define   r5    (scratch)			/* 3n+1 */
106 #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
107 #define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
108 #define   v0    (pp + 7 * n)			/* n+1 */
109 #define   v2    (pp + 9 * n+2)			/* n+1 */
110 #define   wse   (scratch + 9 * n + 3)		/* 3n+1 */
111 
112   /* Alloc also 3n+1 limbs for ws... toom_interpolate_12pts may
113      need all of them, when DO_mpn_sublsh_n usea a scratch  */
114 /*   if (scratch== NULL) */
115 /*     scratch = TMP_SALLOC_LIMBS (12 * n + 6); */
116 
117   /********************** evaluation and recursive calls *********************/
118   /* $\pm1/2$ */
119   mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp);
120   TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
121   TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
122   mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0);
123 
124   /* $\pm1$ */
125   mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
126   TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
127   TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)*B(1) */
128   mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0);
129 
130   /* $\pm4$ */
131   mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
132   TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
133   TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)*B(+4) */
134   mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4);
135 
136   /* $\pm1/4$ */
137   mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp);
138   TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
139   TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
140   mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0);
141 
142   /* $\pm2$ */
143   mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
144   TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
145   TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)*B(+2) */
146   mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2);
147 
148 #undef v0
149 #undef v2
150 
151   /* A(0)*B(0) */
152   TOOM6_SQR_REC(pp, ap, n, wse);
153 
154   mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse);
155 
156 #undef r0
157 #undef r1
158 #undef r2
159 #undef r3
160 #undef r4
161 #undef r5
162 
163 }
164 #undef TOOM6_SQR_REC
165 #undef MAYBE_sqr_basecase
166 #undef MAYBE_sqr_above_basecase
167 #undef MAYBE_sqr_toom2
168 #undef MAYBE_sqr_above_toom2
169 #undef MAYBE_sqr_toom3
170 #undef MAYBE_sqr_above_toom3
171 #undef MAYBE_sqr_above_toom4
172