1 /* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37 #include "gmp.h"
38 #include "gmp-impl.h"
39
40 #define BINVERT_3 MODLIMB_INVERSE_3
41
42 #define BINVERT_15 \
43 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
44
45 #define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
46
47 #ifndef mpn_divexact_by3
48 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
49 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
50 #else
51 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
52 #endif
53 #endif
54
55 #ifndef mpn_divexact_by45
56 #if GMP_NUMB_BITS % 12 == 0
57 #define mpn_divexact_by45(dst,src,size) \
58 (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
59 #else
60 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
61 #define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
62 #else
63 #define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
64 #endif
65 #endif
66 #endif
67
68 #if HAVE_NATIVE_mpn_sublsh2_n_ip1
69 #define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
70 #else
71 #define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
72 #endif
73
74 #if HAVE_NATIVE_mpn_sublsh_n
75 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
76 #else
77 static mp_limb_t
DO_mpn_sublsh_n(mp_ptr dst,mp_srcptr src,mp_size_t n,unsigned int s,mp_ptr ws)78 DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
79 {
80 #if USE_MUL_1 && 0
81 return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
82 #else
83 mp_limb_t __cy;
84 __cy = mpn_lshift (ws,src,n,s);
85 return __cy + mpn_sub_n (dst,dst,ws,n);
86 #endif
87 }
88 #endif
89
90
91 #if HAVE_NATIVE_mpn_subrsh
92 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
93 #else
94 /* This is not a correct definition, it assumes no carry */
95 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
96 do { \
97 mp_limb_t __cy; \
98 MPN_DECR_U (dst, nd, src[0] >> s); \
99 __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
100 MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
101 } while (0)
102 #endif
103
104 /* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
105 points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
106 we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
107 degree 7 (or 6), given the 8 (rsp. 7) values:
108
109 r1 = limit at infinity of f(x) / x^7,
110 r2 = f(4),
111 r3 = f(-4),
112 r4 = f(2),
113 r5 = f(-2),
114 r6 = f(1),
115 r7 = f(-1),
116 r8 = f(0).
117
118 All couples of the form f(n),f(-n) must be already mixed with
119 toom_couple_handling(f(n),...,f(-n),...)
120
121 The result is stored in {pp, spt + 7*n (or 6*n)}.
122 At entry, r8 is stored at {pp, 2n},
123 r5 is stored at {pp + 3n, 3n + 1}.
124
125 The other values are 2n+... limbs each (with most significant limbs small).
126
127 All intermediate results are positive.
128 Inputs are destroyed.
129 */
130
131 void
mpn_toom_interpolate_8pts(mp_ptr pp,mp_size_t n,mp_ptr r3,mp_ptr r7,mp_size_t spt,mp_ptr ws)132 mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
133 mp_ptr r3, mp_ptr r7,
134 mp_size_t spt, mp_ptr ws)
135 {
136 mp_limb_signed_t cy;
137 mp_ptr r5, r1;
138 r5 = (pp + 3 * n); /* 3n+1 */
139 r1 = (pp + 7 * n); /* spt */
140
141 /******************************* interpolation *****************************/
142
143 DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
144 cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
145 MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
146
147 DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
148 cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
149 MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
150
151 r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
152 cy = mpn_sub_n (r7, r7, r1, spt);
153 MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
154
155 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
156 ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
157
158 ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
159
160 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
161
162 mpn_divexact_by45 (r3, r3, 3 * n + 1);
163
164 ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
165
166 ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
167
168 /* last interpolation steps... */
169 /* ... are mixed with recomposition */
170
171 /***************************** recomposition *******************************/
172 /*
173 pp[] prior to operations:
174 |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
175
176 summation scheme for remaining operations:
177 |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
178 |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
179 ||_H r3|_M r3|_L*r3|
180 ||_H_r7|_M_r7|_L_r7|
181 ||-H r3|-M r3|-L*r3|
182 ||-H*r5|-M_r5|-L_r5|
183 */
184
185 cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
186 cy-= mpn_sub_n (pp + n, pp + n, r5, n);
187 if (0 > cy)
188 MPN_DECR_U (r7 + n, 2*n + 1, 1);
189 else
190 MPN_INCR_U (r7 + n, 2*n + 1, cy);
191
192 cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */
193 MPN_DECR_U (r7 + 2*n, n + 1, cy);
194
195 cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
196 r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
197 cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
198 if (UNLIKELY(0 > cy))
199 MPN_DECR_U (r5 + n + 1, 2*n, 1);
200 else
201 MPN_INCR_U (r5 + n + 1, 2*n, cy);
202
203 ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
204
205 cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
206 MPN_INCR_U (r3 + 2*n, n + 1, cy);
207 cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
208 if (LIKELY(spt != n))
209 MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
210 else
211 ASSERT (r3[3*n] + cy == 0);
212 }
213