1 /* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
22
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25
26 #include "gmp.h"
27 #include "gmp-impl.h"
28
29 #define BINVERT_3 MODLIMB_INVERSE_3
30
31 #define BINVERT_15 \
32 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
33
34 #define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
35
36 #ifndef mpn_divexact_by3
37 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
38 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
39 #else
40 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
41 #endif
42 #endif
43
44 #ifndef mpn_divexact_by45
45 #if GMP_NUMB_BITS % 12 == 0
46 #define mpn_divexact_by45(dst,src,size) \
47 (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
48 #else
49 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
50 #define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
51 #else
52 #define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
53 #endif
54 #endif
55 #endif
56
57 #if HAVE_NATIVE_mpn_sublsh_n
58 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,src,n,s)
59 #else
60 static mp_limb_t
DO_mpn_sublsh_n(mp_ptr dst,mp_srcptr src,mp_size_t n,unsigned int s,mp_ptr ws)61 DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
62 {
63 #if USE_MUL_1
64 return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
65 #else
66 mp_limb_t __cy;
67 __cy = mpn_lshift (ws,src,n,s);
68 return __cy + mpn_sub_n (dst,dst,ws,n);
69 #endif
70 }
71 #endif
72
73
74 #if HAVE_NATIVE_mpn_subrsh
75 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
76 #else
77 /* This is not a correct definition, it assumes no carry */
78 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
79 do { \
80 mp_limb_t __cy; \
81 MPN_DECR_U (dst, nd, src[0] >> s); \
82 __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
83 MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
84 } while (0)
85 #endif
86
87 /* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
88 points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
89 we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
90 degree 7 (or 6), given the 8 (rsp. 7) values:
91
92 r1 = limit at infinity of f(x) / x^7,
93 r2 = f(4),
94 r3 = f(-4),
95 r4 = f(2),
96 r5 = f(-2),
97 r6 = f(1),
98 r7 = f(-1),
99 r8 = f(0).
100
101 All couples of the form f(n),f(-n) must be already mixed with
102 toom_couple_handling(f(n),...,f(-n),...)
103
104 The result is stored in {pp, spt + 7*n (or 6*n)}.
105 At entry, r8 is stored at {pp, 2n},
106 r5 is stored at {pp + 3n, 3n + 1}.
107
108 The other values are 2n+... limbs each (with most significant limbs small).
109
110 All intermediate results are positive.
111 Inputs are destroyed.
112 */
113
114 void
mpn_toom_interpolate_8pts(mp_ptr pp,mp_size_t n,mp_ptr r3,mp_ptr r7,mp_size_t spt,mp_ptr ws)115 mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
116 mp_ptr r3, mp_ptr r7,
117 mp_size_t spt, mp_ptr ws)
118 {
119 mp_limb_signed_t cy;
120 mp_ptr r5, r1;
121 r5 = (pp + 3 * n); /* 3n+1 */
122 r1 = (pp + 7 * n); /* spt */
123
124 /******************************* interpolation *****************************/
125
126 DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
127 cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
128 MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
129
130 DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
131 cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
132 MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
133
134 r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
135 cy = mpn_sub_n (r7, r7, r1, spt);
136 MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
137
138 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
139 ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
140
141 ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
142
143 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
144
145 mpn_divexact_by45 (r3, r3, 3 * n + 1);
146
147 ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
148
149 ASSERT_NOCARRY(DO_mpn_sublsh_n (r5, r3, 3 * n + 1, 2, ws));
150
151 /* last interpolation steps... */
152 /* ... are mixed with recomposition */
153
154 /***************************** recomposition *******************************/
155 /*
156 pp[] prior to operations:
157 |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
158
159 summation scheme for remaining operations:
160 |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
161 |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
162 ||_H r3|_M r3|_L*r3|
163 ||_H_r7|_M_r7|_L_r7|
164 ||-H r3|-M r3|-L*r3|
165 ||-H*r5|-M_r5|-L_r5|
166 */
167
168 cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
169 cy-= mpn_sub_n (pp + n, pp + n, r5, n);
170 if (0 > cy)
171 MPN_DECR_U (r7 + n, 2*n + 1, 1);
172 else
173 MPN_INCR_U (r7 + n, 2*n + 1, cy);
174
175 cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */
176 MPN_DECR_U (r7 + 2*n, n + 1, cy);
177
178 cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
179 r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
180 cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
181 if (UNLIKELY(0 > cy))
182 MPN_DECR_U (r5 + n + 1, 2*n, 1);
183 else
184 MPN_INCR_U (r5 + n + 1, 2*n, cy);
185
186 ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
187
188 cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
189 MPN_INCR_U (r3 + 2*n, n + 1, cy);
190 cy = r3[3*n] + mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
191 if (LIKELY(spt != n))
192 MPN_INCR_U (pp + 8*n, spt - n, cy);
193 else
194 ASSERT (cy == 0);
195 }
196