1 /* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 #include "gmp.h"
38 #include "gmp-impl.h"
39 
40 #define BINVERT_3 MODLIMB_INVERSE_3
41 
42 #define BINVERT_15 \
43   ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
44 
45 #define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
46 
47 #ifndef mpn_divexact_by3
48 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
49 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
50 #else
51 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
52 #endif
53 #endif
54 
55 #ifndef mpn_divexact_by45
56 #if GMP_NUMB_BITS % 12 == 0
57 #define mpn_divexact_by45(dst,src,size) \
58   (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
59 #else
60 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
61 #define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
62 #else
63 #define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
64 #endif
65 #endif
66 #endif
67 
68 #if HAVE_NATIVE_mpn_sublsh2_n_ip1
69 #define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
70 #else
71 #define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
72 #endif
73 
74 #if HAVE_NATIVE_mpn_sublsh_n
75 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
76 #else
77 static mp_limb_t
DO_mpn_sublsh_n(mp_ptr dst,mp_srcptr src,mp_size_t n,unsigned int s,mp_ptr ws)78 DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
79 {
80 #if USE_MUL_1 && 0
81   return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
82 #else
83   mp_limb_t __cy;
84   __cy = mpn_lshift (ws,src,n,s);
85   return __cy + mpn_sub_n (dst,dst,ws,n);
86 #endif
87 }
88 #endif
89 
90 
91 #if HAVE_NATIVE_mpn_subrsh
92 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
93 #else
94 /* This is not a correct definition, it assumes no carry */
95 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
96 do {									\
97   mp_limb_t __cy;							\
98   MPN_DECR_U (dst, nd, src[0] >> s);					\
99   __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
100   MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
101 } while (0)
102 #endif
103 
104 /* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
105    points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
106    we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
107    degree 7 (or 6), given the 8 (rsp. 7) values:
108 
109      r1 = limit at infinity of f(x) / x^7,
110      r2 = f(4),
111      r3 = f(-4),
112      r4 = f(2),
113      r5 = f(-2),
114      r6 = f(1),
115      r7 = f(-1),
116      r8 = f(0).
117 
118    All couples of the form f(n),f(-n) must be already mixed with
119    toom_couple_handling(f(n),...,f(-n),...)
120 
121    The result is stored in {pp, spt + 7*n (or 6*n)}.
122    At entry, r8 is stored at {pp, 2n},
123    r5 is stored at {pp + 3n, 3n + 1}.
124 
125    The other values are 2n+... limbs each (with most significant limbs small).
126 
127    All intermediate results are positive.
128    Inputs are destroyed.
129 */
130 
131 void
mpn_toom_interpolate_8pts(mp_ptr pp,mp_size_t n,mp_ptr r3,mp_ptr r7,mp_size_t spt,mp_ptr ws)132 mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
133 			   mp_ptr r3, mp_ptr r7,
134 			   mp_size_t spt, mp_ptr ws)
135 {
136   mp_limb_signed_t cy;
137   mp_ptr r5, r1;
138   r5 = (pp + 3 * n);			/* 3n+1 */
139   r1 = (pp + 7 * n);			/* spt */
140 
141   /******************************* interpolation *****************************/
142 
143   DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
144   cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
145   MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
146 
147   DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
148   cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
149   MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
150 
151   r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
152   cy = mpn_sub_n (r7, r7, r1, spt);
153   MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
154 
155   ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
156   ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
157 
158   ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
159 
160   ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
161 
162   mpn_divexact_by45 (r3, r3, 3 * n + 1);
163 
164   ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
165 
166   ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
167 
168   /* last interpolation steps... */
169   /* ... are mixed with recomposition */
170 
171   /***************************** recomposition *******************************/
172   /*
173     pp[] prior to operations:
174      |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
175 
176     summation scheme for remaining operations:
177      |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
178      |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
179 	  ||_H r3|_M r3|_L*r3|
180 				  ||_H_r7|_M_r7|_L_r7|
181 		      ||-H r3|-M r3|-L*r3|
182 				  ||-H*r5|-M_r5|-L_r5|
183   */
184 
185   cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
186   cy-= mpn_sub_n (pp + n, pp + n, r5, n);
187   if (0 > cy)
188     MPN_DECR_U (r7 + n, 2*n + 1, 1);
189   else
190     MPN_INCR_U (r7 + n, 2*n + 1, cy);
191 
192   cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */
193   MPN_DECR_U (r7 + 2*n, n + 1, cy);
194 
195   cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
196   r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
197   cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
198   if (UNLIKELY(0 > cy))
199     MPN_DECR_U (r5 + n + 1, 2*n, 1);
200   else
201     MPN_INCR_U (r5 + n + 1, 2*n, cy);
202 
203   ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
204 
205   cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
206   MPN_INCR_U (r3 + 2*n, n + 1, cy);
207   cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
208   if (LIKELY(spt != n))
209     MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
210   else
211     ASSERT (r3[3*n] | cy == 0);
212 }
213