1 /* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
2 
3    Contributed to the GNU project by Marco Bodrato.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 #include "gmp.h"
38 #include "gmp-impl.h"
39 
40 /* For odd divisors, mpn_divexact_1 works fine with two's complement. */
41 #ifndef mpn_divexact_by3
42 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && MODLIMB_INVERSE_3
43 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,MODLIMB_INVERSE_3,0)
44 #else
45 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
46 #endif
47 #endif
48 
49 /* Interpolation for Toom-3.5, using the evaluation points: infinity,
50    1, -1, 2, -2. More precisely, we want to compute
51    f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
52    six values
53 
54      w5 = f(0),
55      w4 = f(-1),
56      w3 = f(1)
57      w2 = f(-2),
58      w1 = f(2),
59      w0 = limit at infinity of f(x) / x^5,
60 
61    The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
62    {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
63    {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
64    significant limbs small). f(-1) and f(-2) may be negative, signs
65    determined by the flag bits. All intermediate results are positive.
66    Inputs are destroyed.
67 
68    Interpolation sequence was taken from the paper: "Integer and
69    Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
70    Some slight variations were introduced: adaptation to "gmp
71    instruction set", and a final saving of an operation by interlacing
72    interpolation and recomposition phases.
73 */
74 
75 void
mpn_toom_interpolate_6pts(mp_ptr pp,mp_size_t n,enum toom6_flags flags,mp_ptr w4,mp_ptr w2,mp_ptr w1,mp_size_t w0n)76 mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
77 			   mp_ptr w4, mp_ptr w2, mp_ptr w1,
78 			   mp_size_t w0n)
79 {
80   mp_limb_t cy;
81   /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
82   mp_limb_t cy4, cy6, embankment;
83 
84   ASSERT( n > 0 );
85   ASSERT( 2*n >= w0n && w0n > 0 );
86 
87 #define w5  pp					/* 2n   */
88 #define w3  (pp + 2 * n)			/* 2n+1 */
89 #define w0  (pp + 5 * n)			/* w0n  */
90 
91   /* Interpolate with sequence:
92      W2 =(W1 - W2)>>2
93      W1 =(W1 - W5)>>1
94      W1 =(W1 - W2)>>1
95      W4 =(W3 - W4)>>1
96      W2 =(W2 - W4)/3
97      W3 = W3 - W4 - W5
98      W1 =(W1 - W3)/3
99      // Last steps are mixed with recomposition...
100      W2 = W2 - W0<<2
101      W4 = W4 - W2
102      W3 = W3 - W1
103      W2 = W2 - W0
104   */
105 
106   /* W2 =(W1 - W2)>>2 */
107   if (flags & toom6_vm2_neg)
108     mpn_add_n (w2, w1, w2, 2 * n + 1);
109   else
110     mpn_sub_n (w2, w1, w2, 2 * n + 1);
111   mpn_rshift (w2, w2, 2 * n + 1, 2);
112 
113   /* W1 =(W1 - W5)>>1 */
114   w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);
115   mpn_rshift (w1, w1, 2 * n + 1, 1);
116 
117   /* W1 =(W1 - W2)>>1 */
118 #if HAVE_NATIVE_mpn_rsh1sub_n
119   mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
120 #else
121   mpn_sub_n (w1, w1, w2, 2 * n + 1);
122   mpn_rshift (w1, w1, 2 * n + 1, 1);
123 #endif
124 
125   /* W4 =(W3 - W4)>>1 */
126   if (flags & toom6_vm1_neg)
127     {
128 #if HAVE_NATIVE_mpn_rsh1add_n
129       mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
130 #else
131       mpn_add_n (w4, w3, w4, 2 * n + 1);
132       mpn_rshift (w4, w4, 2 * n + 1, 1);
133 #endif
134     }
135   else
136     {
137 #if HAVE_NATIVE_mpn_rsh1sub_n
138       mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
139 #else
140       mpn_sub_n (w4, w3, w4, 2 * n + 1);
141       mpn_rshift (w4, w4, 2 * n + 1, 1);
142 #endif
143     }
144 
145   /* W2 =(W2 - W4)/3 */
146   mpn_sub_n (w2, w2, w4, 2 * n + 1);
147   mpn_divexact_by3 (w2, w2, 2 * n + 1);
148 
149   /* W3 = W3 - W4 - W5 */
150   mpn_sub_n (w3, w3, w4, 2 * n + 1);
151   w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
152 
153   /* W1 =(W1 - W3)/3 */
154   mpn_sub_n (w1, w1, w3, 2 * n + 1);
155   mpn_divexact_by3 (w1, w1, 2 * n + 1);
156 
157   /*
158     [1 0 0 0 0 0;
159      0 1 0 0 0 0;
160      1 0 1 0 0 0;
161      0 1 0 1 0 0;
162      1 0 1 0 1 0;
163      0 0 0 0 0 1]
164 
165     pp[] prior to operations:
166      |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
167 
168     summation scheme for remaining operations:
169      |______________5|n_____4|n_____3|n_____2|n______|n______|pp
170      |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
171 				    || H w4  | L w4  |
172 		    || H w2  | L w2  |
173 	    || H w1  | L w1  |
174 			    ||-H w1  |-L w1  |
175 		     |-H w0  |-L w0 ||-H w2  |-L w2  |
176   */
177   cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
178   MPN_INCR_U (pp + 3 * n + 1, n, cy);
179 
180   /* W2 -= W0<<2 */
181 #if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
182 #if HAVE_NATIVE_mpn_sublsh2_n_ip1
183   cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
184 #else
185   cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
186 #endif
187 #else
188   /* {W4,2*n+1} is now free and can be overwritten. */
189   cy = mpn_lshift(w4, w0, w0n, 2);
190   cy+= mpn_sub_n(w2, w2, w4, w0n);
191 #endif
192   MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
193 
194   /* W4L = W4L - W2L */
195   cy = mpn_sub_n (pp + n, pp + n, w2, n);
196   MPN_DECR_U (w3, 2 * n + 1, cy);
197 
198   /* W3H = W3H + W2L */
199   cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
200   /* W1L + W2H */
201   cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
202   MPN_INCR_U (w1 + n, n + 1, cy);
203 
204   /* W0 = W0 + W1H */
205   if (LIKELY (w0n > n))
206     cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
207   else
208     cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
209 
210   /*
211     summation scheme for the next operation:
212      |...____5|n_____4|n_____3|n_____2|n______|n______|pp
213      |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
214 		     ...-w0___|-w1_w2 |
215   */
216   /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
217   cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
218 
219   /* embankment is a "dirty trick" to avoid carry/borrow propagation
220      beyond allocated memory */
221   embankment = w0[w0n - 1] - 1;
222   w0[w0n - 1] = 1;
223   if (LIKELY (w0n > n)) {
224     if (cy4 > cy6)
225       MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
226     else
227       MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
228     MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
229     MPN_INCR_U (w0 + n, w0n - n, cy6);
230   } else {
231     MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
232     MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
233   }
234   w0[w0n - 1] += embankment;
235 
236 #undef w5
237 #undef w3
238 #undef w0
239 
240 }
241