1 /* Interpolaton for the algorithm Toom-Cook 8.5-way.
2
3 Contributed to the GNU project by Marco Bodrato.
4
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21 License for more details.
22
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25
26
27 #include "mpir.h"
28 #include "gmp-impl.h"
29
30 #if GMP_NUMB_BITS < 29
31 #error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
32 #endif
33
34 #if GMP_NUMB_BITS < 28
35 #error Not implemented: divexact_by188513325 and _by182712915 will not work.
36 #endif
37
38
39 #if HAVE_NATIVE_mpn_sublsh_n
40 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
41 #else
42 static mp_limb_t
DO_mpn_sublsh_n(mp_ptr dst,mp_srcptr src,mp_size_t n,unsigned int s,mp_ptr ws)43 DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
44 {
45 #if USE_MUL_1 && 0
46 return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
47 #else
48 mp_limb_t __cy;
49 __cy = mpn_lshift(ws,src,n,s);
50 return __cy + mpn_sub_n(dst,dst,ws,n);
51 #endif
52 }
53 #endif
54
55 #if HAVE_NATIVE_mpn_addlsh_n
56 #define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
57 #else
58 static mp_limb_t
DO_mpn_addlsh_n(mp_ptr dst,mp_srcptr src,mp_size_t n,unsigned int s,mp_ptr ws)59 DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
60 {
61 #if USE_MUL_1 && 0
62 return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
63 #else
64 mp_limb_t __cy;
65 __cy = mpn_lshift(ws,src,n,s);
66 return __cy + mpn_add_n(dst,dst,ws,n);
67 #endif
68 }
69 #endif
70
71 #if HAVE_NATIVE_mpn_subrsh
72 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
73 #else
74 /* FIXME: This is not a correct definition, it assumes no carry */
75 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
76 do { \
77 mp_limb_t __cy; \
78 MPN_DECR_U (dst, nd, src[0] >> s); \
79 __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
80 MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
81 } while (0)
82 #endif
83
84
85 /* FIXME: tuneup should decide the best variant */
86 #ifndef AORSMUL_FASTER_AORS_AORSLSH
87 #define AORSMUL_FASTER_AORS_AORSLSH 1
88 #endif
89 #ifndef AORSMUL_FASTER_AORS_2AORSLSH
90 #define AORSMUL_FASTER_AORS_2AORSLSH 1
91 #endif
92 #ifndef AORSMUL_FASTER_2AORSLSH
93 #define AORSMUL_FASTER_2AORSLSH 1
94 #endif
95 #ifndef AORSMUL_FASTER_3AORSLSH
96 #define AORSMUL_FASTER_3AORSLSH 1
97 #endif
98
99 #if GMP_NUMB_BITS < 43
100 #define BIT_CORRECTION 1
101 #define CORRECTION_BITS GMP_NUMB_BITS
102 #else
103 #define BIT_CORRECTION 0
104 #define CORRECTION_BITS 0
105 #endif
106
107 #define BINVERT_9 \
108 ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
109
110 #define BINVERT_255 \
111 (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
112
113 /* FIXME: find some more general expressions for inverses */
114 #if GMP_LIMB_BITS == 32
115 #define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x53E3771B))
116 #define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0x9F314C35))
117 #define BINVERT_182712915 (GMP_NUMB_MASK & CNST_LIMB(0x550659DB))
118 #define BINVERT_188513325 (GMP_NUMB_MASK & CNST_LIMB(0xFBC333A5))
119 #define BINVERT_255x182712915L (GMP_NUMB_MASK & CNST_LIMB(0x6FC4CB25))
120 #define BINVERT_255x188513325L (GMP_NUMB_MASK & CNST_LIMB(0x6864275B))
121 #if GMP_NAIL_BITS == 0
122 #define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
123 #define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
124 #else /* GMP_NAIL_BITS != 0 */
125 #define BINVERT_255x182712915H \
126 (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
127 #define BINVERT_255x188513325H \
128 (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
129 #endif
130 #else
131 #if GMP_LIMB_BITS == 64
132 #define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x938CC70553E3771B))
133 #define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0xE7B40D449F314C35))
134 #define BINVERT_255x182712915 (GMP_NUMB_MASK & CNST_LIMB(0x1B649A076FC4CB25))
135 #define BINVERT_255x188513325 (GMP_NUMB_MASK & CNST_LIMB(0x06DB993A6864275B))
136 #endif
137 #endif
138
139 #ifndef mpn_divexact_by255
140 #if GMP_NUMB_BITS % 8 == 0
141 #define mpn_divexact_by255(dst,src,size) \
142 (255 & 1 * mpn_divexact_byfobm1(dst, src, size, 255, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
143 #else
144 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
145 #define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
146 #else
147 #define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
148 #endif
149 #endif
150 #endif
151
152 #ifndef mpn_divexact_by255x4
153 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
154 #define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
155 #else
156 #define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
157 #endif
158 #endif
159
160 #ifndef mpn_divexact_by9x16
161 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
162 #define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
163 #else
164 #define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
165 #endif
166 #endif
167
168 #ifndef mpn_divexact_by42525x16
169 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
170 #define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
171 #else
172 #define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
173 #endif
174 #endif
175
176 #ifndef mpn_divexact_by2835x64
177 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
178 #define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
179 #else
180 #define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
181 #endif
182 #endif
183
184 #ifndef mpn_divexact_by255x182712915
185 #if GMP_NUMB_BITS < 36
186 #if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
187 /* FIXME: use mpn_bdiv_q_2_pi2 */
188 #endif
189 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
190 #define mpn_divexact_by255x182712915(dst,src,size) \
191 do { \
192 mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0); \
193 mpn_divexact_by255(dst,dst,size); \
194 } while(0)
195 #else
196 #define mpn_divexact_by255x182712915(dst,src,size) \
197 do { \
198 mpn_divexact_1(dst,src,size,CNST_LIMB(182712915)); \
199 mpn_divexact_by255(dst,dst,size); \
200 } while(0)
201 #endif
202 #else /* GMP_NUMB_BITS > 35 */
203 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
204 #define mpn_divexact_by255x182712915(dst,src,size) \
205 mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
206 #else
207 #define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
208 #endif
209 #endif /* GMP_NUMB_BITS >?< 36 */
210 #endif
211
212 #ifndef mpn_divexact_by255x188513325
213 #if GMP_NUMB_BITS < 36
214 #if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
215 /* FIXME: use mpn_bdiv_q_1_pi2 */
216 #endif
217 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
218 #define mpn_divexact_by255x188513325(dst,src,size) \
219 do { \
220 mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0); \
221 mpn_divexact_by255(dst,dst,size); \
222 } while(0)
223 #else
224 #define mpn_divexact_by255x188513325(dst,src,size) \
225 do { \
226 mpn_divexact_1(dst,src,size,CNST_LIMB(188513325)); \
227 mpn_divexact_by255(dst,dst,size); \
228 } while(0)
229 #endif
230 #else /* GMP_NUMB_BITS > 35 */
231 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
232 #define mpn_divexact_by255x188513325(dst,src,size) \
233 mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
234 #else
235 #define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
236 #endif
237 #endif /* GMP_NUMB_BITS >?< 36 */
238 #endif
239
240 /* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
241 points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
242 +-1/8, 0. More precisely, we want to compute
243 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
244 14), given the 16 (rsp. 15) values:
245
246 r0 = limit at infinity of f(x) / x^7,
247 r1 = f(8),f(-8),
248 r2 = f(4),f(-4),
249 r3 = f(2),f(-2),
250 r4 = f(1),f(-1),
251 r5 = f(1/4),f(-1/4),
252 r6 = f(1/2),f(-1/2),
253 r7 = f(1/8),f(-1/8),
254 r8 = f(0).
255
256 All couples of the form f(n),f(-n) must be already mixed with
257 toom_couple_handling(f(n),...,f(-n),...)
258
259 The result is stored in {pp, spt + 7*n (or 8*n)}.
260 At entry, r8 is stored at {pp, 2n},
261 r6 is stored at {pp + 3n, 3n + 1}.
262 r4 is stored at {pp + 7n, 3n + 1}.
263 r2 is stored at {pp +11n, 3n + 1}.
264 r0 is stored at {pp +15n, spt}.
265
266 The other values are 3n+1 limbs each (with most significant limbs small).
267
268 Negative intermediate results are stored two-complemented.
269 Inputs are destroyed.
270 */
271
272 void
mpn_toom_interpolate_16pts(mp_ptr pp,mp_ptr r1,mp_ptr r3,mp_ptr r5,mp_ptr r7,mp_size_t n,mp_size_t spt,int half,mp_ptr wsi)273 mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
274 mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
275 {
276 mp_limb_t cy;
277 mp_size_t n3;
278 mp_size_t n3p1;
279 n3 = 3 * n;
280 n3p1 = n3 + 1;
281
282 #define r6 (pp + n3) /* 3n+1 */
283 #define r4 (pp + 7 * n) /* 3n+1 */
284 #define r2 (pp +11 * n) /* 3n+1 */
285 #define r0 (pp +15 * n) /* s+t <= 2*n */
286
287 ASSERT( spt <= 2 * n );
288 /******************************* interpolation *****************************/
289 if( half != 0) {
290 cy = mpn_sub_n (r4, r4, r0, spt);
291 MPN_DECR_U (r4 + spt, n3p1 - spt, cy);
292
293 cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
294 MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
295 DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);
296
297 cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
298 MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
299 DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);
300
301 cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
302 //MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
303 #if BIT_CORRECTION
304 cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
305 n3p1 - spt - BIT_CORRECTION, cy);
306 ASSERT (BIT_CORRECTION > 0 || cy == 0);
307 /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
308 cy = r7[n3p1];
309 r7[n3p1] = 0x80;
310 #else
311 MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
312 #endif
313 DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
314 #if BIT_CORRECTION
315 /* FIXME: assumes r7[n3p1] is writable. */
316 ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );
317 r7[n3p1] = cy;
318 #endif
319 };
320
321 r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
322 DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
323
324 #if HAVE_NATIVE_mpn_sumdiff_n
325 mpn_sumdiff_n (r2, wsi, r5, r2, n3p1);
326 MP_PTR_SWAP(r5,wsi);
327 #else
328 mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
329 ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
330 MP_PTR_SWAP(r5, wsi);
331 #endif
332
333 r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
334 DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
335
336 #if HAVE_NATIVE_mpn_sumdiff_n
337 mpn_sumdiff_n (wsi, r6, r6, r3, n3p1);
338 MP_PTR_SWAP(r3, wsi);
339 #else
340 ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));
341 mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
342 MP_PTR_SWAP(r3, wsi);
343 #endif
344
345 r7[n3] -= DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi)
346 * (1-BIT_CORRECTION); /* if BIT_CORRECTION != 0, discard the carry. */
347 #if BIT_CORRECTION
348 MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
349 cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
350 cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
351 ASSERT ( BIT_CORRECTION > 0 || cy != 0 );
352 #else
353 DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
354 #endif
355
356 #if HAVE_NATIVE_mpn_sumdiff_n
357 mpn_sumdiff_n (r1, wsi, r7, r1, n3p1);
358 MP_PTR_SWAP(r7, wsi);
359 #else
360 mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
361 mpn_add_n (r1, r1, r7, n3p1); /* if BIT_CORRECTION != 0, can give a carry. */
362 MP_PTR_SWAP(r7, wsi);
363 #endif
364
365 r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);
366
367 #if AORSMUL_FASTER_2AORSLSH
368 mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
369 #else
370 DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */
371 DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */
372 #endif
373
374 mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
375 #if AORSMUL_FASTER_3AORSLSH
376 mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
377 #else
378 DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */
379 DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */
380 DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */
381 #endif
382 mpn_divexact_by255x188513325(r7, r7, n3p1);
383
384 mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
385 /* A division by 2835x64 followsi. Warning: the operand can be negative! */
386 mpn_divexact_by2835x64(r5, r5, n3p1);
387 if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
388 r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));
389
390 #if AORSMUL_FASTER_AORS_AORSLSH
391 mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
392 #else
393 mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
394 DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
395 #endif
396 #if AORSMUL_FASTER_2AORSLSH
397 mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
398 #else
399 DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
400 DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
401 #endif
402 /* A division by 255x4 followsi. Warning: the operand can be negative! */
403 mpn_divexact_by255x4(r6, r6, n3p1);
404 if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
405 r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
406
407 ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));
408
409 ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
410 ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));
411
412 /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/
413 DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
414 mpn_submul_1 (r1, r2, n3p1, 1428);
415 mpn_submul_1 (r1, r3, n3p1, 112896);
416 mpn_divexact_by255x182712915(r1, r1, n3p1);
417
418 ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));
419 mpn_divexact_by42525x16(r2, r2, n3p1);
420
421 #if AORSMUL_FASTER_AORS_2AORSLSH
422 ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));
423 #else
424 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
425 ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
426 ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
427 #endif
428 ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));
429 mpn_divexact_by9x16(r3, r3, n3p1);
430
431 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));
432 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));
433 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));
434
435 mpn_add_n (r6, r2, r6, n3p1);
436 ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));
437 ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));
438
439 mpn_sub_n (r5, r3, r5, n3p1);
440 ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
441 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));
442
443 mpn_add_n (r7, r1, r7, n3p1);
444 ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));
445 ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));
446
447 /* last interpolation steps... */
448 /* ... could be mixed with recomposition
449 ||H-r7|M-r7|L-r7| ||H-r5|M-r5|L-r5|
450 */
451
452 /***************************** recomposition *******************************/
453 /*
454 pp[] prior to operations:
455 |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
456
457 summation scheme for remaining operations:
458 |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
459 |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
460 ||H r1|M r1|L r1| ||H r3|M r3|L r3| ||H_r5|M_r5|L_r5| ||H r7|M r7|L r7|
461 */
462
463 cy = mpn_add_n (pp + n, pp + n, r7, n);
464 cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
465 #if HAVE_NATIVE_mpn_add_nc
466 cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);
467 #else
468 MPN_INCR_U (r7 + 2 * n, n + 1, cy);
469 cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
470 #endif
471 MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);
472
473 pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
474 cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
475 #if HAVE_NATIVE_mpn_add_nc
476 cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
477 #else
478 MPN_INCR_U (r5 + 2 * n, n + 1, cy);
479 cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
480 #endif
481 MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
482
483 pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
484 cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
485 #if HAVE_NATIVE_mpn_add_nc
486 cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);
487 #else
488 MPN_INCR_U (r3 + 2 * n, n + 1, cy);
489 cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);
490 #endif
491 MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);
492
493 pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);
494 if ( half ) {
495 cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
496 #if HAVE_NATIVE_mpn_add_nc
497 if(LIKELY(spt > n)) {
498 cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
499 MPN_INCR_U (pp + 16 * n, spt - n, cy);
500 } else {
501 ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
502 }
503 #else
504 MPN_INCR_U (r1 + 2 * n, n + 1, cy);
505 if(LIKELY(spt > n)) {
506 cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
507 MPN_INCR_U (pp + 16 * n, spt - n, cy);
508 } else {
509 ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
510 }
511 #endif
512 } else {
513 ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));
514 }
515
516 #undef r0
517 #undef r2
518 #undef r4
519 #undef r6
520 }
521