1 /* Interpolaton for the algorithm Toom-Cook 8.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2010 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 27 #include "gmp.h" 28 #include "gmp-impl.h" 29 30 #if GMP_NUMB_BITS < 29 31 #error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB. 32 #endif 33 34 #if GMP_NUMB_BITS < 28 35 #error Not implemented: divexact_by188513325 and _by182712915 will not work. 36 #endif 37 38 39 #if HAVE_NATIVE_mpn_sublsh_n 40 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s) 41 #else 42 static mp_limb_t 43 DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws) 44 { 45 #if USE_MUL_1 && 0 46 return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s)); 47 #else 48 mp_limb_t __cy; 49 __cy = mpn_lshift(ws,src,n,s); 50 return __cy + mpn_sub_n(dst,dst,ws,n); 51 #endif 52 } 53 #endif 54 55 #if HAVE_NATIVE_mpn_addlsh_n 56 #define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s) 57 #else 58 static mp_limb_t 59 DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws) 60 { 61 #if USE_MUL_1 && 0 62 return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s)); 63 #else 64 mp_limb_t __cy; 65 __cy = mpn_lshift(ws,src,n,s); 66 return __cy + mpn_add_n(dst,dst,ws,n); 67 #endif 68 } 69 #endif 70 71 #if HAVE_NATIVE_mpn_subrsh 72 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s) 73 #else 74 /* FIXME: This is not a correct definition, it assumes no carry */ 75 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \ 76 do { \ 77 mp_limb_t __cy; \ 78 MPN_DECR_U (dst, nd, src[0] >> s); \ 79 __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \ 80 MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \ 81 } while (0) 82 #endif 83 84 85 /* FIXME: tuneup should decide the best variant */ 86 #ifndef AORSMUL_FASTER_AORS_AORSLSH 87 #define AORSMUL_FASTER_AORS_AORSLSH 1 88 #endif 89 #ifndef AORSMUL_FASTER_AORS_2AORSLSH 90 #define AORSMUL_FASTER_AORS_2AORSLSH 1 91 #endif 92 #ifndef AORSMUL_FASTER_2AORSLSH 93 #define AORSMUL_FASTER_2AORSLSH 1 94 #endif 95 #ifndef AORSMUL_FASTER_3AORSLSH 96 #define AORSMUL_FASTER_3AORSLSH 1 97 #endif 98 99 #if GMP_NUMB_BITS < 43 100 #define BIT_CORRECTION 1 101 #define CORRECTION_BITS GMP_NUMB_BITS 102 #else 103 #define BIT_CORRECTION 0 104 #define CORRECTION_BITS 0 105 #endif 106 107 #define BINVERT_9 \ 108 ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39) 109 110 #define BINVERT_255 \ 111 (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8))) 112 113 /* FIXME: find some more general expressions for inverses */ 114 #if GMP_LIMB_BITS == 32 115 #define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x53E3771B)) 116 #define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0x9F314C35)) 117 #define BINVERT_182712915 (GMP_NUMB_MASK & CNST_LIMB(0x550659DB)) 118 #define BINVERT_188513325 (GMP_NUMB_MASK & CNST_LIMB(0xFBC333A5)) 119 #define BINVERT_255x182712915L (GMP_NUMB_MASK & CNST_LIMB(0x6FC4CB25)) 120 #define BINVERT_255x188513325L (GMP_NUMB_MASK & CNST_LIMB(0x6864275B)) 121 #if GMP_NAIL_BITS == 0 122 #define BINVERT_255x182712915H CNST_LIMB(0x1B649A07) 123 #define BINVERT_255x188513325H CNST_LIMB(0x06DB993A) 124 #else /* GMP_NAIL_BITS != 0 */ 125 #define BINVERT_255x182712915H \ 126 (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS))) 127 #define BINVERT_255x188513325H \ 128 (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS))) 129 #endif 130 #else 131 #if GMP_LIMB_BITS == 64 132 #define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x938CC70553E3771B)) 133 #define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0xE7B40D449F314C35)) 134 #define BINVERT_255x182712915 (GMP_NUMB_MASK & CNST_LIMB(0x1B649A076FC4CB25)) 135 #define BINVERT_255x188513325 (GMP_NUMB_MASK & CNST_LIMB(0x06DB993A6864275B)) 136 #endif 137 #endif 138 139 #ifndef mpn_divexact_by255 140 #if GMP_NUMB_BITS % 8 == 0 141 #define mpn_divexact_by255(dst,src,size) \ 142 (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255))) 143 #else 144 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 145 #define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0) 146 #else 147 #define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)) 148 #endif 149 #endif 150 #endif 151 152 #ifndef mpn_divexact_by255x4 153 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 154 #define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2) 155 #else 156 #define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2) 157 #endif 158 #endif 159 160 #ifndef mpn_divexact_by9x16 161 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 162 #define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4) 163 #else 164 #define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4) 165 #endif 166 #endif 167 168 #ifndef mpn_divexact_by42525x16 169 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525) 170 #define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4) 171 #else 172 #define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4) 173 #endif 174 #endif 175 176 #ifndef mpn_divexact_by2835x64 177 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835) 178 #define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6) 179 #else 180 #define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6) 181 #endif 182 #endif 183 184 #ifndef mpn_divexact_by255x182712915 185 #if GMP_NUMB_BITS < 36 186 #if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H) 187 /* FIXME: use mpn_bdiv_q_2_pi2 */ 188 #endif 189 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915) 190 #define mpn_divexact_by255x182712915(dst,src,size) \ 191 do { \ 192 mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0); \ 193 mpn_divexact_by255(dst,dst,size); \ 194 } while(0) 195 #else 196 #define mpn_divexact_by255x182712915(dst,src,size) \ 197 do { \ 198 mpn_divexact_1(dst,src,size,CNST_LIMB(182712915)); \ 199 mpn_divexact_by255(dst,dst,size); \ 200 } while(0) 201 #endif 202 #else /* GMP_NUMB_BITS > 35 */ 203 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915) 204 #define mpn_divexact_by255x182712915(dst,src,size) \ 205 mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0) 206 #else 207 #define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915)) 208 #endif 209 #endif /* GMP_NUMB_BITS >?< 36 */ 210 #endif 211 212 #ifndef mpn_divexact_by255x188513325 213 #if GMP_NUMB_BITS < 36 214 #if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H) 215 /* FIXME: use mpn_bdiv_q_1_pi2 */ 216 #endif 217 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325) 218 #define mpn_divexact_by255x188513325(dst,src,size) \ 219 do { \ 220 mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0); \ 221 mpn_divexact_by255(dst,dst,size); \ 222 } while(0) 223 #else 224 #define mpn_divexact_by255x188513325(dst,src,size) \ 225 do { \ 226 mpn_divexact_1(dst,src,size,CNST_LIMB(188513325)); \ 227 mpn_divexact_by255(dst,dst,size); \ 228 } while(0) 229 #endif 230 #else /* GMP_NUMB_BITS > 35 */ 231 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325) 232 #define mpn_divexact_by255x188513325(dst,src,size) \ 233 mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0) 234 #else 235 #define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325)) 236 #endif 237 #endif /* GMP_NUMB_BITS >?< 36 */ 238 #endif 239 240 /* Interpolation for Toom-8.5 (or Toom-8), using the evaluation 241 points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2, 242 +-1/8, 0. More precisely, we want to compute 243 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or 244 14), given the 16 (rsp. 15) values: 245 246 r0 = limit at infinity of f(x) / x^7, 247 r1 = f(8),f(-8), 248 r2 = f(4),f(-4), 249 r3 = f(2),f(-2), 250 r4 = f(1),f(-1), 251 r5 = f(1/4),f(-1/4), 252 r6 = f(1/2),f(-1/2), 253 r7 = f(1/8),f(-1/8), 254 r8 = f(0). 255 256 All couples of the form f(n),f(-n) must be already mixed with 257 toom_couple_handling(f(n),...,f(-n),...) 258 259 The result is stored in {pp, spt + 7*n (or 8*n)}. 260 At entry, r8 is stored at {pp, 2n}, 261 r6 is stored at {pp + 3n, 3n + 1}. 262 r4 is stored at {pp + 7n, 3n + 1}. 263 r2 is stored at {pp +11n, 3n + 1}. 264 r0 is stored at {pp +15n, spt}. 265 266 The other values are 3n+1 limbs each (with most significant limbs small). 267 268 Negative intermediate results are stored two-complemented. 269 Inputs are destroyed. 270 */ 271 272 void 273 mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7, 274 mp_size_t n, mp_size_t spt, int half, mp_ptr wsi) 275 { 276 mp_limb_t cy; 277 mp_size_t n3; 278 mp_size_t n3p1; 279 n3 = 3 * n; 280 n3p1 = n3 + 1; 281 282 #define r6 (pp + n3) /* 3n+1 */ 283 #define r4 (pp + 7 * n) /* 3n+1 */ 284 #define r2 (pp +11 * n) /* 3n+1 */ 285 #define r0 (pp +15 * n) /* s+t <= 2*n */ 286 287 ASSERT( spt <= 2 * n ); 288 /******************************* interpolation *****************************/ 289 if( half != 0) { 290 cy = mpn_sub_n (r4, r4, r0, spt); 291 MPN_DECR_U (r4 + spt, n3p1 - spt, cy); 292 293 cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi); 294 MPN_DECR_U (r3 + spt, n3p1 - spt, cy); 295 DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi); 296 297 cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi); 298 MPN_DECR_U (r2 + spt, n3p1 - spt, cy); 299 DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi); 300 301 cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi); 302 MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy); 303 #if BIT_CORRECTION 304 /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */ 305 cy = r7[n3p1]; 306 r7[n3p1] = 0x80; 307 #endif 308 DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi); 309 #if BIT_CORRECTION 310 /* FIXME: assumes r7[n3p1] is writable. */ 311 ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 ); 312 r7[n3p1] = cy; 313 #endif 314 }; 315 316 r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi); 317 DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi); 318 319 #if HAVE_NATIVE_mpn_add_n_sub_n 320 mpn_add_n_sub_n (r2, r5, r5, r2, n3p1); 321 #else 322 mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */ 323 ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1)); 324 MP_PTR_SWAP(r5, wsi); 325 #endif 326 327 r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi); 328 DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi); 329 330 #if HAVE_NATIVE_mpn_add_n_sub_n 331 mpn_add_n_sub_n (r3, r6, r6, r3, n3p1); 332 #else 333 ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1)); 334 mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */ 335 MP_PTR_SWAP(r3, wsi); 336 #endif 337 338 cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi); 339 #if BIT_CORRECTION 340 MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6); 341 cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi); 342 cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy); 343 ASSERT ( BIT_CORRECTION > 0 || cy != 0 ); 344 #else 345 r7[n3] -= cy; 346 DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi); 347 #endif 348 349 #if HAVE_NATIVE_mpn_add_n_sub_n 350 mpn_add_n_sub_n (r1, r7, r7, r1, n3p1); 351 #else 352 mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */ 353 mpn_add_n (r1, r1, r7, n3p1); /* if BIT_CORRECTION != 0, can give a carry. */ 354 MP_PTR_SWAP(r7, wsi); 355 #endif 356 357 r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n); 358 359 #if AORSMUL_FASTER_2AORSLSH 360 mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */ 361 #else 362 DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */ 363 DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */ 364 #endif 365 366 mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */ 367 #if AORSMUL_FASTER_3AORSLSH 368 mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */ 369 #else 370 DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */ 371 DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */ 372 DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */ 373 #endif 374 mpn_divexact_by255x188513325(r7, r7, n3p1); 375 376 mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */ 377 /* A division by 2835x64 followsi. Warning: the operand can be negative! */ 378 mpn_divexact_by2835x64(r5, r5, n3p1); 379 if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0) 380 r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6)); 381 382 #if AORSMUL_FASTER_AORS_AORSLSH 383 mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */ 384 #else 385 mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */ 386 DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */ 387 #endif 388 #if AORSMUL_FASTER_2AORSLSH 389 mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */ 390 #else 391 DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */ 392 DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */ 393 #endif 394 /* A division by 255x4 followsi. Warning: the operand can be negative! */ 395 mpn_divexact_by255x4(r6, r6, n3p1); 396 if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0) 397 r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2)); 398 399 ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi)); 400 401 ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi)); 402 ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400)); 403 404 /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/ 405 DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi); 406 mpn_submul_1 (r1, r2, n3p1, 1428); 407 mpn_submul_1 (r1, r3, n3p1, 112896); 408 mpn_divexact_by255x182712915(r1, r1, n3p1); 409 410 ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425)); 411 mpn_divexact_by42525x16(r2, r2, n3p1); 412 413 #if AORSMUL_FASTER_AORS_2AORSLSH 414 ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969)); 415 #else 416 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1)); 417 ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi)); 418 ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi)); 419 #endif 420 ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900)); 421 mpn_divexact_by9x16(r3, r3, n3p1); 422 423 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1)); 424 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1)); 425 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1)); 426 427 mpn_add_n (r6, r2, r6, n3p1); 428 ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1)); 429 ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1)); 430 431 mpn_sub_n (r5, r3, r5, n3p1); 432 ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1)); 433 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1)); 434 435 mpn_add_n (r7, r1, r7, n3p1); 436 ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1)); 437 ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1)); 438 439 /* last interpolation steps... */ 440 /* ... could be mixed with recomposition 441 ||H-r7|M-r7|L-r7| ||H-r5|M-r5|L-r5| 442 */ 443 444 /***************************** recomposition *******************************/ 445 /* 446 pp[] prior to operations: 447 |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp 448 449 summation scheme for remaining operations: 450 |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp 451 |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp 452 ||H r1|M r1|L r1| ||H r3|M r3|L r3| ||H_r5|M_r5|L_r5| ||H r7|M r7|L r7| 453 */ 454 455 cy = mpn_add_n (pp + n, pp + n, r7, n); 456 cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy); 457 #if HAVE_NATIVE_mpn_add_nc 458 cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy); 459 #else 460 MPN_INCR_U (r7 + 2 * n, n + 1, cy); 461 cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n); 462 #endif 463 MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy); 464 465 pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n); 466 cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]); 467 #if HAVE_NATIVE_mpn_add_nc 468 cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy); 469 #else 470 MPN_INCR_U (r5 + 2 * n, n + 1, cy); 471 cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n); 472 #endif 473 MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy); 474 475 pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n); 476 cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]); 477 #if HAVE_NATIVE_mpn_add_nc 478 cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy); 479 #else 480 MPN_INCR_U (r3 + 2 * n, n + 1, cy); 481 cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n); 482 #endif 483 MPN_INCR_U (pp +12 * n, 2 * n + 1, cy); 484 485 pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n); 486 if ( half ) { 487 cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]); 488 #if HAVE_NATIVE_mpn_add_nc 489 if(LIKELY(spt > n)) { 490 cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy); 491 MPN_INCR_U (pp + 16 * n, spt - n, cy); 492 } else { 493 ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy)); 494 } 495 #else 496 MPN_INCR_U (r1 + 2 * n, n + 1, cy); 497 if(LIKELY(spt > n)) { 498 cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n); 499 MPN_INCR_U (pp + 16 * n, spt - n, cy); 500 } else { 501 ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt)); 502 } 503 #endif 504 } else { 505 ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n])); 506 } 507 508 #undef r0 509 #undef r2 510 #undef r4 511 #undef r6 512 } 513