1 /* Compute {up,n}^(-1) mod 2(n*GMP_NUMB_BITS). 2 3 Contributed to the GNU project by Torbjorn Granlund. 4 5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS 6 ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS 7 ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP 8 RELEASE. 9 10 Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc. 11 12 This file is part of the GNU MP Library. 13 14 The GNU MP Library is free software; you can redistribute it and/or modify 15 it under the terms of the GNU Lesser General Public License as published by 16 the Free Software Foundation; either version 3 of the License, or (at your 17 option) any later version. 18 19 The GNU MP Library is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 22 License for more details. 23 24 You should have received a copy of the GNU Lesser General Public License 25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 26 27 #include "gmp.h" 28 #include "gmp-impl.h" 29 30 31 /* 32 r[k+1] = r[k] - r[k] * (u*r[k] - 1) 33 r[k+1] = r[k] + r[k] - r[k]*(u*r[k]) 34 */ 35 36 /* This is intended for constant THRESHOLDs only, where the compiler can 37 completely fold the result. */ 38 #define LOG2C(n) \ 39 (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \ 40 ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \ 41 ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \ 42 ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000)) 43 44 #if TUNE_PROGRAM_BUILD 45 #define NPOWS \ 46 ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t))) 47 #else 48 #define NPOWS \ 49 ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (BINV_NEWTON_THRESHOLD)) 50 #endif 51 52 mp_size_t 53 mpn_binvert_itch (mp_size_t n) 54 { 55 #if WANT_FFT 56 if (ABOVE_THRESHOLD (n, 2 * MUL_FFT_MODF_THRESHOLD)) 57 return mpn_fft_next_size (n, mpn_fft_best_k (n, 0)); 58 else 59 #endif 60 return 3 * (n - (n >> 1)); 61 } 62 63 void 64 mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) 65 { 66 mp_ptr xp; 67 mp_size_t rn, newrn; 68 mp_size_t sizes[NPOWS], *sizp; 69 mp_limb_t di; 70 71 /* Compute the computation precisions from highest to lowest, leaving the 72 base case size in 'rn'. */ 73 sizp = sizes; 74 for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1) 75 *sizp++ = rn; 76 77 xp = scratch; 78 79 /* Compute a base value using a low-overhead O(n^2) algorithm. FIXME: We 80 should call some divide-and-conquer lsb division function here for an 81 operand subrange. */ 82 MPN_ZERO (xp, rn); 83 xp[0] = 1; 84 binvert_limb (di, up[0]); 85 if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD)) 86 mpn_sb_bdiv_q (rp, xp, rn, up, rn, -di); 87 else 88 mpn_dc_bdiv_q (rp, xp, rn, up, rn, -di); 89 90 /* Use Newton iterations to get the desired precision. */ 91 for (; rn < n; rn = newrn) 92 { 93 newrn = *--sizp; 94 95 #if WANT_FFT 96 if (ABOVE_THRESHOLD (newrn, 2 * MUL_FFT_MODF_THRESHOLD)) 97 { 98 int k; 99 mp_size_t m, i; 100 101 k = mpn_fft_best_k (newrn, 0); 102 m = mpn_fft_next_size (newrn, k); 103 mpn_mul_fft (xp, m, up, newrn, rp, rn, k); 104 for (i = rn - 1; i >= 0; i--) 105 if (xp[i] > (i == 0)) 106 { 107 mpn_add_1 (xp + rn, xp + rn, newrn - rn, 1); 108 break; 109 } 110 } 111 else 112 #endif 113 mpn_mul (xp, up, newrn, rp, rn); 114 mpn_mullow_n (rp + rn, rp, xp + rn, newrn - rn); 115 mpn_neg_n (rp + rn, rp + rn, newrn - rn); 116 } 117 } 118