xref: /dragonfly/contrib/gmp/mpn/generic/binvert.c (revision bcb3e04d)
1 /* Compute {up,n}^(-1) mod 2(n*GMP_NUMB_BITS).
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
6    ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
7    ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
8    RELEASE.
9 
10 Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Lesser General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
18 
19 The GNU MP Library is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
22 License for more details.
23 
24 You should have received a copy of the GNU Lesser General Public License
25 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
26 
27 #include "gmp.h"
28 #include "gmp-impl.h"
29 
30 
31 /*
32   r[k+1] = r[k] - r[k] * (u*r[k] - 1)
33   r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
34 */
35 
36 /* This is intended for constant THRESHOLDs only, where the compiler can
37    completely fold the result.  */
38 #define LOG2C(n) \
39  (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
40   ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
41   ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
42   ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
43 
44 #if TUNE_PROGRAM_BUILD
45 #define NPOWS \
46  ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
47 #else
48 #define NPOWS \
49  ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (BINV_NEWTON_THRESHOLD))
50 #endif
51 
52 mp_size_t
53 mpn_binvert_itch (mp_size_t n)
54 {
55 #if WANT_FFT
56   if (ABOVE_THRESHOLD (n, 2 * MUL_FFT_MODF_THRESHOLD))
57     return mpn_fft_next_size (n, mpn_fft_best_k (n, 0));
58   else
59 #endif
60     return 3 * (n - (n >> 1));
61 }
62 
63 void
64 mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
65 {
66   mp_ptr xp;
67   mp_size_t rn, newrn;
68   mp_size_t sizes[NPOWS], *sizp;
69   mp_limb_t di;
70 
71   /* Compute the computation precisions from highest to lowest, leaving the
72      base case size in 'rn'.  */
73   sizp = sizes;
74   for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1)
75     *sizp++ = rn;
76 
77   xp = scratch;
78 
79   /* Compute a base value using a low-overhead O(n^2) algorithm.  FIXME: We
80      should call some divide-and-conquer lsb division function here for an
81      operand subrange.  */
82   MPN_ZERO (xp, rn);
83   xp[0] = 1;
84   binvert_limb (di, up[0]);
85   if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))
86     mpn_sb_bdiv_q (rp, xp, rn, up, rn, -di);
87   else
88     mpn_dc_bdiv_q (rp, xp, rn, up, rn, -di);
89 
90   /* Use Newton iterations to get the desired precision.  */
91   for (; rn < n; rn = newrn)
92     {
93       newrn = *--sizp;
94 
95 #if WANT_FFT
96       if (ABOVE_THRESHOLD (newrn, 2 * MUL_FFT_MODF_THRESHOLD))
97 	{
98 	  int k;
99 	  mp_size_t m, i;
100 
101 	  k = mpn_fft_best_k (newrn, 0);
102 	  m = mpn_fft_next_size (newrn, k);
103 	  mpn_mul_fft (xp, m, up, newrn, rp, rn, k);
104 	  for (i = rn - 1; i >= 0; i--)
105 	    if (xp[i] > (i == 0))
106 	      {
107 		mpn_add_1 (xp + rn, xp + rn, newrn - rn, 1);
108 		break;
109 	      }
110 	}
111       else
112 #endif
113 	mpn_mul (xp, up, newrn, rp, rn);
114       mpn_mullow_n (rp + rn, rp, xp + rn, newrn - rn);
115       mpn_neg_n (rp + rn, rp + rn, newrn - rn);
116     }
117 }
118