1 /* mpn_broot -- Compute hensel sqrt
2
3 Contributed to the GNU project by Niels Möller
4
5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
8
9 Copyright 2012 Free Software Foundation, Inc.
10
11 This file is part of the GNU MP Library.
12
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
19
20 or
21
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
25
26 or both in parallel, as here.
27
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
32
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
36
37 #include "gmp-impl.h"
38
39 /* Computes a^e (mod B). Uses right-to-left binary algorithm, since
40 typical use will have e small. */
41 static mp_limb_t
powlimb(mp_limb_t a,mp_limb_t e)42 powlimb (mp_limb_t a, mp_limb_t e)
43 {
44 mp_limb_t r = 1;
45 mp_limb_t s = a;
46
47 for (r = 1, s = a; e > 0; e >>= 1, s *= s)
48 if (e & 1)
49 r *= s;
50
51 return r;
52 }
53
54 /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
55
56 Iterates
57
58 r' <-- r - r * (a^{k-1} r^k - 1) / n
59
60 If
61
62 a^{k-1} r^k = 1 (mod 2^m),
63
64 then
65
66 a^{k-1} r'^k = 1 (mod 2^{2m}),
67
68 Compute the update term as
69
70 r' = r - (a^{k-1} r^{k+1} - r) / k
71
72 where we still have cancellation of low limbs.
73
74 */
75 void
mpn_broot_invm1(mp_ptr rp,mp_srcptr ap,mp_size_t n,mp_limb_t k)76 mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
77 {
78 mp_size_t sizes[GMP_LIMB_BITS * 2];
79 mp_ptr akm1, tp, rnp, ep;
80 mp_limb_t a0, r0, km1, kp1h, kinv;
81 mp_size_t rn;
82 unsigned i;
83
84 TMP_DECL;
85
86 ASSERT (n > 0);
87 ASSERT (ap[0] & 1);
88 ASSERT (k & 1);
89 ASSERT (k >= 3);
90
91 TMP_MARK;
92
93 akm1 = TMP_ALLOC_LIMBS (4*n);
94 tp = akm1 + n;
95
96 km1 = k-1;
97 /* FIXME: Could arrange the iteration so we don't need to compute
98 this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
99 that we can use wraparound also for a*r, since the low half is
100 unchanged from the previous iteration. Or possibly mulmid. Also,
101 a r = a^{1/k}, so we get that value too, for free? */
102 mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
103
104 a0 = ap[0];
105 binvert_limb (kinv, k);
106
107 /* 4 bits: a^{1/k - 1} (mod 16):
108
109 a % 8
110 1 3 5 7
111 k%4 +-------
112 1 |1 1 1 1
113 3 |1 9 9 1
114 */
115 r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
116 r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
117 r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
118 r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
119 #if GMP_NUMB_BITS > 32
120 {
121 unsigned prec = 32;
122 do
123 {
124 r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
125 prec *= 2;
126 }
127 while (prec < GMP_NUMB_BITS);
128 }
129 #endif
130
131 rp[0] = r0;
132 if (n == 1)
133 {
134 TMP_FREE;
135 return;
136 }
137
138 /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
139 kp1h = k/2 + 1;
140
141 /* FIXME: Special case for two limb iteration. */
142 rnp = TMP_ALLOC_LIMBS (2*n + 1);
143 ep = rnp + n;
144
145 /* FIXME: Possible to this on the fly with some bit fiddling. */
146 for (i = 0; n > 1; n = (n + 1)/2)
147 sizes[i++] = n;
148
149 rn = 1;
150
151 while (i-- > 0)
152 {
153 /* Compute x^{k+1}. */
154 mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
155 final iteration. */
156 mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
157
158 /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
159
160 mpn_mullo_n (ep, rnp, akm1, sizes[i]);
161 ASSERT (mpn_cmp (ep, rp, rn) == 0);
162
163 ASSERT (sizes[i] <= 2*rn);
164 mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
165 mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
166 rn = sizes[i];
167 }
168 TMP_FREE;
169 }
170
171 /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
172 void
mpn_broot(mp_ptr rp,mp_srcptr ap,mp_size_t n,mp_limb_t k)173 mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
174 {
175 mp_ptr tp;
176 TMP_DECL;
177
178 ASSERT (n > 0);
179 ASSERT (ap[0] & 1);
180 ASSERT (k & 1);
181
182 if (k == 1)
183 {
184 MPN_COPY (rp, ap, n);
185 return;
186 }
187
188 TMP_MARK;
189 tp = TMP_ALLOC_LIMBS (n);
190
191 mpn_broot_invm1 (tp, ap, n, k);
192 mpn_mullo_n (rp, tp, ap, n);
193
194 TMP_FREE;
195 }
196