1 /* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
2 store the truncated integer part at rootp and the remainder at remp.
3
4 Contributed by Paul Zimmermann (algorithm) and
5 Paul Zimmermann and Torbjorn Granlund (implementation).
6
7 THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES. IT'S
8 ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT'S ALMOST
9 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
10
11 Copyright 2002, 2005, 2009-2012 Free Software Foundation, Inc.
12
13 This file is part of the GNU MP Library.
14
15 The GNU MP Library is free software; you can redistribute it and/or modify
16 it under the terms of either:
17
18 * the GNU Lesser General Public License as published by the Free
19 Software Foundation; either version 3 of the License, or (at your
20 option) any later version.
21
22 or
23
24 * the GNU General Public License as published by the Free Software
25 Foundation; either version 2 of the License, or (at your option) any
26 later version.
27
28 or both in parallel, as here.
29
30 The GNU MP Library is distributed in the hope that it will be useful, but
31 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
32 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
33 for more details.
34
35 You should have received copies of the GNU General Public License and the
36 GNU Lesser General Public License along with the GNU MP Library. If not,
37 see https://www.gnu.org/licenses/. */
38
39 /* FIXME:
40 This implementation is not optimal when remp == NULL, since the complexity
41 is M(n), whereas it should be M(n/k) on average.
42 */
43
44 #include <stdio.h> /* for NULL */
45
46 #include "gmp.h"
47 #include "gmp-impl.h"
48 #include "longlong.h"
49
50 static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
51 mp_limb_t, int);
52
/* MPN_RSHIFT(cy,rp,up,un,cnt): {rp,un} <- {up,un} >> cnt, with cy receiving
   the limb of bits shifted out.  mpn_rshift does not accept a zero shift
   count, so cnt == 0 falls back to a plain limb copy with cy = 0.
   MPN_COPY_INCR copies low limbs first.  */
#define MPN_RSHIFT(cy,rp,up,un,cnt) \
  do {									\
    if ((cnt) != 0)							\
      cy = mpn_rshift (rp, up, un, cnt);				\
    else								\
      {									\
	MPN_COPY_INCR (rp, up, un);					\
	cy = 0;								\
      }									\
  } while (0)
63
/* MPN_LSHIFT(cy,rp,up,un,cnt): {rp,un} <- {up,un} << cnt, with cy receiving
   the limb of bits shifted out.  mpn_lshift does not accept a zero shift
   count, so cnt == 0 falls back to a plain limb copy with cy = 0.
   MPN_COPY_DECR copies high limbs first, which is required for the in-place
   uses in this file where the destination lies above the source.  */
#define MPN_LSHIFT(cy,rp,up,un,cnt) \
  do {									\
    if ((cnt) != 0)							\
      cy = mpn_lshift (rp, up, un, cnt);				\
    else								\
      {									\
	MPN_COPY_DECR (rp, up, un);					\
	cy = 0;								\
      }									\
  } while (0)
74
75
76 /* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
77 If remp <> NULL, put in {remp, un} the remainder.
78 Return the size (in limbs) of the remainder if remp <> NULL,
79 or a non-zero value iff the remainder is non-zero when remp = NULL.
80 Assumes:
81 (a) up[un-1] is not zero
82 (b) rootp has at least space for ceil(un/k) limbs
83 (c) remp has at least space for un limbs (in case remp <> NULL)
84 (d) the operands do not overlap.
85
86 The auxiliary memory usage is 3*un+2 if remp = NULL,
87 and 2*un+2 if remp <> NULL. FIXME: This is an incorrect comment.
88 */
89 mp_size_t
mpn_rootrem(mp_ptr rootp,mp_ptr remp,mp_srcptr up,mp_size_t un,mp_limb_t k)90 mpn_rootrem (mp_ptr rootp, mp_ptr remp,
91 mp_srcptr up, mp_size_t un, mp_limb_t k)
92 {
93 mp_size_t m;
94 ASSERT (un > 0);
95 ASSERT (up[un - 1] != 0);
96 ASSERT (k > 1);
97
98 m = (un - 1) / k; /* ceil(un/k) - 1 */
99 if (remp == NULL && m > 2)
100 /* Pad {up,un} with k zero limbs. This will produce an approximate root
101 with one more limb, allowing us to compute the exact integral result. */
102 {
103 mp_ptr sp, wp;
104 mp_size_t rn, sn, wn;
105 TMP_DECL;
106 TMP_MARK;
107 wn = un + k;
108 wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */
109 sn = m + 2; /* ceil(un/k) + 1 */
110 sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */
111 MPN_COPY (wp + k, up, un);
112 MPN_ZERO (wp, k);
113 rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
114 /* The approximate root S = {sp,sn} is either the correct root of
115 {sp,sn}, or 1 too large. Thus unless the least significant limb of
116 S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
117 limb. (In case sp[0]=1, we can deduce the root, but not decide
118 whether it is exact or not.) */
119 MPN_COPY (rootp, sp + 1, sn - 1);
120 TMP_FREE;
121 return rn;
122 }
123 else
124 {
125 return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
126 }
127 }
128
/* Core routine: compute the kth root of {up,un} into rootp and, when
   remp <> NULL, the remainder into remp.  If approx is non-zero, the final
   remainder is not computed, and the returned root may be one too large;
   the caller (mpn_rootrem) compensates by working on a padded input.

   Return the limb count of the remainder when remp <> NULL, otherwise a
   non-zero value iff the remainder is non-zero.

   The algorithm is the Newton-type iteration analyzed in "Modern Computer
   Arithmetic" (Brent and Zimmermann), Chapter 1: starting from the 1-bit
   approximation 2^(xnb-1), each pass extends the known high bits of the
   root (roughly doubling them, minus log2(k)), consuming the matching
   k*b bits of the input.  */
static mp_size_t
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
		      mp_limb_t k, int approx)
{
  mp_ptr qp, rp, sp, wp, scratch;
  mp_size_t qn, rn, sn, wn, nl, bn;
  mp_limb_t save, save2, cy;
  unsigned long int unb; /* number of significant bits of {up,un} */
  unsigned long int xnb; /* number of significant bits of the result */
  unsigned long b, kk;
  unsigned long sizes[GMP_NUMB_BITS + 1];
  int ni, i;
  int c;
  int logk;
  TMP_DECL;

  TMP_MARK;

  /* When the caller supplies no remainder area, allocate one; it doubles
     as the mpn_div_q scratch space.  */
  if (remp == NULL)
    {
      rp = TMP_ALLOC_LIMBS (un + 1);     /* will contain the remainder */
      scratch = rp;			 /* used by mpn_div_q */
    }
  else
    {
      scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
      rp = remp;
    }
  sp = rootp;

  MPN_SIZEINBASE_2EXP(unb, up, un, 1);
  /* unb is the number of bits of the input U */

  xnb = (unb - 1) / k + 1;	/* ceil (unb / k) */
  /* xnb is the number of bits of the root R */

  if (xnb == 1) /* root is 1 */
    {
      if (remp == NULL)
	remp = rp;
      mpn_sub_1 (remp, up, un, (mp_limb_t) 1); /* remainder is U - 1 */
      MPN_NORMALIZE (remp, un);	/* There should be at most one zero limb,
				   if we demand u to be normalized  */
      rootp[0] = 1;
      TMP_FREE;
      return un;
    }

  /* We initialize the algorithm with a 1-bit approximation to zero: since we
     know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that
     r0^k = 2^(k*(xnb-1)), that we subtract to the input.  */
  kk = k * (xnb - 1);		/* number of truncated bits in the input */
  rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */
  MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
  mpn_sub_1 (rp, rp, rn, 1);	/* subtract the initial approximation: since
				   the non-truncated part is less than 2^k, it
				   is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */
  sp[0] = 1;			/* initial approximation */
  sn = 1;			/* it has one limb */

  for (logk = 1; ((k - 1) >> logk) != 0; logk++)
    ;
  /* logk = ceil(log(k)/log(2)) */

  b = xnb - 1; /* number of remaining bits to determine in the kth root */
  ni = 0;
  /* Precompute the per-iteration bit counts, working backwards from the
     full width b down to 0, so the main loop can walk them forwards.  */
  while (b != 0)
    {
      /* invariant: here we want b+1 total bits for the kth root */
      sizes[ni] = b;
      /* if c is the new value of b, this means that we'll go from a root
	 of c+1 bits (say s') to a root of b+1 bits.
	 It is proved in the book "Modern Computer Arithmetic" from Brent
	 and Zimmermann, Chapter 1, that
	 if s' >= k*beta, then at most one correction is necessary.
	 Here beta = 2^(b-c), and s' >= 2^c, thus it suffices that
	 c >= ceil((b + log2(k))/2). */
      b = (b + logk + 1) / 2;
      if (b >= sizes[ni])
	b = sizes[ni] - 1; /* add just one bit at a time */
      ni++;
    }
  sizes[ni] = 0;
  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
     sizes[i] <= 2 * sizes[i+1].
     Newton iteration will first compute sizes[ni-1] extra bits,
     then sizes[ni-2], ..., then sizes[0] = b. */

  /* qp and wp need enough space to store S'^k where S' is an approximate
     root. Since S' can be as large as S+2, the worst case is when S=2 and
     S'=4. But then since we know the number of bits of S in advance, S'
     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
     fits in un limbs, the number of extra limbs needed is bounded by
     ceil(k*log2(3/2)/GMP_NUMB_BITS).  (0.585 ~ log2(3/2).) */
#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
					of R/(k*S^(k-1)), and S^k */
  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
					and temporary for mpn_pow_1 */

  wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
  wn = 1;
  for (i = ni; i != 0; i--)
    {
      /* 1: loop invariant:
	 {sp, sn} is the current approximation of the root, which has
		  exactly 1 + sizes[ni] bits.
	 {rp, rn} is the current remainder
	 {wp, wn} = {sp, sn}^(k-1)
	 kk = number of truncated bits of the input
      */
      b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that
				      iteration */

      /* Reinsert a low zero limb if we normalized away the entire remainder */
      if (rn == 0)
	{
	  rp[0] = 0;
	  rn = 1;
	}

      /* first multiply the remainder by 2^b */
      MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS);
      rn = rn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  rp[rn] = cy;
	  rn++;
	}

      kk = kk - b;

      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* Now insert bits [kk,kk+b-1] from the input U */
      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[] */
      save = rp[bn];
      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
	     - floor(kk / GMP_NUMB_BITS)
	     <= 1 + (kk + b - 1) / GMP_NUMB_BITS
		- (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
	     = 2 + (b - 2) / GMP_NUMB_BITS
	 thus since nl is an integer:
	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
      /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
      if (nl - 1 > bn)
	save2 = rp[bn + 1];
      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
      /* set to zero high bits of rp[bn] */
      rp[bn] &= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)) - 1;
      /* restore corresponding bits */
      rp[bn] |= save;
      if (nl - 1 > bn)
	rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
			       they start by bit 0 in rp[0], so they use
			       at most ceil(b/GMP_NUMB_BITS) limbs */

      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
      cy = mpn_mul_1 (wp, wp, wn, k);
      wp[wn] = cy;
      wn += cy != 0;

      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
      if (rn < wn)
	{
	  qn = 0;
	}
      else
	{
	  qn = rn - wn; /* expected quotient size */
	  mpn_div_q (qp, rp, rn, wp, wn, scratch);
	  qn += qp[qn] != 0;
	}

      /* 5: current buffers: {sp,sn}, {qp,qn}.
	 Note: {rp,rn} is not needed any more since we'll compute it from
	 scratch at the end of the loop.
      */

      /* Number of limbs used by b bits, when least significant bit is
	 aligned to least limb */
      bn = (b - 1) / GMP_NUMB_BITS + 1;

      /* the quotient should be smaller than 2^b, since the previous
	 approximation was correctly rounded toward zero; if not, clamp
	 the quotient to 2^b - 1 */
      if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
		      qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS))))
	{
	  qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */
	  MPN_ZERO (qp, qn);
	  qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS);
	  MPN_DECR_U (qp, qn, 1);
	  qn -= qp[qn - 1] == 0;
	}

      /* 6: current buffers: {sp,sn}, {qp,qn} */

      /* multiply the root approximation by 2^b */
      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
      sn = sn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  sp[sn] = cy;
	  sn++;
	}

      /* 7: current buffers: {sp,sn}, {qp,qn} */

      ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn
				   above, q is set to 2^b-1, which has
				   exactly bn limbs */

      /* Combine sB and q to form sB + q.  */
      save = sp[b / GMP_NUMB_BITS];
      MPN_COPY (sp, qp, qn);
      MPN_ZERO (sp + qn, bn - qn);
      sp[b / GMP_NUMB_BITS] |= save;

      /* 8: current buffer: {sp,sn} */

      /* Since each iteration treats b bits from the root and thus k*b bits
	 from the input, and we already considered b bits from the input,
	 we now have to take another (k-1)*b bits from the input. */
      kk -= (k - 1) * b; /* remaining input bits */
      /* {rp, rn} = floor({up, un} / 2^kk) */
      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);
      rn = un - kk / GMP_NUMB_BITS;
      rn -= rp[rn - 1] == 0;

      /* 9: current buffers: {sp,sn}, {rp,rn} */

      /* Correction loop: per the analysis above, at most one decrement of
	 S is needed (checked by ASSERT_ALWAYS (c <= 1) below).  */
      for (c = 0;; c++)
	{
	  /* Compute S^k in {qp,qn}. */
	  if (i == 1)
	    {
	      /* Last iteration: we don't need W anymore. */
	      /* mpn_pow_1 requires that both qp and wp have enough space to
		 store the result {sp,sn}^k + 1 limb */
	      approx = approx && (sp[0] > 1);
	      qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0;
	    }
	  else
	    {
	      /* W <- S^(k-1) for the next iteration,
		 and S^k = W * S. */
	      wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
	      mpn_mul (qp, wp, wn, sp, sn);
	      qn = wn + sn;
	      qn -= qp[qn - 1] == 0;
	    }

	  /* if S^k > floor(U/2^kk), the root approximation was too large */
	  if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0))
	    MPN_DECR_U (sp, sn, 1);
	  else
	    break;
	}

      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */

      ASSERT_ALWAYS (c <= 1);
      ASSERT_ALWAYS (rn >= qn);

      /* R = R - Q = floor(U/2^kk) - S^k */
      if (i > 1 || approx == 0)
	{
	  mpn_sub (rp, rp, rn, qp, qn);
	  MPN_NORMALIZE (rp, rn);
	}
      /* otherwise we have rn > 0, thus the return value is ok */

      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
    }

  TMP_FREE;
  return rn;
}
416