1 /* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
2    store the truncated integer part at rootp and the remainder at remp.
3 
4    Contributed by Paul Zimmermann (algorithm) and
5    Paul Zimmermann and Torbjorn Granlund (implementation).
6 
7    THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
8    ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
9    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
10 
11 Copyright 2002, 2005, 2009-2012 Free Software Foundation, Inc.
12 
13 This file is part of the GNU MP Library.
14 
15 The GNU MP Library is free software; you can redistribute it and/or modify
16 it under the terms of either:
17 
18   * the GNU Lesser General Public License as published by the Free
19     Software Foundation; either version 3 of the License, or (at your
20     option) any later version.
21 
22 or
23 
24   * the GNU General Public License as published by the Free Software
25     Foundation; either version 2 of the License, or (at your option) any
26     later version.
27 
28 or both in parallel, as here.
29 
30 The GNU MP Library is distributed in the hope that it will be useful, but
31 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
32 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
33 for more details.
34 
35 You should have received copies of the GNU General Public License and the
36 GNU Lesser General Public License along with the GNU MP Library.  If not,
37 see https://www.gnu.org/licenses/.  */
38 
39 /* FIXME:
40      This implementation is not optimal when remp == NULL, since the complexity
41      is M(n), whereas it should be M(n/k) on average.
42 */
43 
44 #include <stdio.h>		/* for NULL */
45 
46 #include "gmp.h"
47 #include "gmp-impl.h"
48 #include "longlong.h"
49 
50 static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
51 				       mp_limb_t, int);
52 
/* Store {up,un} shifted right by cnt bits at {rp,un}, putting the bits
   shifted out into cy.  A zero count is handled separately with a plain
   (incrementing) copy and cy = 0, since mpn_rshift requires a non-zero
   shift count.  */
#define MPN_RSHIFT(cy,rp,up,un,cnt) \
  do {									\
    if ((cnt) == 0)							\
      {									\
	MPN_COPY_INCR (rp, up, un);					\
	cy = 0;								\
      }									\
    else								\
      cy = mpn_rshift (rp, up, un, cnt);				\
  } while (0)
63 
/* Store {up,un} shifted left by cnt bits at {rp,un}, putting the bits
   shifted out into cy.  A zero count is handled separately with a plain
   (decrementing, i.e. overlap-safe upward) copy and cy = 0, since
   mpn_lshift requires a non-zero shift count.  */
#define MPN_LSHIFT(cy,rp,up,un,cnt) \
  do {									\
    if ((cnt) == 0)							\
      {									\
	MPN_COPY_DECR (rp, up, un);					\
	cy = 0;								\
      }									\
    else								\
      cy = mpn_lshift (rp, up, un, cnt);				\
  } while (0)
74 
75 
/* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
   If remp <> NULL, put in {remp, un} the remainder.
   Return the size (in limbs) of the remainder if remp <> NULL,
	  or a non-zero value iff the remainder is non-zero when remp = NULL.
   Assumes:
   (a) up[un-1] is not zero
   (b) rootp has at least space for ceil(un/k) limbs
   (c) remp has at least space for un limbs (in case remp <> NULL)
   (d) the operands do not overlap.

   The auxiliary memory usage is 3*un+2 if remp = NULL,
   and 2*un+2 if remp <> NULL.  FIXME: This is an incorrect comment.
*/
mp_size_t
mpn_rootrem (mp_ptr rootp, mp_ptr remp,
	     mp_srcptr up, mp_size_t un, mp_limb_t k)
{
  mp_size_t m;
  ASSERT (un > 0);
  ASSERT (up[un - 1] != 0);
  ASSERT (k > 1);

  m = (un - 1) / k;		/* ceil(un/k) - 1 */
  /* NOTE(review): the padded variant below is used only when no remainder
     is requested and the root has at least 3 limbs — presumably the direct
     computation is at least as cheap for smaller roots.  */
  if (remp == NULL && m > 2)
    /* Pad {up,un} with k zero limbs.  This will produce an approximate root
       with one more limb, allowing us to compute the exact integral result. */
    {
      mp_ptr sp, wp;
      mp_size_t rn, sn, wn;
      TMP_DECL;
      TMP_MARK;
      wn = un + k;
      wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */
      sn = m + 2; /* ceil(un/k) + 1 */
      sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */
      MPN_COPY (wp + k, up, un);
      MPN_ZERO (wp, k);
      rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
      /* The approximate root S = {sp,sn} is either the correct root of
	 {wp,wn}, or 1 too large.  Thus unless the least significant limb of
	 S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
	 limb.  (In case sp[0]=1, we can deduce the root, but not decide
	 whether it is exact or not.) */
      MPN_COPY (rootp, sp + 1, sn - 1);
      TMP_FREE;
      return rn;
    }
  else
    {
      return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
    }
}
128 
/* Compute the kth root of {up,un} into rootp, with the same contract as
   mpn_rootrem above (remainder stored at remp when remp <> NULL; return
   value is the remainder size, or a non-zero value iff the remainder is
   non-zero when remp = NULL).
   If approx is non-zero, does not compute the final remainder; the root
   may then be one too large (the caller compensates, see mpn_rootrem).
   Uses a bitwise Newton iteration: the root is built up from its most
   significant bit, roughly doubling the number of known bits per step
   (the schedule is precomputed in sizes[]).  */
static mp_size_t
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
		      mp_limb_t k, int approx)
{
  mp_ptr qp, rp, sp, wp, scratch;
  mp_size_t qn, rn, sn, wn, nl, bn;
  mp_limb_t save, save2, cy;
  unsigned long int unb; /* number of significant bits of {up,un} */
  unsigned long int xnb; /* number of significant bits of the result */
  unsigned long b, kk;
  unsigned long sizes[GMP_NUMB_BITS + 1];
  int ni, i;
  int c;
  int logk;
  TMP_DECL;

  TMP_MARK;

  if (remp == NULL)
    {
      rp = TMP_ALLOC_LIMBS (un + 1);     /* will contain the remainder */
      scratch = rp;			 /* used by mpn_div_q */
    }
  else
    {
      scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
      rp = remp;
    }
  sp = rootp;

  MPN_SIZEINBASE_2EXP(unb, up, un, 1);
  /* unb is the number of bits of the input U */

  xnb = (unb - 1) / k + 1;	/* ceil (unb / k) */
  /* xnb is the number of bits of the root R */

  if (xnb == 1) /* root is 1 */
    {
      if (remp == NULL)
	remp = rp;
      mpn_sub_1 (remp, up, un, (mp_limb_t) 1);
      MPN_NORMALIZE (remp, un);	/* There should be at most one zero limb,
				   if we demand u to be normalized  */
      rootp[0] = 1;
      TMP_FREE;
      return un;
    }

  /* We initialize the algorithm with a 1-bit approximation to zero: since we
     know the root has exactly xnb bits, we write r0 = 2^(xnb-1), so that
     r0^k = 2^(k*(xnb-1)), that we subtract to the input. */
  kk = k * (xnb - 1);		/* number of truncated bits in the input */
  rn = un - kk / GMP_NUMB_BITS; /* number of limbs of the non-truncated part */
  MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
  mpn_sub_1 (rp, rp, rn, 1);	/* subtract the initial approximation: since
				   the non-truncated part is less than 2^k, it
				   is <= k bits: rn <= ceil(k/GMP_NUMB_BITS) */
  sp[0] = 1;			/* initial approximation */
  sn = 1;			/* it has one limb */

  for (logk = 1; ((k - 1) >> logk) != 0; logk++)
    ;
  /* logk = ceil(log(k)/log(2)) */

  b = xnb - 1; /* number of remaining bits to determine in the kth root */
  ni = 0;
  while (b != 0)
    {
      /* invariant: here we want b+1 total bits for the kth root */
      sizes[ni] = b;
      /* if c is the new value of b, this means that we'll go from a root
	 of c+1 bits (say s') to a root of b+1 bits.
	 It is proved in the book "Modern Computer Arithmetic" from Brent
	 and Zimmermann, Chapter 1, that
	 if s' >= k*beta, then at most one correction is necessary.
	 Here beta = 2^(b-c), and s' >= 2^c, thus it suffices that
	 c >= ceil((b + log2(k))/2). */
      b = (b + logk + 1) / 2;
      if (b >= sizes[ni])
	b = sizes[ni] - 1;	/* add just one bit at a time */
      ni++;
    }
  sizes[ni] = 0;
  ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
  /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
     sizes[i] <= 2 * sizes[i+1].
     Newton iteration will first compute sizes[ni-1] extra bits,
     then sizes[ni-2], ..., then sizes[0] = b. */

  /* qp and wp need enough space to store S'^k where S' is an approximate
     root. Since S' can be as large as S+2, the worst case is when S=2 and
     S'=4. But then since we know the number of bits of S in advance, S'
     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
     fits in un limbs, the number of extra limbs needed is bounded by
     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
					of R/(k*S^(k-1)), and S^k */
  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
					and temporary for mpn_pow_1 */

  wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
  wn = 1;
  for (i = ni; i != 0; i--)
    {
      /* 1: loop invariant:
	 {sp, sn} is the current approximation of the root, which has
		  exactly 1 + sizes[ni] bits.
	 {rp, rn} is the current remainder
	 {wp, wn} = {sp, sn}^(k-1)
	 kk = number of truncated bits of the input
      */
      b = sizes[i - 1] - sizes[i]; /* number of bits to compute in that
				      iteration */

      /* Reinsert a low zero limb if we normalized away the entire remainder */
      if (rn == 0)
	{
	  rp[0] = 0;
	  rn = 1;
	}

      /* first multiply the remainder by 2^b */
      MPN_LSHIFT (cy, rp + b / GMP_NUMB_BITS, rp, rn, b % GMP_NUMB_BITS);
      rn = rn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  rp[rn] = cy;
	  rn++;
	}

      kk = kk - b;

      /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* Now insert bits [kk,kk+b-1] from the input U */
      bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[] */
      save = rp[bn];
      /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
      nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
      /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
		 - floor(kk / GMP_NUMB_BITS)
	     <= 1 + (kk + b - 1) / GMP_NUMB_BITS
		  - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
	     = 2 + (b - 2) / GMP_NUMB_BITS
	 thus since nl is an integer:
	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
      /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
      if (nl - 1 > bn)
	save2 = rp[bn + 1];
      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
      /* set to zero high bits of rp[bn] */
      rp[bn] &= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS)) - 1;
      /* restore corresponding bits */
      rp[bn] |= save;
      if (nl - 1 > bn)
	rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
			       they start by bit 0 in rp[0], so they use
			       at most ceil(b/GMP_NUMB_BITS) limbs */

      /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* compute {wp, wn} = k * {sp, sn}^(k-1) */
      cy = mpn_mul_1 (wp, wp, wn, k);
      wp[wn] = cy;
      wn += cy != 0;

      /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */

      /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
      if (rn < wn)
	{
	  qn = 0;
	}
      else
	{
	  qn = rn - wn; /* expected quotient size */
	  mpn_div_q (qp, rp, rn, wp, wn, scratch);
	  qn += qp[qn] != 0;
	}

      /* 5: current buffers: {sp,sn}, {qp,qn}.
	 Note: {rp,rn} is not needed any more since we'll compute it from
	 scratch at the end of the loop.
       */

      /* Number of limbs used by b bits, when least significant bit is
	 aligned to least limb */
      bn = (b - 1) / GMP_NUMB_BITS + 1;

      /* the quotient should be smaller than 2^b, since the previous
	 approximation was correctly rounded toward zero */
      if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
		      qp[qn - 1] >= ((mp_limb_t) 1 << (b % GMP_NUMB_BITS))))
	{
	  /* Clamp the quotient to 2^b-1, the largest b-bit value; the
	     fixup loop below will correct any remaining excess. */
	  qn = b / GMP_NUMB_BITS + 1; /* b+1 bits */
	  MPN_ZERO (qp, qn);
	  qp[qn - 1] = (mp_limb_t) 1 << (b % GMP_NUMB_BITS);
	  MPN_DECR_U (qp, qn, 1);
	  qn -= qp[qn - 1] == 0;
	}

      /* 6: current buffers: {sp,sn}, {qp,qn} */

      /* multiply the root approximation by 2^b */
      MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
      sn = sn + b / GMP_NUMB_BITS;
      if (cy != 0)
	{
	  sp[sn] = cy;
	  sn++;
	}

      /* 7: current buffers: {sp,sn}, {qp,qn} */

      ASSERT_ALWAYS (bn >= qn); /* this is ok since in the case qn > bn
				   above, q is set to 2^b-1, which has
				   exactly bn limbs */

      /* Combine sB and q to form sB + q.  */
      save = sp[b / GMP_NUMB_BITS];
      MPN_COPY (sp, qp, qn);
      MPN_ZERO (sp + qn, bn - qn);
      sp[b / GMP_NUMB_BITS] |= save;

      /* 8: current buffer: {sp,sn} */

      /* Since each iteration treats b bits from the root and thus k*b bits
	 from the input, and we already considered b bits from the input,
	 we now have to take another (k-1)*b bits from the input. */
      kk -= (k - 1) * b; /* remaining input bits */
      /* {rp, rn} = floor({up, un} / 2^kk) */
      MPN_RSHIFT (cy, rp, up + kk / GMP_NUMB_BITS, un - kk / GMP_NUMB_BITS, kk % GMP_NUMB_BITS);
      rn = un - kk / GMP_NUMB_BITS;
      rn -= rp[rn - 1] == 0;

      /* 9: current buffers: {sp,sn}, {rp,rn} */

      /* Fixup loop: decrement S while S^k exceeds floor(U/2^kk); the
	 ASSERT_ALWAYS (c <= 1) below checks at most one correction is
	 needed, as guaranteed by the choice of sizes[] above. */
     for (c = 0;; c++)
	{
	  /* Compute S^k in {qp,qn}. */
	  if (i == 1)
	    {
	      /* Last iteration: we don't need W anymore. */
	      /* mpn_pow_1 requires that both qp and wp have enough space to
		 store the result {sp,sn}^k + 1 limb */
	      /* If sp[0] <= 1, the caller's one-limb truncation trick does
		 not apply (see mpn_rootrem), so compute S^k anyway. */
	      approx = approx && (sp[0] > 1);
	      qn = (approx == 0) ? mpn_pow_1 (qp, sp, sn, k, wp) : 0;
	    }
	  else
	    {
	      /* W <- S^(k-1) for the next iteration,
		 and S^k = W * S. */
	      wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
	      mpn_mul (qp, wp, wn, sp, sn);
	      qn = wn + sn;
	      qn -= qp[qn - 1] == 0;
	    }

	  /* if S^k > floor(U/2^kk), the root approximation was too large */
	  if (qn > rn || (qn == rn && mpn_cmp (qp, rp, rn) > 0))
	    MPN_DECR_U (sp, sn, 1);
	  else
	    break;
	}

      /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */

      ASSERT_ALWAYS (c <= 1);
      ASSERT_ALWAYS (rn >= qn);

      /* R = R - Q = floor(U/2^kk) - S^k */
      if (i > 1 || approx == 0)
	{
	  mpn_sub (rp, rp, rn, qp, qn);
	  MPN_NORMALIZE (rp, rn);
	}
      /* otherwise we have rn > 0, thus the return value is ok */

      /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
    }

  TMP_FREE;
  return rn;
}
416