1 /* mpn_perfect_power_p -- mpn perfect power detection.
2 
3    Contributed to the GNU project by Martin Boij.
4 
5 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
6 
7 This file is part of the GNU MP Library.
8 
9 The GNU MP Library is free software; you can redistribute it and/or modify
10 it under the terms of either:
11 
12   * the GNU Lesser General Public License as published by the Free
13     Software Foundation; either version 3 of the License, or (at your
14     option) any later version.
15 
16 or
17 
18   * the GNU General Public License as published by the Free Software
19     Foundation; either version 2 of the License, or (at your option) any
20     later version.
21 
22 or both in parallel, as here.
23 
24 The GNU MP Library is distributed in the hope that it will be useful, but
25 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27 for more details.
28 
29 You should have received copies of the GNU General Public License and the
30 GNU Lesser General Public License along with the GNU MP Library.  If not,
31 see https://www.gnu.org/licenses/.  */
32 
33 #include "gmp.h"
34 #include "gmp-impl.h"
35 #include "longlong.h"
36 
37 #define SMALL 20
38 #define MEDIUM 100
39 
40 /* Return non-zero if {np,nn} == {xp,xn} ^ k.
41    Algorithm:
42        For s = 1, 2, 4, ..., s_max, compute the s least significant limbs of
43        {xp,xn}^k. Stop if they don't match the s least significant limbs of
44        {np,nn}.
45 
46    FIXME: Low xn limbs can be expected to always match, if computed as a mod
47    B^{xn} root. So instead of using mpn_powlo, compute an approximation of the
48    most significant (normalized) limb of {xp,xn} ^ k (and an error bound), and
49    compare to {np, nn}. Or use an even cruder approximation based on fix-point
50    base 2 logarithm.  */
51 static int
pow_equals(mp_srcptr np,mp_size_t n,mp_srcptr xp,mp_size_t xn,mp_limb_t k,mp_bitcnt_t f,mp_ptr tp)52 pow_equals (mp_srcptr np, mp_size_t n,
53 	    mp_srcptr xp,mp_size_t xn,
54 	    mp_limb_t k, mp_bitcnt_t f,
55 	    mp_ptr tp)
56 {
57   mp_limb_t *tp2;
58   mp_bitcnt_t y, z;
59   mp_size_t i, bn;
60   int ans;
61   mp_limb_t h, l;
62   TMP_DECL;
63 
64   ASSERT (n > 1 || (n == 1 && np[0] > 1));
65   ASSERT (np[n - 1] > 0);
66   ASSERT (xn > 0);
67 
68   if (xn == 1 && xp[0] == 1)
69     return 0;
70 
71   z = 1 + (n >> 1);
72   for (bn = 1; bn < z; bn <<= 1)
73     {
74       mpn_powlo (tp, xp, &k, 1, bn, tp + bn);
75       if (mpn_cmp (tp, np, bn) != 0)
76 	return 0;
77     }
78 
79   TMP_MARK;
80 
81   /* Final check. Estimate the size of {xp,xn}^k before computing the power
82      with full precision.  Optimization: It might pay off to make a more
83      accurate estimation of the logarithm of {xp,xn}, rather than using the
84      index of the MSB.  */
85 
86   MPN_SIZEINBASE_2EXP(y, xp, xn, 1);
87   y -= 1;  /* msb_index (xp, xn) */
88 
89   umul_ppmm (h, l, k, y);
90   h -= l == 0;  l--;	/* two-limb decrement */
91 
92   z = f - 1; /* msb_index (np, n) */
93   if (h == 0 && l <= z)
94     {
95       mp_limb_t size;
96       size = l + k;
97       ASSERT_ALWAYS (size >= k);
98 
99       y = 2 + size / GMP_LIMB_BITS;
100       tp2 = TMP_ALLOC_LIMBS (y);
101 
102       i = mpn_pow_1 (tp, xp, xn, k, tp2);
103       if (i == n && mpn_cmp (tp, np, n) == 0)
104 	ans = 1;
105       else
106 	ans = 0;
107     }
108   else
109     {
110       ans = 0;
111     }
112 
113   TMP_FREE;
114   return ans;
115 }
116 
117 
118 /* Return non-zero if N = {np,n} is a kth power.
119    I = {ip,n} = N^(-1) mod B^n.  */
120 static int
is_kth_power(mp_ptr rp,mp_srcptr np,mp_limb_t k,mp_srcptr ip,mp_size_t n,mp_bitcnt_t f,mp_ptr tp)121 is_kth_power (mp_ptr rp, mp_srcptr np,
122 	      mp_limb_t k, mp_srcptr ip,
123 	      mp_size_t n, mp_bitcnt_t f,
124 	      mp_ptr tp)
125 {
126   mp_bitcnt_t b;
127   mp_size_t rn, xn;
128 
129   ASSERT (n > 0);
130   ASSERT ((k & 1) != 0 || k == 2);
131   ASSERT ((np[0] & 1) != 0);
132 
133   if (k == 2)
134     {
135       b = (f + 1) >> 1;
136       rn = 1 + b / GMP_LIMB_BITS;
137       if (mpn_bsqrtinv (rp, ip, b, tp) != 0)
138 	{
139 	  rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
140 	  xn = rn;
141 	  MPN_NORMALIZE (rp, xn);
142 	  if (pow_equals (np, n, rp, xn, k, f, tp) != 0)
143 	    return 1;
144 
145 	  /* Check if (2^b - r)^2 == n */
146 	  mpn_neg (rp, rp, rn);
147 	  rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
148 	  MPN_NORMALIZE (rp, rn);
149 	  if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
150 	    return 1;
151 	}
152     }
153   else
154     {
155       b = 1 + (f - 1) / k;
156       rn = 1 + (b - 1) / GMP_LIMB_BITS;
157       mpn_brootinv (rp, ip, rn, k, tp);
158       if ((b % GMP_LIMB_BITS) != 0)
159 	rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
160       MPN_NORMALIZE (rp, rn);
161       if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
162 	return 1;
163     }
164   MPN_ZERO (rp, rn); /* Untrash rp */
165   return 0;
166 }
167 
168 static int
perfpow(mp_srcptr np,mp_size_t n,mp_limb_t ub,mp_limb_t g,mp_bitcnt_t f,int neg)169 perfpow (mp_srcptr np, mp_size_t n,
170 	 mp_limb_t ub, mp_limb_t g,
171 	 mp_bitcnt_t f, int neg)
172 {
173   mp_ptr ip, tp, rp;
174   mp_limb_t k;
175   int ans;
176   mp_bitcnt_t b;
177   gmp_primesieve_t ps;
178   TMP_DECL;
179 
180   ASSERT (n > 0);
181   ASSERT ((np[0] & 1) != 0);
182   ASSERT (ub > 0);
183 
184   TMP_MARK;
185   gmp_init_primesieve (&ps);
186   b = (f + 3) >> 1;
187 
188   ip = TMP_ALLOC_LIMBS (n);
189   rp = TMP_ALLOC_LIMBS (n);
190   tp = TMP_ALLOC_LIMBS (5 * n);		/* FIXME */
191   MPN_ZERO (rp, n);
192 
193   /* FIXME: It seems the inverse in ninv is needed only to get non-inverted
194      roots. I.e., is_kth_power computes n^{1/2} as (n^{-1})^{-1/2} and
195      similarly for nth roots. It should be more efficient to compute n^{1/2} as
196      n * n^{-1/2}, with a mullo instead of a binvert. And we can do something
197      similar for kth roots if we switch to an iteration converging to n^{1/k -
198      1}, and we can then eliminate this binvert call. */
199   mpn_binvert (ip, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
200   if (b % GMP_LIMB_BITS)
201     ip[(b - 1) / GMP_LIMB_BITS] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
202 
203   if (neg)
204     gmp_nextprime (&ps);
205 
206   ans = 0;
207   if (g > 0)
208     {
209       ub = MIN (ub, g + 1);
210       while ((k = gmp_nextprime (&ps)) < ub)
211 	{
212 	  if ((g % k) == 0)
213 	    {
214 	      if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
215 		{
216 		  ans = 1;
217 		  goto ret;
218 		}
219 	    }
220 	}
221     }
222   else
223     {
224       while ((k = gmp_nextprime (&ps)) < ub)
225 	{
226 	  if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
227 	    {
228 	      ans = 1;
229 	      goto ret;
230 	    }
231 	}
232     }
233  ret:
234   TMP_FREE;
235   return ans;
236 }
237 
238 static const unsigned short nrtrial[] = { 100, 500, 1000 };
239 
240 /* Table of (log_{p_i} 2) values, where p_i is the (nrtrial[i] + 1)'th prime
241    number.  */
242 static const double logs[] =
243   { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
244 
245 int
mpn_perfect_power_p(mp_srcptr np,mp_size_t n)246 mpn_perfect_power_p (mp_srcptr np, mp_size_t n)
247 {
248   mp_size_t ncn, s, pn, xn;
249   mp_limb_t *nc, factor, g;
250   mp_limb_t exp, *prev, *next, d, l, r, c, *tp, cry;
251   mp_bitcnt_t twos, count;
252   int ans, where, neg, trial;
253   TMP_DECL;
254 
255   nc = (mp_ptr) np;
256 
257   neg = 0;
258   if (n < 0)
259     {
260       neg = 1;
261       n = -n;
262     }
263 
264   if (n == 0 || (n == 1 && np[0] == 1))
265     return 1;
266 
267   TMP_MARK;
268 
269   g = 0;
270 
271   ncn = n;
272   twos = mpn_scan1 (np, 0);
273   if (twos > 0)
274     {
275       if (twos == 1)
276 	{
277 	  ans = 0;
278 	  goto ret;
279 	}
280       s = twos / GMP_LIMB_BITS;
281       if (s + 1 == n && POW2_P (np[s]))
282 	{
283 	  ans = ! (neg && POW2_P (twos));
284 	  goto ret;
285 	}
286       count = twos % GMP_LIMB_BITS;
287       ncn = n - s;
288       nc = TMP_ALLOC_LIMBS (ncn);
289       if (count > 0)
290 	{
291 	  mpn_rshift (nc, np + s, ncn, count);
292 	  ncn -= (nc[ncn - 1] == 0);
293 	}
294       else
295 	{
296 	  MPN_COPY (nc, np + s, ncn);
297 	}
298       g = twos;
299     }
300 
301   if (ncn <= SMALL)
302     trial = 0;
303   else if (ncn <= MEDIUM)
304     trial = 1;
305   else
306     trial = 2;
307 
308   where = 0;
309   factor = mpn_trialdiv (nc, ncn, nrtrial[trial], &where);
310 
311   if (factor != 0)
312     {
313       if (twos == 0)
314 	{
315 	  nc = TMP_ALLOC_LIMBS (ncn);
316 	  MPN_COPY (nc, np, ncn);
317 	}
318 
319       /* Remove factors found by trialdiv.  Optimization: Perhaps better to use
320 	 the strategy in mpz_remove ().  */
321       prev = TMP_ALLOC_LIMBS (ncn + 2);
322       next = TMP_ALLOC_LIMBS (ncn + 2);
323       tp = TMP_ALLOC_LIMBS (4 * ncn);
324 
325       do
326 	{
327 	  binvert_limb (d, factor);
328 	  prev[0] = d;
329 	  pn = 1;
330 	  exp = 1;
331 	  while (2 * pn - 1 <= ncn)
332 	    {
333 	      mpn_sqr (next, prev, pn);
334 	      xn = 2 * pn;
335 	      xn -= (next[xn - 1] == 0);
336 
337 	      if (mpn_divisible_p (nc, ncn, next, xn) == 0)
338 		break;
339 
340 	      exp <<= 1;
341 	      pn = xn;
342 	      MP_PTR_SWAP (next, prev);
343 	    }
344 
345 	  /* Binary search for the exponent */
346 	  l = exp + 1;
347 	  r = 2 * exp - 1;
348 	  while (l <= r)
349 	    {
350 	      c = (l + r) >> 1;
351 	      if (c - exp > 1)
352 		{
353 		  xn = mpn_pow_1 (tp, &d, 1, c - exp, next);
354 		  if (pn + xn - 1 > ncn)
355 		    {
356 		      r = c - 1;
357 		      continue;
358 		    }
359 		  mpn_mul (next, prev, pn, tp, xn);
360 		  xn += pn;
361 		  xn -= (next[xn - 1] == 0);
362 		}
363 	      else
364 		{
365 		  cry = mpn_mul_1 (next, prev, pn, d);
366 		  next[pn] = cry;
367 		  xn = pn + (cry != 0);
368 		}
369 
370 	      if (mpn_divisible_p (nc, ncn, next, xn) == 0)
371 		{
372 		  r = c - 1;
373 		}
374 	      else
375 		{
376 		  exp = c;
377 		  l = c + 1;
378 		  MP_PTR_SWAP (next, prev);
379 		  pn = xn;
380 		}
381 	    }
382 
383 	  if (g == 0)
384 	    g = exp;
385 	  else
386 	    g = mpn_gcd_1 (&g, 1, exp);
387 
388 	  if (g == 1)
389 	    {
390 	      ans = 0;
391 	      goto ret;
392 	    }
393 
394 	  mpn_divexact (next, nc, ncn, prev, pn);
395 	  ncn = ncn - pn;
396 	  ncn += next[ncn] != 0;
397 	  MPN_COPY (nc, next, ncn);
398 
399 	  if (ncn == 1 && nc[0] == 1)
400 	    {
401 	      ans = ! (neg && POW2_P (g));
402 	      goto ret;
403 	    }
404 
405 	  factor = mpn_trialdiv (nc, ncn, nrtrial[trial], &where);
406 	}
407       while (factor != 0);
408     }
409 
410   MPN_SIZEINBASE_2EXP(count, nc, ncn, 1);   /* log (nc) + 1 */
411   d = (mp_limb_t) (count * logs[trial] + 1e-9) + 1;
412   ans = perfpow (nc, ncn, d, g, count, neg);
413 
414  ret:
415   TMP_FREE;
416   return ans;
417 }
418