1 /*
2 * Utility functions, such as sqrt mod p, polynomial manipulation, etc.
3 */
4
5 #include <stdio.h>
6 #include <stddef.h>
7 #include <stdlib.h>
8 #include <gmp.h>
9 #include <math.h>
10
11 #include "ptypes.h"
12
13 /* includes mpz_mulmod(r, a, b, n, temp) */
14 #include "utility.h"
15 #include "factor.h"
16 #include "primality.h"
17 #include "isaac.h"
18
19 static int _verbose = 0;
get_verbose_level(void)20 int get_verbose_level(void) { return _verbose; }
set_verbose_level(int level)21 void set_verbose_level(int level) { _verbose = level; }
22
23 static gmp_randstate_t _randstate;
get_randstate(void)24 gmp_randstate_t* get_randstate(void) { return &_randstate; }
25
26 #if __LITTLE_ENDIAN__ || (defined(BYTEORDER) && (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678))
27 #define LESWAP(mem, val)
28 #else
29 #if !defined(__x86_64__)
30 #undef U8TO32_LE
31 #undef U32TO8_LE
32 #endif
33 #ifndef U32TO8_LE
34 #define U32TO8_LE(p, v) \
35 do { \
36 uint32_t _v = v; \
37 (p)[0] = (((_v) ) & 0xFFU); \
38 (p)[1] = (((_v) >> 8) & 0xFFU); \
39 (p)[2] = (((_v) >> 16) & 0xFFU); \
40 (p)[3] = (((_v) >> 24) & 0xFFU); \
41 } while (0)
42 #endif
43 #define LESWAP(mem, val) U32TO8_LE(mem,val)
44 #endif
45
init_randstate(unsigned long seed)46 void init_randstate(unsigned long seed) {
47 unsigned char seedstr[8] = {0};
48 #if (__GNU_MP_VERSION > 4) || (__GNU_MP_VERSION == 4 && __GNU_MP_VERSION_MINOR >= 2)
49 /* MT was added in GMP 4.2 released in 2006. */
50 gmp_randinit_mt(_randstate);
51 #else
52 gmp_randinit_default(_randstate);
53 #endif
54 gmp_randseed_ui(_randstate, seed);
55
56 #if BITS_PER_WORD == 64
57 if (seed > UVCONST(4294967295)) {
58 LESWAP(seedstr, seed);
59 LESWAP(seedstr + 4, (seed >> 16) >> 16);
60 isaac_init(8, seedstr);
61 } else
62 #endif
63 {
64 LESWAP(seedstr, seed);
65 isaac_init(4, seedstr);
66 }
67 }
clear_randstate(void)68 void clear_randstate(void) { gmp_randclear(_randstate); }
69
irand64(int nbits)70 UV irand64(int nbits)
71 {
72 if (nbits == 0) return 0;
73 if (nbits <= 32) return( isaac_rand32() >> (32-nbits) );
74 #if BITS_PER_WORD == 64
75 if (nbits <= 64) return( ((UV)isaac_rand32() | ((UV)isaac_rand32() << 32)) >> (64-nbits) );
76 #endif
77 croak("irand64 too many bits for UV");
78 }
79 static NV _tonv_32 = -1.0;
80 static NV _tonv_64;
drand64(void)81 NV drand64(void)
82 {
83 if (_tonv_32 < 0) {
84 int i;
85 NV t64, t32 = 1.0;
86 for (i = 0; i < 32; i++)
87 t32 /= 2.0;
88 t64 = t32;
89 for (i = 0; i < 32; i++)
90 t64 /= 2.0;
91 _tonv_64 = t64;
92 _tonv_32 = t32;
93 }
94 return isaac_rand32() * _tonv_32 + isaac_rand32() * _tonv_64;
95 }
96
mpz_isaac_urandomb(mpz_t rop,int nbits)97 void mpz_isaac_urandomb(mpz_t rop, int nbits)
98 {
99 if (nbits <= 32) {
100 mpz_set_ui(rop, irand64(nbits));
101 } else {
102 unsigned char* d;
103 int nbytes = (nbits+7)/8;
104 New(0, d, nbytes, unsigned char);
105 isaac_rand_bytes(nbytes, d);
106 mpz_import(rop, nbytes, 1, sizeof(unsigned char), 0, 0, d);
107 Safefree(d);
108 if (nbits != nbytes*8)
109 mpz_tdiv_r_2exp(rop, rop, nbits);
110 }
111 }
112
mpz_isaac_urandomm(mpz_t rop,mpz_t n)113 void mpz_isaac_urandomm(mpz_t rop, mpz_t n)
114 {
115 int count = 80;
116 unsigned long nbits = mpz_sizeinbase(n,2);
117
118 if (mpz_sgn(n) <= 0) {
119 mpz_set_ui(rop,0);
120 return;
121 } else if (nbits <= 32) {
122 mpz_set_ui(rop, isaac_rand(mpz_get_ui(n)));
123 } else if (nbits < 3000) {
124 /* Just like GMP, try until we're in range or we're tired. */
125 while (count-- > 0) {
126 mpz_isaac_urandomb(rop, nbits);
127 if (mpz_cmp(rop,n) < 0)
128 return;
129 }
130 mpz_mod(rop, rop, n);
131 } else {
132 /* Reduce tries needed by selecting from a range that is a multiple of n
133 * (so no bias) and uses the max space inside the power-of-2 range.
134 * Downside is that we do an alloc and two mods. For large values
135 * it can be much faster however. */
136 mpz_t rmax;
137 mpz_init(rmax);
138 mpz_setbit(rmax, nbits+8);
139 mpz_sub_ui(rmax,rmax,1);
140 mpz_tdiv_q(rmax, rmax, n);
141 mpz_mul(rmax, rmax, n);
142 do {
143 mpz_isaac_urandomb(rop, nbits+8);
144 } while (mpz_cmp(rop, rmax) >= 0 && count-- > 0);
145 mpz_clear(rmax);
146 mpz_mod(rop, rop, n);
147 }
148 }
149
150 /* a=0, return power. a>1, return bool if an a-th power */
is_power(mpz_t n,UV a)151 UV is_power(mpz_t n, UV a)
152 {
153 if (mpz_cmp_ui(n,3) <= 0 && a == 0)
154 return 0;
155 else if (a == 1)
156 return 1;
157 else if (a == 2)
158 return mpz_perfect_square_p(n);
159 else {
160 UV result;
161 mpz_t t;
162 mpz_init(t);
163 result = (a == 0) ? power_factor(n, t) : (UV)mpz_root(t, n, a);
164 mpz_clear(t);
165 return result;
166 }
167 }
168
prime_power(mpz_t prime,mpz_t n)169 UV prime_power(mpz_t prime, mpz_t n)
170 {
171 UV k;
172 if (mpz_even_p(n)) {
173 k = mpz_scan1(n, 0);
174 if (k+1 == mpz_sizeinbase(n, 2)) {
175 mpz_set_ui(prime, 2);
176 return k;
177 }
178 return 0;
179 }
180 if (_GMP_is_prob_prime(n)) {
181 mpz_set(prime, n);
182 return 1;
183 }
184 k = power_factor(n, prime);
185 if (k && !_GMP_is_prob_prime(prime))
186 k = 0;
187 return k;
188 }
189
is_primitive_root(mpz_t ina,mpz_t n,int nprime)190 int is_primitive_root(mpz_t ina, mpz_t n, int nprime)
191 {
192 mpz_t a, s, r, sreduced, t, *factors;
193 int ret, i, nfactors, *exponents;
194
195 if (mpz_sgn(n) == 0)
196 return 0;
197 if (mpz_sgn(n) < 0)
198 mpz_neg(n,n);
199 if (mpz_cmp_ui(n,1) == 0)
200 return 1;
201 if (mpz_cmp_ui(n,4) <= 0)
202 return mpz_get_ui(ina) == mpz_get_ui(n)-1;
203 if (mpz_divisible_2exp_p(n,2))
204 return 0;
205
206 mpz_init(a);
207 mpz_mod(a,ina,n);
208 mpz_init(s);
209 mpz_gcd(s, a, n);
210
211 if (mpz_cmp_ui(s,1) != 0) {
212 mpz_clear(s);
213 mpz_clear(a);
214 return 0;
215 }
216
217 mpz_init(t);
218 if (nprime) {
219 mpz_sub_ui(s, n, 1);
220 } else { /* totient(s, n); */ /* Fine, but slow. */
221 UV k;
222 mpz_init(r);
223 if (mpz_odd_p(n)) mpz_set(t, n); else mpz_fdiv_q_2exp(t, n, 1);
224 k = prime_power(r, t);
225 if (!k) { /* Not of form p^a or 2p^a */
226 mpz_clear(r); mpz_clear(t); mpz_clear(s); mpz_clear(a); return 0;
227 }
228 mpz_divexact(t, t, r);
229 mpz_mul(s, t, r); mpz_sub(s, s, t);
230 mpz_clear(r);
231 }
232 mpz_init_set(sreduced, s);
233
234 ret = 0;
235 mpz_sub_ui(t, n, 1);
236 if (mpz_cmp(s,t) == 0 && mpz_kronecker(a,n) != -1)
237 goto DONE_IPR;
238 /* Unclear if this is worth doing.
239 i = is_power(a, 0);
240 if (i > 1 && mpz_gcd_ui(NULL, s, i) != 1)
241 goto DONE_IPR;
242 */
243
244 #define IPR_TEST_UI(s, p, a, n, t, ret) \
245 mpz_divexact_ui(t, s, p); \
246 mpz_powm(t, a, t, n); \
247 if (mpz_cmp_ui(t, 1) == 0) { ret = 0; }
248
249 #define IPR_TEST(s, p, a, n, t, ret) \
250 mpz_divexact(t, s, p); \
251 mpz_powm(t, a, t, n); \
252 if (mpz_cmp_ui(t, 1) == 0) { ret = 0; }
253
254 ret = 1;
255 { /* Pull out small factors and test */
256 UV p, fromp = 0;
257 while (ret == 1 && (p = _GMP_trial_factor(sreduced, fromp, 60))) {
258 if (mpz_cmp_ui(sreduced,p) == 0) break;
259 IPR_TEST_UI(s, p, a, n, t, ret);
260 mpz_set_ui(t, p);
261 (void) mpz_remove(sreduced, sreduced, t);
262 fromp = p+1;
263 }
264 }
265 if (ret == 0 || mpz_cmp_ui(sreduced,1) == 0)
266 goto DONE_IPR;
267 if (_GMP_BPSW(sreduced)) {
268 IPR_TEST(s, sreduced, a, n, t, ret);
269 goto DONE_IPR;
270 }
271
272 /* Pull out more factors, noting they can be composites. */
273 while (_GMP_pminus1_factor(sreduced, t, 100000, 100000)) {
274 mpz_divexact(sreduced, sreduced, t);
275 if (_GMP_BPSW(t)) {
276 IPR_TEST(s, t, a, n, t, ret);
277 } else {
278 nfactors = factor(t, &factors, &exponents);
279 for (i = 0; ret == 1 && i < nfactors; i++) {
280 IPR_TEST(s, factors[i], a, n, t, ret);
281 }
282 clear_factors(nfactors, &factors, &exponents);
283 }
284 if (ret == 0 || mpz_cmp_ui(sreduced,1) == 0)
285 goto DONE_IPR;
286 if (_GMP_BPSW(sreduced)) {
287 IPR_TEST(s, sreduced, a, n, t, ret);
288 goto DONE_IPR;
289 }
290 }
291
292 /* We have a composite and so far it could be a primitive root. Factor. */
293 nfactors = factor(sreduced, &factors, &exponents);
294 for (i = 0; ret == 1 && i < nfactors; i++) {
295 IPR_TEST(s, factors[i], a, n, t, ret);
296 }
297 clear_factors(nfactors, &factors, &exponents);
298
299 DONE_IPR:
300 mpz_clear(sreduced); mpz_clear(t);
301 mpz_clear(s); mpz_clear(a);
302 return ret;
303 }
304
305
mpz_divmod(mpz_t r,mpz_t a,mpz_t b,mpz_t n,mpz_t t)306 int mpz_divmod(mpz_t r, mpz_t a, mpz_t b, mpz_t n, mpz_t t)
307 {
308 int invertible;
309 invertible = mpz_invert(t, b, n);
310 if (!invertible)
311 return 0;
312 mpz_mulmod(r, t, a, n, t); /* mpz_mul(t,t,a); mpz_mod(r,t,n); */
313 return 1;
314 }
315
316 /* set x to sqrt(a) mod p. Returns 0 if a is not a square root mod p
317 * See Cohen section 1.5 and http://www.math.vt.edu/people/brown/doc/sqrts.pdf
318 */
sqrtmod(mpz_t s,mpz_t a,mpz_t p)319 int sqrtmod(mpz_t s, mpz_t a, mpz_t p) {
320 int res;
321 mpz_t x, t1, t2, t3, t4;
322 mpz_init(x); mpz_init(t1); mpz_init(t2); mpz_init(t3); mpz_init(t4);
323 res = sqrtmod_t(x, a, p, t1, t2, t3, t4);
324 mpz_set(s, x);
325 mpz_clear(x); mpz_clear(t1); mpz_clear(t2); mpz_clear(t3); mpz_clear(t4);
326 return res;
327 }
328
329 /* Returns 1 if x^2 = a mod p, otherwise set x to 0 and return 0. */
verify_sqrt(mpz_t x,mpz_t a,mpz_t p,mpz_t t,mpz_t t2)330 static int verify_sqrt(mpz_t x, mpz_t a, mpz_t p, mpz_t t, mpz_t t2) {
331 /* reflect to get the smaller of +/- x */
332 mpz_sub(t, p, x); if (mpz_cmp(t,x) < 0) mpz_set(x,t);
333
334 mpz_mulmod(t, x, x, p, t2);
335 mpz_mod(t2, a, p);
336 if (mpz_cmp(t, t2) == 0) return 1;
337 mpz_set_ui(x, 0);
338 return 0;
339 }
340
341 /* Internal version that takes temp variables and x cannot overlap args */
sqrtmod_t(mpz_t x,mpz_t a,mpz_t p,mpz_t t,mpz_t q,mpz_t b,mpz_t z)342 int sqrtmod_t(mpz_t x, mpz_t a, mpz_t p,
343 mpz_t t, mpz_t q, mpz_t b, mpz_t z) /* 4 temp variables */
344 {
345 int r, e, m;
346
347 if (mpz_cmp_ui(p,2) <= 0) {
348 if (mpz_cmp_ui(p,0) <= 0) {
349 mpz_set_ui(x,0);
350 return 0;
351 }
352 mpz_mod(x, a, p);
353 return verify_sqrt(x, a, p, t, q);
354 }
355 if (!mpz_cmp_ui(a,0) || !mpz_cmp_ui(a,1)) {
356 mpz_set(x,a);
357 return verify_sqrt(x, a, p, t, q);
358 }
359
360 /* Easy cases from page 31 (or Menezes 3.36, 3.37) */
361 if (mpz_congruent_ui_p(p, 3, 4)) {
362 mpz_add_ui(t, p, 1);
363 mpz_tdiv_q_2exp(t, t, 2);
364 mpz_powm(x, a, t, p);
365 return verify_sqrt(x, a, p, t, q);
366 }
367
368 if (mpz_congruent_ui_p(p, 5, 8)) {
369 mpz_sub_ui(t, p, 1);
370 mpz_tdiv_q_2exp(t, t, 2);
371 mpz_powm(q, a, t, p);
372 if (mpz_cmp_si(q, 1) == 0) { /* s = a^((p+3)/8) mod p */
373 mpz_add_ui(t, p, 3);
374 mpz_tdiv_q_2exp(t, t, 3);
375 mpz_powm(x, a, t, p);
376 } else { /* s = 2a * (4a)^((p-5)/8) mod p */
377 mpz_sub_ui(t, p, 5);
378 mpz_tdiv_q_2exp(t, t, 3);
379 mpz_mul_ui(q, a, 4);
380 mpz_powm(x, q, t, p);
381 mpz_mul_ui(x, x, 2);
382 mpz_mulmod(x, x, a, p, x);
383 }
384 return verify_sqrt(x, a, p, t, q);
385 }
386
387 if (mpz_kronecker(a, p) != 1) {
388 /* Possible no solution exists. Check Euler criterion. */
389 mpz_sub_ui(t, p, 1);
390 mpz_tdiv_q_2exp(t, t, 1);
391 mpz_powm(x, a, t, p);
392 if (mpz_cmp_si(x, 1) != 0) {
393 mpz_set_ui(x, 0);
394 return 0;
395 }
396 }
397
398 mpz_sub_ui(q, p, 1);
399 e = mpz_scan1(q, 0); /* Remove 2^e from q */
400 mpz_tdiv_q_2exp(q, q, e);
401 mpz_set_ui(t, 2);
402 while (mpz_kronecker(t, p) != -1) { /* choose t "at random" */
403 mpz_add_ui(t, t, 1);
404 if (!mpz_cmp_ui(t,133)) {
405 /* If a root of p exists, then our chances are nearly 1/2 that
406 * (t|p) = -1. After 133 tries it seems dubious that a root
407 * exists. It's likely that p is not prime. */
408 if (mpz_even_p(p)) { mpz_set_ui(x,0); return 0; }
409 /* Euler probable prime test with base t. (t|p) = 1 or t divides p */
410 if (mpz_divisible_p(p, t)) { mpz_set_ui(x,0); return 0; }
411 mpz_sub_ui(z, p, 1); mpz_fdiv_q_2exp(b,z,1); mpz_powm(z, t, b, p);
412 if (mpz_cmp_ui(z,1)) { mpz_set_ui(x,0); return 0; }
413 /* Fermat base 2 */
414 mpz_set_ui(b,2); mpz_sub_ui(z, p, 1); mpz_powm(z, b, z, p);
415 if (mpz_cmp_ui(z,1)) { mpz_set_ui(x,0); return 0; }
416 }
417 if (!mpz_cmp_ui(t,286)) {
418 /* Another Euler probable prime test, p not even so t can't divide. */
419 mpz_sub_ui(z, p, 1); mpz_fdiv_q_2exp(b,z,1); mpz_powm(z, t, b, p);
420 if (mpz_cmp_ui(z,1)) { mpz_set_ui(x,0); return 0; }
421 }
422 if (!mpz_cmp_ui(t,20000)) { mpz_set_ui(x,0); return 0; }
423 }
424 mpz_powm(z, t, q, p); /* Step 1 complete */
425 r = e;
426
427 mpz_powm(b, a, q, p);
428 mpz_add_ui(q, q, 1);
429 mpz_divexact_ui(q, q, 2);
430 mpz_powm(x, a, q, p); /* Done with q, will use it for y now */
431
432 while (mpz_cmp_ui(b, 1)) {
433 /* calculate how many times b^2 mod p == 1 */
434 mpz_set(t, b);
435 m = 0;
436 do {
437 mpz_powm_ui(t, t, 2, p);
438 m++;
439 } while (m < r && mpz_cmp_ui(t, 1));
440 if (m >= r) break;
441 mpz_ui_pow_ui(t, 2, r-m-1);
442 mpz_powm(t, z, t, p);
443 mpz_mulmod(x, x, t, p, x);
444 mpz_powm_ui(z, t, 2, p);
445 mpz_mulmod(b, b, z, p, b);
446 r = m;
447 }
448 return verify_sqrt(x, a, p, t, q);
449 }
450
451 /* Smith-Cornacchia: Solve x,y for x^2 + |D|y^2 = p given prime p */
452 /* See Cohen 1.5.2 */
cornacchia(mpz_t x,mpz_t y,mpz_t D,mpz_t p)453 int cornacchia(mpz_t x, mpz_t y, mpz_t D, mpz_t p)
454 {
455 int result = 0;
456 mpz_t a, b, c, d;
457
458 if (mpz_jacobi(D, p) < 0) /* No solution */
459 return 0;
460
461 mpz_init(a); mpz_init(b); mpz_init(c); mpz_init(d);
462
463 sqrtmod_t(x, D, p, a, b, c, d);
464 mpz_set(a, p);
465 mpz_set(b, x);
466 mpz_sqrt(c, p);
467
468 while (mpz_cmp(b,c) > 0) {
469 mpz_set(d, a);
470 mpz_set(a, b);
471 mpz_mod(b, d, b);
472 }
473
474 mpz_mul(a, b, b);
475 mpz_sub(a, p, a); /* a = p - b^2 */
476 mpz_abs(d, D); /* d = |D| */
477
478 if (mpz_divisible_p(a, d)) {
479 mpz_divexact(c, a, d);
480 if (mpz_perfect_square_p(c)) {
481 mpz_set(x, b);
482 mpz_sqrt(y, c);
483 result = 1;
484 }
485 }
486
487 mpz_clear(a); mpz_clear(b); mpz_clear(c); mpz_clear(d);
488
489 return result;
490 }
491
492 /* Modified Cornacchia, Solve x,y for x^2 + |D|y^2 = 4p given prime p */
493 /* See Cohen 1.5.3 */
modified_cornacchia(mpz_t x,mpz_t y,mpz_t D,mpz_t p)494 int modified_cornacchia(mpz_t x, mpz_t y, mpz_t D, mpz_t p)
495 {
496 int result = 0;
497 mpz_t a, b, c, d;
498
499 if (mpz_cmp_ui(p, 2) == 0) {
500 mpz_add_ui(x, D, 8);
501 if (mpz_perfect_square_p(x)) {
502 mpz_sqrt(x, x);
503 mpz_set_ui(y, 1);
504 result = 1;
505 }
506 return result;
507 }
508 if (mpz_jacobi(D, p) == -1) /* No solution */
509 return 0;
510
511 mpz_init(a); mpz_init(b); mpz_init(c); mpz_init(d);
512
513 sqrtmod_t(x, D, p, a, b, c, d);
514 if ( (mpz_even_p(D) && mpz_odd_p(x)) || (mpz_odd_p(D) && mpz_even_p(x)) )
515 mpz_sub(x, p, x);
516
517 mpz_mul_ui(a, p, 2);
518 mpz_set(b, x);
519 mpz_sqrt(c, p);
520 mpz_mul_ui(c, c, 2);
521
522 /* Euclidean algorithm */
523 while (mpz_cmp(b, c) > 0) {
524 mpz_set(d, a);
525 mpz_set(a, b);
526 mpz_mod(b, d, b);
527 }
528
529 mpz_mul_ui(c, p, 4);
530 mpz_mul(a, b, b);
531 mpz_sub(a, c, a); /* a = 4p - b^2 */
532 mpz_abs(d, D); /* d = |D| */
533
534 if (mpz_divisible_p(a, d)) {
535 mpz_divexact(c, a, d);
536 if (mpz_perfect_square_p(c)) {
537 mpz_set(x, b);
538 mpz_sqrt(y, c);
539 result = 1;
540 }
541 }
542
543 mpz_clear(a); mpz_clear(b); mpz_clear(c); mpz_clear(d);
544
545 return result;
546 }
547
548
549 /* Modular inversion: invert a mod p.
550 * This implementation from William Hart, using extended gcd.
551 */
modinverse(unsigned long a,unsigned long p)552 unsigned long modinverse(unsigned long a, unsigned long p)
553 {
554 long u1 = 1;
555 long u3 = a;
556 long v1 = 0;
557 long v3 = p;
558 long t1 = 0;
559 long t3 = 0;
560 long quot;
561 while (v3) {
562 quot = u3 - v3;
563 if (u3 < (v3<<2)) {
564 if (quot < v3) {
565 if (quot < 0) {
566 t1 = u1; u1 = v1; v1 = t1;
567 t3 = u3; u3 = v3; v3 = t3;
568 } else {
569 t1 = u1 - v1; u1 = v1; v1 = t1;
570 t3 = u3 - v3; u3 = v3; v3 = t3;
571 }
572 } else if (quot < (v3<<1)) {
573 t1 = u1 - (v1<<1); u1 = v1; v1 = t1;
574 t3 = u3 - (v3<<1); u3 = v3; v3 = t3;
575 } else {
576 t1 = u1 - v1*3; u1 = v1; v1 = t1;
577 t3 = u3 - v3*3; u3 = v3; v3 = t3;
578 }
579 } else {
580 quot = u3 / v3;
581 t1 = u1 - v1*quot; u1 = v1; v1 = t1;
582 t3 = u3 - v3*quot; u3 = v3; v3 = t3;
583 }
584 }
585 if (u1 < 0) u1 += p;
586 return u1;
587 }
588
589 #if __GNU_MP_VERSION < 5
gcdext(mpz_t g,mpz_t s,mpz_t t,const mpz_t ia,const mpz_t ib)590 extern void gcdext(mpz_t g, mpz_t s, mpz_t t, const mpz_t ia, const mpz_t ib)
591 {
592 mpz_t a, b;
593 mpz_init_set(a, ia);
594 mpz_init_set(b, ib);
595
596 if (mpz_sgn(a) == 0 || mpz_cmp(a,b) == 0) {
597 mpz_set_si(s, 0);
598 mpz_set_si(t, mpz_sgn(b));
599 mpz_abs(g, b);
600 } else if (mpz_sgn(b) == 0) {
601 mpz_set_si(s, mpz_sgn(a));
602 mpz_set_si(t, 0);
603 mpz_abs(g, a);
604 } else {
605 mpz_t os, ot, or, r, q, tmp;
606 mpz_init(os); mpz_init(ot); mpz_init(or);
607 mpz_init(r); mpz_init(q); mpz_init(tmp);
608 mpz_set_ui(s,0); mpz_set_ui(os,1);
609 mpz_set_ui(t,1); mpz_set_ui(ot,0);
610 mpz_set(r,b); mpz_set(or,a);
611 while (mpz_sgn(r)) {
612 mpz_tdiv_q(q, or, r);
613 mpz_set(tmp, r); mpz_mul(r, r, q); mpz_sub(r, or, r); mpz_set(or, tmp);
614 mpz_set(tmp, s); mpz_mul(s, s, q); mpz_sub(s, os, s); mpz_set(os, tmp);
615 mpz_set(tmp, t); mpz_mul(t, t, q); mpz_sub(t, ot, t); mpz_set(ot, tmp);
616 }
617 mpz_set(s, os);
618 mpz_set(t, ot);
619 mpz_set(g, or);
620 mpz_clear(r); mpz_clear(q); mpz_clear(tmp);
621 mpz_clear(os); mpz_clear(ot); mpz_clear(or);
622
623 if (mpz_sgn(g) < 0) {
624 mpz_neg(s, s);
625 mpz_neg(t, t);
626 mpz_neg(g, g);
627 }
628 }
629 mpz_clear(a); mpz_clear(b);
630 }
631 #endif
632
chinese(mpz_t ret,mpz_t lcm,mpz_t * a,mpz_t * m,int items)633 int chinese(mpz_t ret, mpz_t lcm, mpz_t *a, mpz_t *m, int items)
634 {
635 mpz_t sum, gcd, u, v, s, t, temp1, temp2;
636 int i, rval = 1;
637
638 #if 0
639 if (items >= 128) {
640 int first = items/2;
641 mpz_t ca[2], cm[2];
642
643 for (i = 0; i < 2; i++)
644 { mpz_init(ca[i]); mpz_init(cm[i]); }
645 rval = chinese(ca[0], cm[0], a, m, first);
646 if (rval == 1)
647 rval = chinese(ca[1], cm[1], a+first, m+first, items-first);
648 if (rval == 1)
649 rval = chinese(ret, lcm, ca, cm, 2);
650 for (i = 0; i < 2; i++)
651 { mpz_clear(ca[i]); mpz_clear(cm[i]); }
652 return rval;
653 }
654 #else
655 #define CRTN 8
656 if (items >= 64) {
657 int step = items/CRTN;
658 mpz_t ca[CRTN], cm[CRTN];
659
660 for (i = 0; i < CRTN; i++)
661 { mpz_init(ca[i]); mpz_init(cm[i]); }
662 for (i = 0; rval && i < CRTN; i++) {
663 int citems = (i==CRTN-1) ? items-(CRTN-1)*step : step;
664 rval = chinese(ca[i], cm[i], a+i*step, m+i*step, citems);
665 }
666 if (rval) rval = chinese(ret, lcm, ca, cm, CRTN);
667 for (i = 0; i < CRTN; i++)
668 { mpz_clear(ca[i]); mpz_clear(cm[i]); }
669 return rval;
670 }
671 #endif
672
673 /* Avoid dividing by zero, which GMP doesn't enjoy. */
674 for (i = 0; i < items; i++)
675 if (mpz_sgn(m[i]) == 0)
676 return 0;
677
678 mpz_init(temp1); mpz_init(temp2);
679 mpz_init(sum); mpz_init(gcd);
680 mpz_init(s); mpz_init(t);
681 mpz_init(u); mpz_init(v);
682
683 mpz_set(lcm, m[0]);
684 mpz_mod(sum, a[0], m[0]);
685 for (i = 1; i < items; i++) {
686 mpz_gcdext(gcd, u, v, lcm, m[i]);
687 mpz_divexact(s, m[i], gcd);
688 mpz_divexact(t, lcm, gcd);
689 if (mpz_cmp_ui(gcd,1) != 0) {
690 mpz_mod(temp1, sum, gcd);
691 mpz_mod(temp2, a[i], gcd);
692 if (mpz_cmp(temp1, temp2) != 0) {
693 rval = 0;
694 break;
695 }
696 }
697 if (mpz_sgn(s) < 0) mpz_neg(s,s);
698 if (mpz_sgn(t) < 0) mpz_neg(t,t);
699 mpz_mul(lcm, lcm, s);
700 if (mpz_sgn(u) < 0) mpz_add(u, u, lcm);
701 if (mpz_sgn(v) < 0) mpz_add(v, v, lcm);
702 mpz_mul(temp1, v, s);
703 mpz_mul(v, temp1, sum);
704 mpz_mul(temp1, u, t);
705 mpz_mul(u, temp1, a[i]);
706 mpz_add(temp1, v, u);
707 mpz_mod(sum, temp1, lcm);
708 }
709 mpz_set(ret, sum);
710 mpz_clear(sum); mpz_clear(gcd);
711 mpz_clear(s); mpz_clear(t);
712 mpz_clear(u); mpz_clear(v);
713 mpz_clear(temp1); mpz_clear(temp2);
714 return rval;
715 }
716
717
mpz_order_ui(UV r,mpz_t n,UV limit)718 UV mpz_order_ui(UV r, mpz_t n, UV limit) {
719 UV j;
720 mpz_t t;
721
722 /* If n < limit, set limit to n */
723 if (mpz_cmp_ui(n, limit) < 0)
724 limit = mpz_get_ui(n);
725 mpz_init_set_ui(t, 1);
726 for (j = 1; j <= limit; j++) {
727 mpz_mul(t, t, n);
728 mpz_mod_ui(t, t, r);
729 if (!mpz_cmp_ui(t, 1))
730 break;
731 }
732 mpz_clear(t);
733 return j;
734 }
735
mpz_arctan(mpz_t r,unsigned long base,mpz_t pow,mpz_t t1,mpz_t t2)736 void mpz_arctan(mpz_t r, unsigned long base, mpz_t pow, mpz_t t1, mpz_t t2)
737 {
738 unsigned long i = 1;
739 mpz_tdiv_q_ui(r, pow, base);
740 mpz_set(t1, r);
741 do {
742 if (base > 65535) { mpz_ui_pow_ui(t2, base, 2); mpz_tdiv_q(t1, t1, t2); }
743 else mpz_tdiv_q_ui(t1, t1, base*base);
744 mpz_tdiv_q_ui(t2, t1, 2*i+1);
745 if (i++ & 1) mpz_sub(r, r, t2); else mpz_add(r, r, t2);
746 } while (mpz_sgn(t2));
747 }
mpz_arctanh(mpz_t r,unsigned long base,mpz_t pow,mpz_t t1,mpz_t t2)748 void mpz_arctanh(mpz_t r, unsigned long base, mpz_t pow, mpz_t t1, mpz_t t2)
749 {
750 unsigned long i = 1;
751 mpz_tdiv_q_ui(r, pow, base);
752 mpz_set(t1, r);
753 do {
754 if (base > 65535) { mpz_ui_pow_ui(t2, base, 2); mpz_tdiv_q(t1, t1, t2); }
755 else mpz_tdiv_q_ui(t1, t1, base*base);
756 mpz_tdiv_q_ui(t2, t1, 1 + (i++ << 1));
757 mpz_add(r, r, t2);
758 } while (mpz_sgn(t2));
759 }
760
mpz_product(mpz_t * A,UV a,UV b)761 void mpz_product(mpz_t* A, UV a, UV b) {
762 if (b <= a) {
763 /* nothing */
764 } else if (b == a+1) {
765 mpz_mul(A[a], A[a], A[b]);
766 } else if (b == a+2) {
767 mpz_mul(A[a+1], A[a+1], A[a+2]);
768 mpz_mul(A[a], A[a], A[a+1]);
769 } else {
770 UV c = a + (b-a+1)/2;
771 mpz_product(A, a, c-1);
772 mpz_product(A, c, b);
773 mpz_mul(A[a], A[a], A[c]);
774 }
775 }
mpz_veclcm(mpz_t * A,UV a,UV b)776 void mpz_veclcm(mpz_t* A, UV a, UV b) {
777 if (b <= a) {
778 /* nothing */
779 } else if (b == a+1) {
780 mpz_lcm(A[a], A[a], A[b]);
781 } else if (b == a+2) {
782 mpz_lcm(A[a+1], A[a+1], A[a+2]);
783 mpz_lcm(A[a], A[a], A[a+1]);
784 } else {
785 UV c = a + (b-a+1)/2;
786 mpz_veclcm(A, a, c-1);
787 mpz_veclcm(A, c, b);
788 mpz_lcm(A[a], A[a], A[c]);
789 }
790 }
791
logint(mpz_t n,UV base)792 UV logint(mpz_t n, UV base) {
793 mpz_t nt;
794 double logn, logbn, coreps;
795 UV res, nbits;
796
797 if (mpz_cmp_ui(n,0) <= 0 || base <= 1)
798 croak("mpz_logint: bad input\n");
799
800 /* If base is a small power of 2, then this is exact */
801 if (base <= 62 && (base & (base-1)) == 0)
802 return mpz_sizeinbase(n, base)-1;
803
804 if (mpz_cmp_ui(n,base) < 0)
805 return 0;
806
807 #if 0 /* example using mpf_log for high precision. Slow. */
808 {
809 mpf_t fr, fn;
810 mpf_init(fr); mpf_init(fn);
811 mpf_set_z(fn, n);
812 mpf_log(fr, fn);
813 logn = mpf_get_d(fr);
814 mpf_clear(fn); mpf_clear(fr);
815 coreps = 1e-8;
816 }
817 #endif
818
819 /* A typical way to do this is to start with base, then compare
820 * base^2, base^4, base^8, ... until larger than n. Then either work
821 * back down or do a binary search
822 * It uses O(log2(log2(n)) integer squares+multiplies plus some space.
823 *
824 * However, libc gives us the very fast log() function for doubles. While
825 * reducing the argument as needed to make sure we fit inside a double,
826 * we can use this to give us a result extremely close to the right
827 * answer, then adjust if we're not sure of the result.
828 *
829 * My benchmarks show it as about 2-10x faster than the all-integer method.
830 */
831
832 nbits = mpz_sizeinbase(n,2);
833 mpz_init(nt);
834
835 /* Step 1, get an approximation of log(n) */
836 if (nbits < 512) {
837 logn = log(mpz_get_d(n));
838 coreps = 1e-8;
839 } else {
840 /* Reduce bits using log(x * 2^k) = log(x) + k * log(2) */
841 uint32_t redbits = nbits - 256;
842 mpz_tdiv_q_2exp(nt, n, redbits);
843 logn = log(mpz_get_d(nt)) + redbits * 0.69314718055994530941723212145818L;
844 coreps = 1e-7;
845 }
846
847 /* Step 2, approximate log_base(n) */
848 logbn = logn / log(base);
849 res = (UV) logbn;
850
851 /* Step 3, correct if logbn might be rounded wrong */
852 if (res != (UV)(logbn+coreps) || res != (UV)(logbn-coreps)) {
853 mpz_ui_pow_ui(nt, base, res);
854 if (mpz_cmp(nt, n) > 0) {
855 res--;
856 } else if (mpz_cmp(nt, n) < 0) {
857 mpz_mul_ui(nt, nt, base);
858 if (mpz_cmp(nt, n) <= 0)
859 res++;
860 }
861 }
862 mpz_clear(nt);
863 /* res is largest res such that base^res <= n */
864 return res;
865 }
866
867 /******************************************************************************/
868 /*
869 * Floating point routines.
870 * These are not rigorously accurate. Use MPFR if possible.
871 *
872 * See also: http://fredrikj.net/math/elefun.pdf
873 * for how to really look at this correctly.
874 *
875 * Many ideas from Brent's presentation:
876 * https://pdfs.semanticscholar.org/8aec/ea97b8f2f23d4f09ec8f69025598f742ae9e.pdf
877 */
878
879
880 extern void const_pi(mpf_t pi, unsigned long prec);
881 extern void const_log2(mpf_t logn, unsigned long prec);
882
883 /* Log using Brent's second AGM algorithm (Sasaki and Kanada theta) */
mpf_log(mpf_t logn,mpf_t n)884 void mpf_log(mpf_t logn, mpf_t n)
885 {
886 mpf_t N, t, q, theta2, theta3, logdn;
887 unsigned long k, bits = mpf_get_prec(logn);
888 int neg = (mpf_sgn(n) < 0);
889
890 if (mpf_sgn(n) == 0)
891 croak("mpf_log(0)");
892 if (mpf_cmp_ui(n,2) == 0)
893 { const_log2(logn,BITS2DIGS(bits)); return; }
894 if ((neg && !mpf_cmp_si(n,-1)) || (!neg && !mpf_cmp_si(n,1)))
895 { mpf_set_ui(logn,0); return; }
896
897 mpf_init2(N, bits);
898 mpf_set(N, n);
899 if (neg) mpf_neg(N, N);
900
901 mpf_init2(t, 64 + bits);
902 mpf_init2(q, 64 + bits);
903 mpf_init2(theta2, 64 + bits);
904 mpf_init2(theta3, 64 + bits);
905 mpf_init2(logdn, 64 + bits);
906 mpf_set_ui(logn, 0);
907
908 /* ensure N >> 1 */
909 mpf_set_ui(t, 1); mpf_mul_2exp(t, t, 1+(35+bits)/36);
910 if (mpf_cmp(N, t) <= 0) {
911 /* log(n) = log(n*2^k) - k*log(2) */
912 for (k = 0; mpf_cmp(N, t) <= 0; k += 16)
913 mpf_mul_2exp(N, N, 16);
914 if (k > 0) {
915 const_log2(t, BITS2DIGS(bits));
916 mpf_mul_ui(logn, t, k);
917 mpf_neg(logn, logn);
918 }
919 }
920
921 mpf_ui_div(q, 1, N);
922 mpf_pow_ui(t,q,9); mpf_add(theta2, q, t);
923 mpf_pow_ui(t,q,25); mpf_add(theta2, theta2, t);
924 mpf_mul_2exp(theta2, theta2, 1);
925 mpf_pow_ui(theta3,q,4);
926 mpf_pow_ui(t,q,16); mpf_add(theta3, theta3, t);
927 mpf_mul_2exp(theta3, theta3, 1);
928 mpf_add_ui(theta3, theta3, 1);
929
930 /* Normally we would do:
931 * mpf_mul(theta2, theta2, theta2); mpf_mul(theta3, theta3, theta3);
932 * mpf_agm(t, theta2, theta3); mpf_mul_2exp(t, t, 2);
933 * but Brent points out the one term acceleration:
934 * AGM(t2^2,t3^2) => AGM(2*t2*t3,t2^2+t3^2)/2
935 */
936 mpf_mul(t, theta2, theta3);
937 mpf_mul_2exp(q, t, 1); /* q = 2*t2*t3 */
938 mpf_add(t, theta2, theta3);
939 mpf_mul(t, t, t); /* t = (t2+t3)^2 = t2^2 + t3^2 + 2*t2*t3 */
940 mpf_sub(theta3, t, q);
941 mpf_set(theta2, q);
942 mpf_agm(t, theta2, theta3);
943 mpf_mul_2exp(t, t, 1);
944
945 const_pi(logdn, BITS2DIGS(bits));
946 mpf_div(logdn, logdn, t);
947
948 mpf_add(logn, logn, logdn);
949 mpf_clear(logdn); mpf_clear(theta3); mpf_clear(theta2); mpf_clear(q);
950 mpf_clear(t); mpf_clear(N);
951 if (neg) mpf_neg(logn, logn);
952 }
953
954 /* x should be 0 < x < 1 */
_exp_sinh(mpf_t expx,mpf_t x,unsigned long bits)955 static void _exp_sinh(mpf_t expx, mpf_t x, unsigned long bits)
956 {
957 unsigned long k;
958 mpf_t t, s, N, D, X;
959
960 mpf_init2(t, 10 + bits);
961 mpf_init2(s, 10 + bits);
962 mpf_init2(N, 10 + bits);
963 mpf_init2(D, 10 + bits);
964 mpf_init2(X, 10 + bits);
965
966 /* 1. Compute s =~ sinh(x). */
967 mpf_set(s,x);
968 mpf_set(N,x);
969 mpf_mul(X,x,x);
970 mpf_set_ui(D,1);
971 for (k = 1; k < bits; k++) {
972 mpf_mul(N, N, X);
973 mpf_mul_ui(D, D, 2*k);
974 mpf_mul_ui(D, D, 2*k+1);
975 mpf_div(t, N, D);
976 mpf_add(s, s, t);
977
978 mpf_abs(t, t);
979 mpf_mul_2exp(t, t, bits);
980 if (mpf_cmp_d(t, .5) < 0)
981 break;
982 }
983 mpf_clear(X); mpf_clear(D); mpf_clear(N);
984
985 /* 2. Compute s =~ e(x) from sinh(x). */
986 mpf_mul(t, s, s);
987 mpf_add_ui(t, t, 1);
988 mpf_sqrt(t, t);
989 mpf_add(s, s, t);
990
991 mpf_set(expx, s);
992 mpf_clear(s); mpf_clear(t);
993 }
994
_exp_lift(mpf_t expx,mpf_t x,unsigned long bits)995 static void _exp_lift(mpf_t expx, mpf_t x, unsigned long bits)
996 {
997 mpf_t s, t1, t2;
998 unsigned long k;
999
1000 mpf_init2(s, 10 + bits);
1001 mpf_init2(t1, 10 + bits);
1002 mpf_init2(t2, 10 + bits);
1003
1004 mpf_set(s, expx);
1005 mpf_log(t1, s);
1006 mpf_sub(t2, x, t1); /* t2 = s-ln(x) */
1007 mpf_mul(t1, s, t2); /* t1 = s(s-ln(x) */
1008 mpf_add(s, s, t1);
1009 /* third and higher orders */
1010 for (k = 3; k <= 8; k++) {
1011 mpf_mul(t1, t1, t2);
1012 mpf_div_ui(t1, t1, k-1);
1013 mpf_add(s, s, t1);
1014 }
1015 mpf_set(expx, s);
1016 mpf_clear(t2); mpf_clear(t1); mpf_clear(s);
1017 }
1018
mpf_exp(mpf_t expn,mpf_t x)1019 void mpf_exp(mpf_t expn, mpf_t x)
1020 {
1021 mpf_t t;
1022 unsigned long k, r, rbits, bits = mpf_get_prec(expn);
1023
1024 if (mpf_sgn(x) == 0) { mpf_set_ui(expn, 1); return; }
1025
1026 mpf_init2(t, 10 + bits);
1027
1028 if (mpf_sgn(x) < 0) { /* As per Brent, exp(x) = 1/exp(-x) */
1029 mpf_neg(t, x);
1030 mpf_exp(t, t);
1031 if (mpf_sgn(t) != 0) mpf_ui_div(expn, 1, t);
1032 else mpf_set_ui(expn, 0);
1033 mpf_clear(t);
1034 return;
1035 }
1036
1037 /* Doubling rule, to make -.25 < x < .25. Speeds convergence. */
1038 mpf_set(t, x);
1039 for (k = 0; mpf_cmp_d(t, 1.0L/8192.0L) > 0; k++)
1040 mpf_div_2exp(t, t, 1);
1041
1042 /* exp with sinh method, then Newton lift to final bits */
1043 for (rbits = bits, r = 0; rbits > 4000; rbits = (rbits+7)/8)
1044 r++;
1045 _exp_sinh(expn, t, rbits);
1046 while (r-- > 0) {
1047 rbits *= 8;
1048 _exp_lift(expn, t, rbits);
1049 }
1050 if (rbits < bits)
1051 _exp_lift(expn, t, bits);
1052
1053 if (k > 0) {
1054 const unsigned long maxred = 8*sizeof(unsigned long)-1;
1055 for ( ; k > maxred; k -= maxred)
1056 mpf_pow_ui(expn, expn, 1UL << maxred);
1057 mpf_pow_ui(expn, expn, 1UL << k);
1058 }
1059 mpf_clear(t);
1060 }
1061
1062
1063 /* Negative b with non-int x usually gives a complex result.
1064 * We try to at least give a consistent result. */
1065
mpf_pow(mpf_t powx,mpf_t b,mpf_t x)1066 void mpf_pow(mpf_t powx, mpf_t b, mpf_t x)
1067 {
1068 mpf_t t;
1069 int neg = 0;
1070
1071 if (mpf_sgn(b) == 0) { mpf_set_ui(powx, 0); return; }
1072 if (mpf_sgn(b) < 0) { neg = 1; }
1073 if (mpf_cmp_ui(b,1) == 0) { mpf_set_ui(powx, 1-2*neg); return; }
1074
1075 if (mpf_integer_p(x) && mpf_fits_ulong_p(x)) {
1076 mpf_pow_ui(powx, b, mpf_get_ui(x));
1077 return;
1078 }
1079
1080 if (neg) mpf_neg(b,b);
1081 mpf_init2(t, mpf_get_prec(powx));
1082 mpf_log(t, b);
1083 mpf_mul(t, t, x);
1084 mpf_exp(powx, t);
1085 if (neg) mpf_neg(powx,powx);
1086 mpf_clear(t);
1087 }
1088
mpf_root(mpf_t rootx,mpf_t x,mpf_t n)1089 void mpf_root(mpf_t rootx, mpf_t x, mpf_t n)
1090 {
1091 if (mpf_sgn(n) == 0) {
1092 mpf_set_ui(rootx, 0);
1093 } else if (mpf_cmp_ui(n, 2) == 0) {
1094 mpf_sqrt(rootx, x);
1095 } else {
1096 mpf_t t;
1097 mpf_init2(t, mpf_get_prec(rootx));
1098 mpf_ui_div(t, 1, n);
1099 mpf_pow(rootx, x, t);
1100 mpf_clear(t);
1101 }
1102 }
1103
mpf_agm(mpf_t r,mpf_t a,mpf_t b)1104 void mpf_agm(mpf_t r, mpf_t a, mpf_t b)
1105 {
1106 mpf_t t;
1107 unsigned long bits = mpf_get_prec(r);
1108
1109 if (mpf_cmp(a,b) > 0) mpf_swap(a,b);
1110
1111 mpf_init2(t, 6+bits);
1112 while (1) {
1113 mpf_sub(t, b, a);
1114 mpf_abs(t, t);
1115 mpf_mul_2exp(t, t, bits);
1116 mpf_sub_ui(t,t,1);
1117 if (mpf_sgn(t) < 0)
1118 break;
1119 mpf_sub_ui(t,t,1);
1120 mpf_set(t, a);
1121 mpf_add(a, a, b);
1122 mpf_div_2exp(a, a, 1);
1123 mpf_mul(b, b, t);
1124 mpf_sqrt(b, b);
1125 }
1126 mpf_set(r, b);
1127 mpf_clear(t);
1128 }
1129
1130 /******************************************************************************/
1131
1132
1133 #if 0
1134 /* Simple polynomial multiplication */
1135 void poly_mod_mul(mpz_t* px, mpz_t* py, mpz_t* ptmp, UV r, mpz_t mod)
1136 {
1137 UV i, j, prindex;
1138
1139 for (i = 0; i < r; i++)
1140 mpz_set_ui(ptmp[i], 0);
1141 for (i = 0; i < r; i++) {
1142 if (!mpz_sgn(px[i])) continue;
1143 for (j = 0; j < r; j++) {
1144 if (!mpz_sgn(py[j])) continue;
1145 prindex = (i+j) % r;
1146 mpz_addmul( ptmp[prindex], px[i], py[j] );
1147 }
1148 }
1149 /* Put ptmp into px and mod n */
1150 for (i = 0; i < r; i++)
1151 mpz_mod(px[i], ptmp[i], mod);
1152 }
1153 void poly_mod_sqr(mpz_t* px, mpz_t* ptmp, UV r, mpz_t mod)
1154 {
1155 UV i, d, s;
1156 UV degree = r-1;
1157
1158 for (i = 0; i < r; i++)
1159 mpz_set_ui(ptmp[i], 0);
1160 for (d = 0; d <= 2*degree; d++) {
1161 UV prindex = d % r;
1162 for (s = (d <= degree) ? 0 : d-degree; s <= (d/2); s++) {
1163 if (s*2 == d) {
1164 mpz_addmul( ptmp[prindex], px[s], px[s] );
1165 } else {
1166 mpz_addmul( ptmp[prindex], px[s], px[d-s] );
1167 mpz_addmul( ptmp[prindex], px[s], px[d-s] );
1168 }
1169 }
1170 }
1171 /* Put ptmp into px and mod n */
1172 for (i = 0; i < r; i++)
1173 mpz_mod(px[i], ptmp[i], mod);
1174 }
1175 #endif
1176
1177 #if (__GNU_MP_VERSION < 4) || (__GNU_MP_VERSION == 4 && __GNU_MP_VERSION_MINOR < 1)
1178 /* Binary segmentation, using simple shift+add method for processing p.
1179 * Faster than twiddling bits, but not nearly as fast as import/export.
1180 * mpz_import and mpz_export were added in GMP 4.1 released in 2002.
1181 */
poly_mod_mul(mpz_t * px,mpz_t * py,UV r,mpz_t mod,mpz_t p,mpz_t p2,mpz_t t)1182 void poly_mod_mul(mpz_t* px, mpz_t* py, UV r, mpz_t mod, mpz_t p, mpz_t p2, mpz_t t)
1183 {
1184 UV i, d, bits;
1185 UV degree = r-1;
1186
1187 mpz_mul(t, mod, mod);
1188 mpz_mul_ui(t, t, r);
1189 bits = mpz_sizeinbase(t, 2);
1190
1191 mpz_set_ui(p, 0);
1192 for (i = 0; i < r; i++) {
1193 mpz_mul_2exp(p, p, bits);
1194 mpz_add(p, p, px[r-i-1]);
1195 }
1196
1197 if (px == py) {
1198 mpz_mul(p, p, p);
1199 } else {
1200 mpz_set_ui(p2, 0);
1201 for (i = 0; i < r; i++) {
1202 mpz_mul_2exp(p2, p2, bits);
1203 mpz_add(p2, p2, py[r-i-1]);
1204 }
1205 mpz_mul(p, p, p2);
1206 }
1207
1208 for (d = 0; d <= 2*degree; d++) {
1209 mpz_tdiv_r_2exp(t, p, bits);
1210 mpz_tdiv_q_2exp(p, p, bits);
1211 if (d < r)
1212 mpz_set(px[d], t);
1213 else
1214 mpz_add(px[d-r], px[d-r], t);
1215 }
1216 for (i = 0; i < r; i++)
1217 mpz_mod(px[i], px[i], mod);
1218 }
1219
1220 #else
1221
1222 /* Binary segmentation, using import/export method for processing p.
1223 * Thanks to Dan Bernstein's 2007 Quartic paper.
1224 */
poly_mod_mul(mpz_t * px,mpz_t * py,UV r,mpz_t mod,mpz_t p,mpz_t p2,mpz_t t)1225 void poly_mod_mul(mpz_t* px, mpz_t* py, UV r, mpz_t mod, mpz_t p, mpz_t p2, mpz_t t)
1226 {
1227 UV i, bytes;
1228 char* s;
1229
1230 mpz_mul(t, mod, mod);
1231 mpz_mul_ui(t, t, r);
1232 bytes = mpz_sizeinbase(t, 256);
1233 mpz_set_ui(p, 0);
1234 mpz_set_ui(p2, 0);
1235
1236 /* 1. Create big integers from px and py with padding. */
1237 {
1238 Newz(0, s, r*bytes, char);
1239 for (i = 0; i < r; i++)
1240 mpz_export(s + i*bytes, NULL, -1, 1, 0, 0, px[i]);
1241 mpz_import(p, r*bytes, -1, 1, 0, 0, s);
1242 Safefree(s);
1243 }
1244 if (px != py) {
1245 Newz(0, s, r*bytes, char);
1246 for (i = 0; i < r; i++)
1247 mpz_export(s + i*bytes, NULL, -1, 1, 0, 0, py[i]);
1248 mpz_import(p2, r*bytes, -1, 1, 0, 0, s);
1249 Safefree(s);
1250 }
1251
1252 /* 2. Multiply using the awesomeness that is GMP. */
1253 mpz_mul( p, p, (px == py) ? p : p2 );
1254
1255 /* 3. Pull out the parts of the result, add+mod, and put in px. */
1256 {
1257 Newz(0, s, 2*r*bytes, char);
1258 /* fill s with data from p */
1259 mpz_export(s, NULL, -1, 1, 0, 0, p);
1260 for (i = 0; i < r; i++) {
1261 /* Set px[i] to the top part, t to the bottom. */
1262 mpz_import(px[i], bytes, -1, 1, 0, 0, s + (i+r)*bytes);
1263 mpz_import(t, bytes, -1, 1, 0, 0, s + i*bytes);
1264 /* Add and mod */
1265 mpz_add(px[i], px[i], t);
1266 mpz_mod(px[i], px[i], mod);
1267 }
1268 Safefree(s);
1269 }
1270 }
1271 #endif
1272
poly_mod_pow(mpz_t * pres,mpz_t * pn,mpz_t power,UV r,mpz_t mod)1273 void poly_mod_pow(mpz_t *pres, mpz_t *pn, mpz_t power, UV r, mpz_t mod)
1274 {
1275 UV i;
1276 mpz_t mpow, t1, t2, t3;
1277
1278 for (i = 0; i < r; i++)
1279 mpz_set_ui(pres[i], 0);
1280 mpz_set_ui(pres[0], 1);
1281
1282 mpz_init_set(mpow, power);
1283 mpz_init(t1); mpz_init(t2); mpz_init(t3);
1284
1285 while (mpz_cmp_ui(mpow, 0) > 0) {
1286 if (mpz_odd_p(mpow)) poly_mod_mul(pres, pn, r, mod, t1, t2, t3);
1287 mpz_tdiv_q_2exp(mpow, mpow, 1);
1288 if (mpz_cmp_ui(mpow, 0) > 0) poly_mod_mul(pn, pn, r, mod, t1, t2, t3);
1289 }
1290 mpz_clear(t1); mpz_clear(t2); mpz_clear(t3);
1291 mpz_clear(mpow);
1292 }
1293
poly_mod(mpz_t * pres,mpz_t * pn,UV * dn,mpz_t mod)1294 void poly_mod(mpz_t *pres, mpz_t *pn, UV *dn, mpz_t mod)
1295 {
1296 UV i;
1297 for (i = 0; i < *dn; i++) {
1298 mpz_mod(pres[i], pn[i], mod);
1299 }
1300 while (*dn > 0 && mpz_sgn(pres[*dn-1]) == 0)
1301 *dn -= 1;
1302 }
polyz_mod(mpz_t * pres,mpz_t * pn,long * dn,mpz_t mod)1303 void polyz_mod(mpz_t *pres, mpz_t *pn, long *dn, mpz_t mod)
1304 {
1305 long i;
1306 for (i = 0; i <= *dn; i++) {
1307 mpz_mod(pres[i], pn[i], mod);
1308 }
1309 while (*dn > 0 && mpz_sgn(pres[*dn]) == 0)
1310 *dn -= 1;
1311 }
1312
polyz_set(mpz_t * pr,long * dr,mpz_t * ps,long ds)1313 void polyz_set(mpz_t* pr, long* dr, mpz_t* ps, long ds)
1314 {
1315 *dr = ds;
1316 do {
1317 mpz_set(pr[ds], ps[ds]);
1318 } while (ds-- > 0);
1319 }
1320
polyz_print(const char * header,mpz_t * pn,long dn)1321 void polyz_print(const char* header, mpz_t* pn, long dn)
1322 {
1323 gmp_printf("%s", header);
1324 do { gmp_printf("%Zd ", pn[dn]); } while (dn-- > 0);
1325 gmp_printf("\n");
1326 }
1327
1328 /* Multiply polys px and py to create new poly pr, all modulo 'mod' */
1329 #if 0
1330 void polyz_mulmod(mpz_t* pr, mpz_t* px, mpz_t *py, long *dr, long dx, long dy, mpz_t mod)
1331 {
1332 long i, j;
1333 *dr = dx + dy;
1334 for (i = 0; i <= *dr; i++)
1335 mpz_set_ui(pr[i], 0);
1336 for (i = 0; i <= dx; i++) {
1337 if (!mpz_sgn(px[i])) continue;
1338 for (j = 0; j <= dy; j++) {
1339 if (!mpz_sgn(py[j])) continue;
1340 mpz_addmul( pr[i+j], px[i], py[j] );
1341 }
1342 }
1343 for (i = 0; i <= *dr; i++)
1344 mpz_mod(pr[i], pr[i], mod);
1345 while (*dr > 0 && mpz_sgn(pr[*dr]) == 0) dr[0]--;
1346 }
1347 #endif
1348 #if 1
polyz_mulmod(mpz_t * pr,mpz_t * px,mpz_t * py,long * dr,long dx,long dy,mpz_t mod)1349 void polyz_mulmod(mpz_t* pr, mpz_t* px, mpz_t *py, long *dr, long dx, long dy, mpz_t mod)
1350 {
1351 UV i, bits, r;
1352 mpz_t p, p2, t;
1353
1354 mpz_init(p); mpz_init(t);
1355 *dr = dx+dy;
1356 r = *dr+1;
1357 mpz_mul(t, mod, mod);
1358 mpz_mul_ui(t, t, r);
1359 bits = mpz_sizeinbase(t, 2);
1360 mpz_set_ui(p, 0);
1361
1362 /* Create big integers p and p2 from px and py, with padding */
1363 {
1364 for (i = 0; i <= (UV)dx; i++) {
1365 mpz_mul_2exp(p, p, bits);
1366 mpz_add(p, p, px[dx-i]);
1367 }
1368 }
1369 if (px == py) {
1370 mpz_pow_ui(p, p, 2);
1371 } else {
1372 mpz_init_set_ui(p2, 0);
1373 for (i = 0; i <= (UV)dy; i++) {
1374 mpz_mul_2exp(p2, p2, bits);
1375 mpz_add(p2, p2, py[dy-i]);
1376 }
1377 mpz_mul(p, p, p2);
1378 mpz_clear(p2);
1379 }
1380
1381 /* Pull out parts of result p to pr */
1382 for (i = 0; i < r; i++) {
1383 mpz_tdiv_r_2exp(t, p, bits);
1384 mpz_tdiv_q_2exp(p, p, bits);
1385 mpz_mod(pr[i], t, mod);
1386 }
1387
1388 mpz_clear(p); mpz_clear(t);
1389 }
1390 #endif
1391 #if 0
1392 void polyz_mulmod(mpz_t* pr, mpz_t* px, mpz_t *py, long *dr, long dx, long dy, mpz_t mod)
1393 {
1394 UV i, bytes, r;
1395 char* s;
1396 mpz_t p, p2, t;
1397
1398 mpz_init(p); mpz_init(p2); mpz_init(t);
1399 *dr = dx+dy;
1400 r = *dr+1;
1401 mpz_mul(t, mod, mod);
1402 mpz_mul_ui(t, t, r);
1403 bytes = mpz_sizeinbase(t, 256);
1404 mpz_set_ui(p, 0);
1405 mpz_set_ui(p2, 0);
1406
1407 /* Create big integers p and p2 from px and py, with padding */
1408 {
1409 Newz(0, s, (dx+1)*bytes, char);
1410 for (i = 0; i <= dx; i++)
1411 mpz_export(s + i*bytes, NULL, -1, 1, 0, 0, px[i]);
1412 mpz_import(p, (dx+1)*bytes, -1, 1, 0, 0, s);
1413 Safefree(s);
1414 }
1415 if (px != py) {
1416 Newz(0, s, (dy+1)*bytes, char);
1417 for (i = 0; i <= dy; i++)
1418 mpz_export(s + i*bytes, NULL, -1, 1, 0, 0, py[i]);
1419 mpz_import(p2, (dy+1)*bytes, -1, 1, 0, 0, s);
1420 Safefree(s);
1421 }
1422
1423 /* Multiply! */
1424 mpz_mul( p ,p, (px == py) ? p : p2 );
1425
1426 /* Pull out parts of result p to pr */
1427 {
1428 Newz(0, s, r*bytes, char);
1429 /* fill s with data from p */
1430 mpz_export(s, NULL, -1, 1, 0, 0, p);
1431 for (i = 0; i < r; i++) {
1432 mpz_import(t, bytes, -1, 1, 0, 0, s + i*bytes);
1433 mpz_mod(pr[i], t, mod);
1434 }
1435 Safefree(s);
1436 }
1437
1438 mpz_clear(p); mpz_clear(p2); mpz_clear(t);
1439 }
1440 #endif
1441
1442 /* Polynomial division modulo N.
1443 * This is Cohen algorithm 3.1.2 "pseudo-division". */
polyz_div(mpz_t * pq,mpz_t * pr,mpz_t * pn,mpz_t * pd,long * dq,long * dr,long dn,long dd,mpz_t NMOD)1444 void polyz_div(mpz_t *pq, mpz_t *pr, mpz_t *pn, mpz_t *pd,
1445 long *dq, long *dr, long dn, long dd, mpz_t NMOD)
1446 {
1447 long i, j;
1448 mpz_t t;
1449
1450 /* Ensure n and d are reduced */
1451 while (dn > 0 && mpz_sgn(pn[dn]) == 0) dn--;
1452 while (dd > 0 && mpz_sgn(pd[dd]) == 0) dd--;
1453 if (dd == 0 && mpz_sgn(pd[0]) == 0)
1454 croak("polyz_divmod: divide by zero\n");
1455
1456 /* Q = 0 */
1457 *dq = 0;
1458 mpz_set_ui(pq[0], 0);
1459
1460 /* R = N */
1461 *dr = dn;
1462 for (i = 0; i <= dn; i++)
1463 mpz_set(pr[i], pn[i]); /* pn should already be mod */
1464
1465 if (*dr < dd)
1466 return;
1467 if (dd == 0) {
1468 *dq = 0; *dr = 0;
1469 mpz_tdiv_qr( pq[0], pr[0], pn[0], pd[0] );
1470 return;
1471 }
1472
1473 *dq = dn - dd;
1474 *dr = dd-1;
1475
1476 if (mpz_cmp_ui(pd[dd], 1) == 0) {
1477 for (i = *dq; i >= 0; i--) {
1478 long di = dd + i;
1479 mpz_set(pq[i], pr[di]);
1480 for (j = di-1; j >= i; j--) {
1481 mpz_submul(pr[j], pr[di], pd[j-i]);
1482 mpz_mod(pr[j], pr[j], NMOD);
1483 }
1484 }
1485 } else {
1486 mpz_init(t);
1487 for (i = *dq; i >= 0; i--) {
1488 long di = dd + i;
1489 mpz_powm_ui(t, pd[dd], i, NMOD);
1490 mpz_mul(t, t, pr[di]);
1491 mpz_mod(pq[i], t, NMOD);
1492 for (j = di-1; j >= 0; j--) {
1493 mpz_mul(pr[j], pr[j], pd[dd]); /* j != di so this is safe */
1494 if (j >= i)
1495 mpz_submul(pr[j], pr[di], pd[j-i]);
1496 mpz_mod(pr[j], pr[j], NMOD);
1497 }
1498 }
1499 mpz_clear(t);
1500 }
1501 /* Reduce R and Q. */
1502 while (*dr > 0 && mpz_sgn(pr[*dr]) == 0) dr[0]--;
1503 while (*dq > 0 && mpz_sgn(pq[*dq]) == 0) dq[0]--;
1504 }
1505
1506 /* Raise poly pn to the power, modulo poly pmod and coefficient NMOD. */
polyz_pow_polymod(mpz_t * pres,mpz_t * pn,mpz_t * pmod,long * dres,long dn,long dmod,mpz_t power,mpz_t NMOD)1507 void polyz_pow_polymod(mpz_t* pres, mpz_t* pn, mpz_t* pmod,
1508 long *dres, long dn, long dmod,
1509 mpz_t power, mpz_t NMOD)
1510 {
1511 mpz_t mpow;
1512 long dProd, dQ, dX, maxd, i;
1513 mpz_t *pProd, *pQ, *pX;
1514
1515 /* Product = res*x. With a prediv this would be dmod+dmod, but without it
1516 * is max(dmod,dn)+dmod. */
1517 maxd = (dn > dmod) ? dn+dmod : dmod+dmod;
1518 New(0, pProd, maxd+1, mpz_t);
1519 New(0, pQ, maxd+1, mpz_t);
1520 New(0, pX, maxd+1, mpz_t);
1521 for (i = 0; i <= maxd; i++) {
1522 mpz_init(pProd[i]);
1523 mpz_init(pQ[i]);
1524 mpz_init(pX[i]);
1525 }
1526
1527 *dres = 0;
1528 mpz_set_ui(pres[0], 1);
1529
1530 dX = dn;
1531 for (i = 0; i <= dX; i++)
1532 mpz_set(pX[i], pn[i]);
1533
1534 mpz_init_set(mpow, power);
1535 while (mpz_cmp_ui(mpow, 0) > 0) {
1536 if (mpz_odd_p(mpow)) {
1537 polyz_mulmod(pProd, pres, pX, &dProd, *dres, dX, NMOD);
1538 polyz_div(pQ, pres, pProd, pmod, &dQ, dres, dProd, dmod, NMOD);
1539 }
1540 mpz_tdiv_q_2exp(mpow, mpow, 1);
1541 if (mpz_cmp_ui(mpow, 0) > 0) {
1542 polyz_mulmod(pProd, pX, pX, &dProd, dX, dX, NMOD);
1543 polyz_div(pQ, pX, pProd, pmod, &dQ, &dX, dProd, dmod, NMOD);
1544 }
1545 }
1546 mpz_clear(mpow);
1547 for (i = 0; i <= maxd; i++) {
1548 mpz_clear(pProd[i]);
1549 mpz_clear(pQ[i]);
1550 mpz_clear(pX[i]);
1551 }
1552 Safefree(pProd);
1553 Safefree(pQ);
1554 Safefree(pX);
1555 }
1556
polyz_gcd(mpz_t * pres,mpz_t * pa,mpz_t * pb,long * dres,long da,long db,mpz_t MODN)1557 void polyz_gcd(mpz_t* pres, mpz_t* pa, mpz_t* pb, long* dres, long da, long db, mpz_t MODN)
1558 {
1559 long i;
1560 long dr1, dq, dr, maxd;
1561 mpz_t *pr1, *pq, *pr;
1562
1563 /* Early reduction so we're not wasteful with messy input. */
1564 while (da > 0 && mpz_sgn(pa[da]) == 0) da--;
1565 while (db > 0 && mpz_sgn(pb[db]) == 0) db--;
1566
1567 /* Swap a/b so degree a >= degree b */
1568 if (db > da) {
1569 mpz_t* mtmp;
1570 long ltmp;
1571 mtmp = pa; pa = pb; pb = mtmp;
1572 ltmp = da; da = db; db = ltmp;
1573 }
1574 /* TODO: Should set c=pa[da], then after Euclid loop, res = c^-1 * res */
1575
1576 /* Allocate temporary polys */
1577 maxd = da;
1578 New(0, pr1, maxd+1, mpz_t);
1579 New(0, pq, maxd+1, mpz_t);
1580 New(0, pr, maxd+1, mpz_t);
1581 for (i = 0; i <= maxd; i++) {
1582 mpz_init(pr1[i]);
1583 mpz_init(pq[i]);
1584 mpz_init(pr[i]);
1585 }
1586
1587 /* R0 = a (we use pres) */
1588 *dres = da;
1589 for (i = 0; i <= da; i++)
1590 mpz_mod(pres[i], pa[i], MODN);
1591 while (*dres > 0 && mpz_sgn(pres[*dres]) == 0) dres[0]--;
1592
1593 /* R1 = b */
1594 dr1 = db;
1595 for (i = 0; i <= db; i++)
1596 mpz_mod(pr1[i], pb[i], MODN);
1597 while (dr1 > 0 && mpz_sgn(pr1[dr1]) == 0) dr1--;
1598
1599 while (dr1 > 0 || mpz_sgn(pr1[dr1]) != 0) {
1600 polyz_div(pq, pr, pres, pr1, &dq, &dr, *dres, dr1, MODN);
1601 if (dr < 0 || dq < 0 || dr > maxd || dq > maxd)
1602 croak("division error: dq %ld dr %ld maxd %ld\n", dq, dr, maxd);
1603 /* pr0 = pr1. pr1 = pr */
1604 *dres = dr1;
1605 for (i = 0; i <= dr1; i++)
1606 mpz_set(pres[i], pr1[i]); /* pr1 is mod MODN already */
1607 dr1 = dr;
1608 for (i = 0; i <= dr; i++)
1609 mpz_set(pr1[i], pr[i]);
1610 }
1611 /* return pr0 */
1612 while (*dres > 0 && mpz_sgn(pres[*dres]) == 0) dres[0]--;
1613
1614 for (i = 0; i <= maxd; i++) {
1615 mpz_clear(pr1[i]);
1616 mpz_clear(pq[i]);
1617 mpz_clear(pr[i]);
1618 }
1619 Safefree(pr1);
1620 Safefree(pq);
1621 Safefree(pr);
1622 }
1623
1624
1625
polyz_root_deg1(mpz_t root,mpz_t * pn,mpz_t NMOD)1626 void polyz_root_deg1(mpz_t root, mpz_t* pn, mpz_t NMOD)
1627 {
1628 mpz_invert(root, pn[1], NMOD);
1629 mpz_mul(root, root, pn[0]);
1630 mpz_neg(root, root);
1631 mpz_mod(root, root, NMOD);
1632 }
polyz_root_deg2(mpz_t root1,mpz_t root2,mpz_t * pn,mpz_t NMOD)1633 void polyz_root_deg2(mpz_t root1, mpz_t root2, mpz_t* pn, mpz_t NMOD)
1634 {
1635 mpz_t e, d, t, t2, t3, t4;
1636
1637 mpz_init(e); mpz_init(d);
1638 mpz_init(t); mpz_init(t2); mpz_init(t3); mpz_init(t4);
1639
1640 mpz_mul(t, pn[0], pn[2]);
1641 mpz_mul_ui(t, t, 4);
1642 mpz_mul(d, pn[1], pn[1]);
1643 mpz_sub(d, d, t);
1644 sqrtmod_t(e, d, NMOD, t, t2, t3, t4);
1645
1646 mpz_neg(t4, pn[1]); /* t4 = -a_1 */
1647 mpz_mul_ui(t3, pn[2], 2); /* t3 = 2a_2 */
1648 mpz_add(t, t4, e); /* t = -a_1 + e */
1649 mpz_divmod( root1, t, t3, NMOD, t2); /* (-a_1+e)/2a_2 */
1650 mpz_sub(t, t4, e); /* t = -a_1 - e */
1651 mpz_divmod( root2, t, t3, NMOD, t2); /* (-a_1-e)/2a_2 */
1652 mpz_clear(e); mpz_clear(d);
1653 mpz_clear(t); mpz_clear(t2); mpz_clear(t3); mpz_clear(t4);
1654 }
1655
1656 /* Algorithm 2.3.10 procedure "roots" from Crandall & Pomerance.
1657 * Step 3/4 of Cohen Algorithm 1.6.1.
1658 * Uses some hints from Pate Williams (1997-1998) for the poly math */
polyz_roots(mpz_t * roots,long * nroots,long maxroots,mpz_t * pg,long dg,mpz_t NMOD)1659 static void polyz_roots(mpz_t* roots, long *nroots,
1660 long maxroots, mpz_t* pg, long dg, mpz_t NMOD)
1661 {
1662 long i, ntries, maxtries, maxd, dxa, dt, dh, dq, dup;
1663 mpz_t t, power;
1664 mpz_t pxa[2];
1665 mpz_t *pt, *ph, *pq;
1666
1667 if (*nroots >= maxroots || dg <= 0) return;
1668
1669 mpz_init(t);
1670 mpz_init(pxa[0]); mpz_init(pxa[1]);
1671
1672 if (dg <= 2) {
1673 if (dg == 1) polyz_root_deg1( pxa[0], pg, NMOD );
1674 else polyz_root_deg2( pxa[0], pxa[1], pg, NMOD );
1675 for (dt = 0; dt < dg; dt++) {
1676 mpz_set(t, pxa[dt]);
1677 dup = 0; /* don't add duplicate roots */
1678 for (i = 0; i < *nroots; i++)
1679 if (mpz_cmp(t, roots[i]) == 0)
1680 { dup = 1; break; }
1681 if (!dup)
1682 mpz_set(roots[ (*nroots)++ ], t);
1683 }
1684 mpz_clear(t);
1685 mpz_clear(pxa[0]); mpz_clear(pxa[1]);
1686 return;
1687 }
1688
1689 /* If not a monic polynomial, divide by leading coefficient */
1690 if (mpz_cmp_ui(pg[dg], 1) != 0) {
1691 if (!mpz_invert(t, pg[dg], NMOD)) {
1692 mpz_clear(t);
1693 return;
1694 }
1695 for (i = 0; i <= dg; i++)
1696 mpz_mulmod(pg[i], pg[i], t, NMOD, pg[i]);
1697 }
1698
1699 /* Try hard to find a single root, work less after we got one or two.
1700 * In a generic routine we would want to try hard all the time. */
1701 ntries = 0;
1702 maxtries = (*nroots == 0) ? 200 : (*nroots == 1) ? 50 : 10;
1703
1704 mpz_init(power);
1705 mpz_set_ui(pxa[1], 1);
1706 dxa = 1;
1707
1708 maxd = 2 * dg;
1709 New(0, pt, maxd+1, mpz_t);
1710 New(0, ph, maxd+1, mpz_t);
1711 New(0, pq, maxd+1, mpz_t);
1712 for (i = 0; i <= maxd; i++) {
1713 mpz_init(pt[i]);
1714 mpz_init(ph[i]);
1715 mpz_init(pq[i]);
1716 }
1717
1718 mpz_sub_ui(t, NMOD, 1);
1719 mpz_tdiv_q_2exp(power, t, 1);
1720 /* We'll pick random "a" values from 1 to 1000M */
1721 mpz_set_ui(t, 1000000000UL);
1722 if (mpz_cmp(t, NMOD) > 0) mpz_set(t, NMOD);
1723
1724 while (ntries++ < maxtries) {
1725 /* pxa = X+a for randomly selected a */
1726 if (ntries <= 2) mpz_set_ui(pxa[0], ntries); /* Simple small values */
1727 else mpz_isaac_urandomm(pxa[0], t);
1728
1729 /* Raise pxa to (NMOD-1)/2, all modulo NMOD and g(x) */
1730 polyz_pow_polymod(pt, pxa, pg, &dt, dxa, dg, power, NMOD);
1731
1732 /* Subtract 1 and gcd */
1733 mpz_sub_ui(pt[0], pt[0], 1);
1734 polyz_gcd(ph, pt, pg, &dh, dt, dg, NMOD);
1735
1736 if (dh >= 1 && dh < dg)
1737 break;
1738 }
1739
1740 if (dh >= 1 && dh < dg) {
1741 /* Pick the smaller of the two splits to process first */
1742 if (dh <= 2 || dh <= (dg-dh)) {
1743 polyz_roots(roots, nroots, maxroots, ph, dh, NMOD);
1744 if (*nroots < maxroots) {
1745 /* q = g/h, and recurse */
1746 polyz_div(pq, pt, pg, ph, &dq, &dt, dg, dh, NMOD);
1747 polyz_roots(roots, nroots, maxroots, pq, dq, NMOD);
1748 }
1749 } else {
1750 polyz_div(pq, pt, pg, ph, &dq, &dt, dg, dh, NMOD);
1751 polyz_roots(roots, nroots, maxroots, pq, dq, NMOD);
1752 if (*nroots < maxroots) {
1753 polyz_roots(roots, nroots, maxroots, ph, dh, NMOD);
1754 }
1755 }
1756 }
1757
1758 mpz_clear(t);
1759 mpz_clear(power);
1760 mpz_clear(pxa[0]); mpz_clear(pxa[1]);
1761
1762 for (i = 0; i <= maxd; i++) {
1763 mpz_clear(pt[i]);
1764 mpz_clear(ph[i]);
1765 mpz_clear(pq[i]);
1766 }
1767 Safefree(pt);
1768 Safefree(ph);
1769 Safefree(pq);
1770 }
1771
1772
1773 /* Algorithm 1.6.1 from Cohen, minus step 1. */
polyz_roots_modp(mpz_t ** roots,long * nroots,long maxroots,mpz_t * pP,long dP,mpz_t NMOD)1774 void polyz_roots_modp(mpz_t** roots, long *nroots, long maxroots,
1775 mpz_t *pP, long dP, mpz_t NMOD)
1776 {
1777 long i;
1778
1779 *nroots = 0;
1780 *roots = 0;
1781
1782 if (dP == 0)
1783 return;
1784
1785 /* Do degree 1 or 2 explicitly */
1786 if (dP == 1) {
1787 New(0, *roots, 1, mpz_t);
1788 mpz_init((*roots)[0]);
1789 polyz_root_deg1( (*roots)[0], pP, NMOD );
1790 *nroots = 1;
1791 return;
1792 }
1793 if (dP == 2) {
1794 New(0, *roots, 2, mpz_t);
1795 mpz_init((*roots)[0]);
1796 mpz_init((*roots)[1]);
1797 polyz_root_deg2( (*roots)[0], (*roots)[1], pP, NMOD );
1798 *nroots = 2;
1799 return;
1800 }
1801
1802 /* Allocate space for the maximum number of roots (plus 1 for safety) */
1803 New(0, *roots, dP+1, mpz_t);
1804 for (i = 0; i <= dP; i++)
1805 mpz_init((*roots)[i]);
1806
1807 if (maxroots > dP || maxroots == 0)
1808 maxroots = dP;
1809
1810 polyz_roots(*roots, nroots, maxroots, pP, dP, NMOD);
1811 /* This could be just really bad luck. Let the caller handle it. */
1812 /* if (*nroots == 0) croak("failed to find roots\n"); */
1813
1814 /* Clear out space for roots we didn't find */
1815 for (i = *nroots; i <= dP; i++)
1816 mpz_clear((*roots)[i]);
1817 }
1818
1819
1820 #include "class_poly_data.h"
1821
poly_class_type_name(int type)1822 const char* poly_class_type_name(int type)
1823 {
1824 switch (type) {
1825 case 1: return "Hilbert";
1826 case 2: return "Weber";
1827 case 3: return "Ramanujan";
1828 default: return "Unknown";
1829 }
1830 }
1831
poly_class_nums(void)1832 int* poly_class_nums(void)
1833 {
1834 int* dlist;
1835 UV i;
1836 int degree_offset[256] = {0};
1837
1838 for (i = 1; i < NUM_CLASS_POLYS; i++)
1839 if (_class_poly_data[i].D < _class_poly_data[i-1].D)
1840 croak("Problem with data file, out of order at D=%d\n", (int)_class_poly_data[i].D);
1841
1842 Newz(0, dlist, NUM_CLASS_POLYS + 1, int);
1843 /* init degree_offset to total number of this degree */
1844 for (i = 0; i < NUM_CLASS_POLYS; i++)
1845 degree_offset[_class_poly_data[i].degree]++;
1846 /* set degree_offset to sum of this and all previous degrees. */
1847 for (i = 1; i < 256; i++)
1848 degree_offset[i] += degree_offset[i-1];
1849 /* Fill in dlist, sorted */
1850 for (i = 0; i < NUM_CLASS_POLYS; i++) {
1851 int position = degree_offset[_class_poly_data[i].degree-1]++;
1852 dlist[position] = i+1;
1853 }
1854 /* Null terminate */
1855 dlist[NUM_CLASS_POLYS] = 0;
1856 return dlist;
1857 }
1858
poly_class_poly_num(int i,int * D,mpz_t ** T,int * type)1859 UV poly_class_poly_num(int i, int *D, mpz_t**T, int* type)
1860 {
1861 UV degree, j;
1862 int ctype;
1863 mpz_t t;
1864 const char* s;
1865
1866 if (i < 1 || i > (int)NUM_CLASS_POLYS) { /* Invalid number */
1867 if (D != 0) *D = 0;
1868 if (T != 0) *T = 0;
1869 return 0;
1870 }
1871 i--; /* i now is the index into our table */
1872
1873 degree = _class_poly_data[i].degree;
1874 ctype = _class_poly_data[i].type;
1875 s = _class_poly_data[i].coefs;
1876
1877 if (D != 0) *D = -_class_poly_data[i].D;
1878 if (type != 0) *type = ctype;
1879 if (T == 0) return degree;
1880
1881 New(0, *T, degree+1, mpz_t);
1882 mpz_init(t);
1883 for (j = 0; j < degree; j++) {
1884 unsigned char signcount = (*s++) & 0xFF;
1885 unsigned char sign = signcount >> 7;
1886 unsigned long count = signcount & 0x7F;
1887 if (count == 127) {
1888 do {
1889 signcount = (*s++) & 0xFF;
1890 count += signcount;
1891 } while (signcount == 127);
1892 }
1893 mpz_set_ui(t, 0);
1894 while (count-- > 0) {
1895 mpz_mul_2exp(t, t, 8);
1896 mpz_add_ui(t, t, (unsigned long) (*s++) & 0xFF);
1897 }
1898 /* Cube the last coefficient of Hilbert polys */
1899 if (j == 0 && ctype == 1) mpz_pow_ui(t, t, 3);
1900 if (sign) mpz_neg(t, t);
1901 mpz_init_set( (*T)[j], t );
1902 }
1903 mpz_clear(t);
1904 mpz_init_set_ui( (*T)[degree], 1 );
1905 return degree;
1906 }
1907