1 /*
2 Copyright (C) 2012 William Hart
3
4 This file is part of FLINT.
5
6 FLINT is free software: you can redistribute it and/or modify it under
7 the terms of the GNU Lesser General Public License (LGPL) as published
8 by the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version. See <http://www.gnu.org/licenses/>.
10 */
11
12 #define ulong ulongxx /* interferes with system includes */
13 #include <string.h>
14 #undef ulong
15 #define ulong mp_limb_t
16 #include <gmp.h>
17 #include "flint.h"
18 #include "fmpz.h"
19 #include "fmpz_vec.h"
20 #include "fmpz_mod_poly.h"
21 #include "mpn_extras.h"
22 #include "ulong_extras.h"
23
24 #define DEBUG 0 /* turn on some trace information */
25
26 #define pp1_mulmod(rxx, axx, bxx, nnn, nxx, ninv, norm) \
27 flint_mpn_mulmod_preinvn(rxx, axx, bxx, nnn, nxx, ninv, norm)
28
29 #ifdef FLINT64
30 static
31 ulong pp1_primorial[15] =
32 {
33 UWORD(2), UWORD(6), UWORD(30), UWORD(210), UWORD(2310), UWORD(30030), UWORD(510510), UWORD(9699690),
34 UWORD(223092870), UWORD(6469693230), UWORD(200560490130), UWORD(7420738134810),
35 UWORD(304250263527210), UWORD(13082761331670030), UWORD(614889782588491410)
36 };
37 #define num_primorials 15
38 #else
39 static
40 ulong pp1_primorial[9] =
41 {
42 UWORD(2), UWORD(6), UWORD(30), UWORD(210), UWORD(2310), UWORD(30030), UWORD(510510), UWORD(9699690),
43 UWORD(223092870)
44 };
45 #define num_primorials 9
46 #endif
47
pp1_set(mp_ptr x1,mp_ptr y1,mp_srcptr x2,mp_srcptr y2,mp_size_t nn)48 void pp1_set(mp_ptr x1, mp_ptr y1,
49 mp_srcptr x2, mp_srcptr y2, mp_size_t nn)
50 {
51 flint_mpn_copyi(x1, x2, nn);
52 flint_mpn_copyi(y1, y2, nn);
53 }
54
pp1_set_ui(mp_ptr x,mp_size_t nn,ulong norm,ulong c)55 void pp1_set_ui(mp_ptr x, mp_size_t nn, ulong norm, ulong c)
56 {
57 mpn_zero(x, nn);
58 x[0] = (c << norm);
59 if (nn > 1 && norm)
60 x[1] = (c >> (FLINT_BITS - norm));
61 }
62
pp1_print(mp_srcptr x,mp_srcptr y,mp_size_t nn,ulong norm)63 void pp1_print(mp_srcptr x, mp_srcptr y, mp_size_t nn, ulong norm)
64 {
65 mp_ptr tx = flint_malloc(nn*sizeof(mp_limb_t));
66 mp_ptr ty = flint_malloc(nn*sizeof(mp_limb_t));
67
68 if (norm)
69 {
70 mpn_rshift(tx, x, nn, norm);
71 mpn_rshift(ty, y, nn, norm);
72 } else
73 {
74 flint_mpn_copyi(tx, x, nn);
75 flint_mpn_copyi(ty, y, nn);
76 }
77
78 flint_printf("["), gmp_printf("%Nd", tx, nn), flint_printf(", "), gmp_printf("%Nd", ty, nn), flint_printf("]");
79
80 flint_free(tx);
81 flint_free(ty);
82 }
83
pp1_2k(mp_ptr x,mp_ptr y,mp_size_t nn,mp_srcptr n,mp_srcptr ninv,mp_srcptr x0,ulong norm)84 void pp1_2k(mp_ptr x, mp_ptr y, mp_size_t nn, mp_srcptr n,
85 mp_srcptr ninv, mp_srcptr x0, ulong norm)
86 {
87 pp1_mulmod(y, y, x, nn, n, ninv, norm);
88 if (mpn_sub_n(y, y, x0, nn))
89 mpn_add_n(y, y, n, nn);
90
91 pp1_mulmod(x, x, x, nn, n, ninv, norm);
92 if (mpn_sub_1(x, x, nn, UWORD(2) << norm))
93 mpn_add_n(x, x, n, nn);
94 }
95
pp1_2kp1(mp_ptr x,mp_ptr y,mp_size_t nn,mp_srcptr n,mp_srcptr ninv,mp_srcptr x0,ulong norm)96 void pp1_2kp1(mp_ptr x, mp_ptr y, mp_size_t nn, mp_srcptr n,
97 mp_srcptr ninv, mp_srcptr x0, ulong norm)
98 {
99 pp1_mulmod(x, x, y, nn, n, ninv, norm);
100 if (mpn_sub_n(x, x, x0, nn))
101 mpn_add_n(x, x, n, nn);
102
103 pp1_mulmod(y, y, y, nn, n, ninv, norm);
104 if (mpn_sub_1(y, y, nn, UWORD(2) << norm))
105 mpn_add_n(y, y, n, nn);
106 }
107
pp1_pow_ui(mp_ptr x,mp_ptr y,mp_size_t nn,ulong exp,mp_srcptr n,mp_srcptr ninv,ulong norm)108 void pp1_pow_ui(mp_ptr x, mp_ptr y, mp_size_t nn,
109 ulong exp, mp_srcptr n, mp_srcptr ninv, ulong norm)
110 {
111 mp_limb_t t[30];
112 mp_ptr x0 = t;
113 ulong bit = ((UWORD(1) << FLINT_BIT_COUNT(exp)) >> 2);
114
115 if (nn > 30)
116 x0 = flint_malloc(nn*sizeof(mp_limb_t));
117 flint_mpn_copyi(x0, x, nn);
118
119 pp1_mulmod(y, x, x, nn, n, ninv, norm);
120 if (mpn_sub_1(y, y, nn, UWORD(2) << norm))
121 mpn_add_n(y, y, n, nn);
122
123 while (bit)
124 {
125 if (exp & bit)
126 pp1_2kp1(x, y, nn, n, ninv, x0, norm);
127 else
128 pp1_2k(x, y, nn, n, ninv, x0, norm);
129
130 bit >>= 1;
131 }
132
133 if (nn > 30)
134 flint_free(x0);
135 }
136
pp1_factor(mp_ptr factor,mp_srcptr n,mp_srcptr x,mp_size_t nn,ulong norm)137 mp_size_t pp1_factor(mp_ptr factor, mp_srcptr n,
138 mp_srcptr x, mp_size_t nn, ulong norm)
139 {
140 mp_size_t ret = 0, xn = nn;
141
142 mp_ptr n2 = flint_malloc(nn*sizeof(mp_limb_t));
143 mp_ptr x2 = flint_malloc(nn*sizeof(mp_limb_t));
144
145 if (norm)
146 mpn_rshift(n2, n, nn, norm);
147 else
148 flint_mpn_copyi(n2, n, nn);
149
150 if (norm)
151 mpn_rshift(x2, x, nn, norm);
152 else
153 flint_mpn_copyi(x2, x, nn);
154
155 if (mpn_sub_1(x2, x2, nn, 2))
156 mpn_add_n(x2, x2, n2, nn);
157
158 MPN_NORM(x2, xn);
159
160 if (xn == 0)
161 goto cleanup;
162
163 ret = flint_mpn_gcd_full(factor, n2, nn, x2, xn);
164
165 cleanup:
166
167 flint_free(n2);
168 flint_free(x2);
169
170 return ret;
171 }
172
pp1_find_power(mp_ptr factor,mp_ptr x,mp_ptr y,mp_size_t nn,ulong p,mp_srcptr n,mp_srcptr ninv,ulong norm)173 mp_size_t pp1_find_power(mp_ptr factor, mp_ptr x, mp_ptr y, mp_size_t nn,
174 ulong p, mp_srcptr n, mp_srcptr ninv, ulong norm)
175 {
176 mp_size_t ret;
177
178 do
179 {
180 pp1_pow_ui(x, y, nn, p, n, ninv, norm);
181 ret = pp1_factor(factor, n, x, nn, norm);
182 } while (ret == 1 && factor[0] == 1);
183
184 return ret;
185 }
186
fmpz_factor_pp1(fmpz_t fac,const fmpz_t n_in,ulong B1,ulong B2sqrt,ulong c)187 int fmpz_factor_pp1(fmpz_t fac, const fmpz_t n_in, ulong B1, ulong B2sqrt, ulong c)
188 {
189 slong i, j;
190 int ret = 0;
191 mp_size_t nn = fmpz_size(n_in), r;
192 mp_ptr x, y, oldx, oldy, n, ninv, factor, ptr_0, ptr_1, ptr_2, ptr_k;
193 ulong pr, oldpr, sqrt, bits0, norm;
194 n_primes_t iter;
195
196 if (fmpz_is_even(n_in))
197 {
198 fmpz_set_ui(fac, 2);
199 return 1;
200 }
201
202 #if DEBUG
203 flint_printf("starting stage 1\n");
204 #endif
205
206 n_primes_init(iter);
207
208 sqrt = n_sqrt(B1);
209 bits0 = FLINT_BIT_COUNT(B1);
210
211 x = flint_malloc(nn*sizeof(mp_limb_t));
212 y = flint_malloc(nn*sizeof(mp_limb_t));
213 oldx = flint_malloc(nn*sizeof(mp_limb_t));
214 oldy = flint_malloc(nn*sizeof(mp_limb_t));
215 n = flint_malloc(nn*sizeof(mp_limb_t));
216 ninv = flint_malloc(nn*sizeof(mp_limb_t));
217 factor = flint_malloc(nn*sizeof(mp_limb_t));
218
219 if (nn == 1)
220 {
221 n[0] = fmpz_get_ui(n_in);
222 count_leading_zeros(norm, n[0]);
223 n[0] <<= norm;
224 } else
225 {
226 mp_ptr np = COEFF_TO_PTR(*n_in)->_mp_d;
227 count_leading_zeros(norm, np[nn - 1]);
228 if (norm)
229 mpn_lshift(n, np, nn, norm);
230 else
231 flint_mpn_copyi(n, np, nn);
232 }
233
234 flint_mpn_preinvn(ninv, n, nn);
235
236 pp1_set_ui(x, nn, norm, c);
237
238 /* mul by various prime powers */
239 pr = 0;
240 oldpr = 0;
241
242 for (i = 0; pr < B1; )
243 {
244 j = i + 1024;
245 oldpr = pr;
246 pp1_set(oldx, oldy, x, y, nn);
247 for ( ; i < j; i++)
248 {
249 pr = n_primes_next(iter);
250 if (pr < sqrt)
251 {
252 ulong bits = FLINT_BIT_COUNT(pr);
253 ulong exp = bits0 / bits;
254 pp1_pow_ui(x, y, nn, n_pow(pr, exp), n, ninv, norm);
255 } else
256 pp1_pow_ui(x, y, nn, pr, n, ninv, norm);
257 }
258
259 r = pp1_factor(factor, n, x, nn, norm);
260 if (r == 0)
261 break;
262 if (r != 1 || factor[0] != 1)
263 {
264 ret = 1;
265 goto cleanup;
266 }
267 }
268
269 if (pr < B1) /* factor = 0 */
270 {
271 n_primes_jump_after(iter, oldpr);
272 pp1_set(x, y, oldx, oldy, nn);
273
274 do
275 {
276 pr = n_primes_next(iter);
277 pp1_set(oldx, oldy, x, y, nn);
278 if (pr < sqrt)
279 {
280 ulong bits = FLINT_BIT_COUNT(pr);
281 ulong exp = bits0 / bits;
282 pp1_pow_ui(x, y, nn, n_pow(pr, exp), n, ninv, norm);
283 } else
284 pp1_pow_ui(x, y, nn, pr, n, ninv, norm);
285
286 r = pp1_factor(factor, n, x, nn, norm);
287 if (r == 0)
288 break;
289 if (r != 1 || factor[0] != 1)
290 {
291 ret = 1;
292 goto cleanup;
293 }
294 } while (1);
295
296 /* factor is still 0 */
297 ret = pp1_find_power(factor, oldx, oldy, nn, pr, n, ninv, norm);
298 } else /* stage 2 */
299 {
300 double quot;
301 int num;
302 char * sieve = flint_malloc(32768);
303 slong * sieve_index = flint_malloc(32768*sizeof(slong));
304 mp_ptr diff = flint_malloc(16384*nn*sizeof(mp_limb_t));
305 ulong offset[15], num_roots;
306 slong k, index = 0, s;
307 fmpz * roots, * roots2, * evals;
308 fmpz_poly_struct ** tree, ** tree2;
309
310 #if DEBUG
311 ulong primorial;
312 flint_printf("starting stage 2\n");
313 #endif
314
315 /* find primorial <= B2sqrt ... */
316 for (num = 1; num < num_primorials; num++)
317 {
318 if (pp1_primorial[num] > B2sqrt)
319 break;
320 }
321 num--;
322
323 /* ... but not too big */
324 quot = (double) B2sqrt / (double) pp1_primorial[num];
325 if (quot < 1.1 && num > 0)
326 num--;
327
328 #if DEBUG
329 primorial = pp1_primorial[num];
330 flint_printf("found primorial %wu\n", primorial);
331 #endif
332
333 /* adjust B2sqrt to multiple of primorial */
334 B2sqrt = (((B2sqrt - 1)/ pp1_primorial[num]) + 1) * pp1_primorial[num];
335
336 #if DEBUG
337 flint_printf("adjusted B2sqrt %wu\n", B2sqrt);
338 #endif
339
340 /* compute num roots */
341 num++; /* number of primes is 1 more than primorial index */
342 pr = 2;
343 num_roots = B2sqrt;
344 for (i = 0; i < num; i++)
345 {
346 num_roots = (num_roots*(pr - 1))/pr;
347 pr = n_nextprime(pr, 0);
348 }
349
350 #if DEBUG
351 flint_printf("computed num_roots %wu\n", num_roots);
352 flint_printf("B2 = %wu\n", num_roots * B2sqrt);
353 #endif
354
355 /* construct roots */
356 roots = _fmpz_vec_init(num_roots);
357 for (i = 0; i < num_roots; i++)
358 {
359 __mpz_struct * m = _fmpz_promote(roots + i);
360 mpz_realloc(m, nn);
361 }
362
363 roots2 = _fmpz_vec_init(num_roots);
364 for (i = 0; i < num_roots; i++)
365 {
366 __mpz_struct * m = _fmpz_promote(roots2 + i);
367 mpz_realloc(m, nn);
368 }
369
370 evals = _fmpz_vec_init(num_roots);
371
372 #if DEBUG
373 flint_printf("constructed roots\n");
374 #endif
375
376 /* compute differences table v0, ... */
377 mpn_zero(diff, nn);
378 diff[0] = (UWORD(2) << norm);
379
380 /* ... v2, ... */
381 pp1_mulmod(diff + nn, x, x, nn, n, ninv, norm);
382 if (mpn_sub_1(diff + nn, diff + nn, nn, UWORD(2) << norm))
383 mpn_add_n(diff + nn, diff + nn, n, nn);
384
385 /* ... the rest ... v_{k+2} = v_k v_2 - v_{k-2} */
386 k = 2*nn;
387 for (i = 2; i < 16384; i++, k += nn)
388 {
389 pp1_mulmod(diff + k, diff + k - nn, diff + nn, nn, n, ninv, norm);
390 if (mpn_sub_n(diff + k, diff + k, diff + k - 2*nn, nn))
391 mpn_add_n(diff + k, diff + k, n, nn);
392 }
393
394 #if DEBUG
395 flint_printf("conputed differences table\n");
396 #endif
397
398 /* initial positions */
399 pr = 2;
400 for (i = 0; i < num; i++)
401 {
402 offset[i] = pr/2;
403 pr = n_nextprime(pr, 0);
404 }
405
406 s = 0;
407 while (2*s + 1 < B2sqrt)
408 {
409 /* sieve */
410 memset(sieve, 1, 32768);
411 pr = 3;
412 for (i = 1; i < num; i++)
413 {
414 j = offset[i];
415 while (j < 32768)
416 sieve[j] = 0, j += pr;
417
418 /* store offset for start of next sieve run */
419 offset[i] = j - 32768;
420 pr = n_nextprime(pr, 0);
421 }
422
423 /* compute roots */
424 for (i = 0; i < 32768 && 2*(s + i) + 1 < B2sqrt; i++)
425 {
426 if (sieve[i])
427 {
428 ptr_2 = COEFF_TO_PTR(roots[index])->_mp_d;
429 k = (i + 1)/2;
430 for (j = i - 1; j >= k; j--)
431 {
432 if (sieve[j] && sieve[2*j - i])
433 {
434 /* V_{n+k} = V_n V_k - V_{n-k} */
435 ptr_0 = COEFF_TO_PTR(roots[sieve_index[2*j - i]])->_mp_d;
436 ptr_1 = COEFF_TO_PTR(roots[sieve_index[j]])->_mp_d;
437 ptr_k = diff + (i - j)*nn;
438 pp1_mulmod(ptr_2, ptr_1, ptr_k, nn, n, ninv, norm);
439 if (mpn_sub_n(ptr_2, ptr_2, ptr_0, nn))
440 mpn_add_n(ptr_2, ptr_2, n, nn);
441 break;
442 }
443 }
444
445 if (j < k) /* pair not found, compute using pow_ui */
446 {
447 flint_mpn_copyi(ptr_2, x, nn);
448 pp1_pow_ui(ptr_2, y, nn, 2*(s + i) + 1, n, ninv, norm);
449 }
450
451 sieve_index[i] = index;
452 index++;
453 }
454 }
455
456 s += 32768;
457 }
458
459 #if DEBUG
460 flint_printf("roots computed %wd\n", index);
461 #endif
462
463 /* v_1 */
464 flint_mpn_copyi(oldx, x, nn);
465 pp1_pow_ui(oldx, y, nn, B2sqrt, n, ninv, norm);
466 ptr_0 = COEFF_TO_PTR(roots2[0])->_mp_d;
467 flint_mpn_copyi(ptr_0, oldx, nn);
468
469 /* v_2 */
470 ptr_1 = COEFF_TO_PTR(roots2[1])->_mp_d;
471 pp1_mulmod(ptr_1, ptr_0, ptr_0, nn, n, ninv, norm);
472 if (mpn_sub_1(ptr_1, ptr_1, nn, UWORD(2) << norm))
473 mpn_add_n(ptr_1, ptr_1, n, nn);
474
475 for (i = 2; i < num_roots; i++)
476 {
477 /* V_{k+n} = V_k V_n - V_{k-n} */
478 ptr_2 = COEFF_TO_PTR(roots2[i])->_mp_d;
479
480 pp1_mulmod(ptr_2, ptr_1, oldx, nn, n, ninv, norm);
481 if (mpn_sub_n(ptr_2, ptr_2, ptr_0, nn))
482 mpn_add_n(ptr_2, ptr_2, n, nn);
483
484 ptr_0 = ptr_1;
485 ptr_1 = ptr_2;
486 }
487
488 #if DEBUG
489 flint_printf("roots2 computed %wu\n", num_roots);
490 #endif
491
492 for (i = 0; i < num_roots; i++)
493 {
494 mp_size_t sn;
495 __mpz_struct * m1 = COEFF_TO_PTR(roots[i]);
496 __mpz_struct * m2 = COEFF_TO_PTR(roots2[i]);
497
498 ptr_1 = m1->_mp_d;
499 ptr_2 = m2->_mp_d;
500
501 if (norm)
502 {
503 mpn_rshift(ptr_1, ptr_1, nn, norm);
504 mpn_rshift(ptr_2, ptr_2, nn, norm);
505 }
506
507 sn = nn;
508 MPN_NORM(ptr_1, sn);
509 m1->_mp_size = sn;
510
511 sn = nn;
512 MPN_NORM(ptr_2, sn);
513 m2->_mp_size = sn;
514
515 _fmpz_demote_val(roots + i);
516 _fmpz_demote_val(roots2 + i);
517 }
518
519 #if DEBUG
520 flint_printf("normalised roots\n");
521 #endif
522
523 tree = _fmpz_mod_poly_tree_alloc(num_roots);
524 _fmpz_mod_poly_tree_build(tree, roots, num_roots, n_in);
525
526 tree2 = _fmpz_mod_poly_tree_alloc(num_roots);
527 _fmpz_mod_poly_tree_build(tree2, roots2, num_roots, n_in);
528
529 fmpz_poly_mul(tree2[FLINT_CLOG2(num_roots)], tree2[FLINT_CLOG2(num_roots)-1], tree2[FLINT_CLOG2(num_roots)-1]+1);
530
531 #if DEBUG
532 flint_printf("built trees\n");
533 #endif
534
535 _fmpz_mod_poly_evaluate_fmpz_vec_fast_precomp(evals, tree2[FLINT_CLOG2(num_roots)]->coeffs, tree2[FLINT_CLOG2(num_roots)]->length, tree, num_roots, n_in);
536 _fmpz_mod_poly_tree_free(tree, num_roots);
537 _fmpz_mod_poly_tree_free(tree2, num_roots);
538
539 #if DEBUG
540 flint_printf("evaluated at roots\n");
541 #endif
542
543 for (i = 0; i < num_roots; i++)
544 {
545 fmpz_gcd(fac, n_in, evals + i);
546 if (!fmpz_is_zero(fac) && !fmpz_is_one(fac))
547 {
548 ret = 1;
549 break;
550 }
551 }
552
553 _fmpz_vec_clear(evals, num_roots);
554 _fmpz_vec_clear(roots, num_roots);
555 _fmpz_vec_clear(roots2, num_roots);
556 flint_free(sieve);
557 flint_free(sieve_index);
558 flint_free(diff);
559
560 if (i < num_roots)
561 goto cleanup2;
562 }
563
564 #if DEBUG
565 flint_printf("done stage2\n");
566 #endif
567
568 cleanup:
569
570 if (ret)
571 {
572 __mpz_struct * fm = _fmpz_promote(fac);
573 mpz_realloc(fm, r);
574 flint_mpn_copyi(fm->_mp_d, factor, r);
575 fm->_mp_size = r;
576 _fmpz_demote_val(fac);
577 }
578
579 cleanup2:
580
581 flint_free(x);
582 flint_free(y);
583 flint_free(oldx);
584 flint_free(oldy);
585 flint_free(n);
586 flint_free(ninv);
587 flint_free(factor);
588
589 n_primes_clear(iter);
590
591 return ret;
592 }
593