1 /*
2     Copyright (C) 2012 William Hart
3 
4     This file is part of FLINT.
5 
6     FLINT is free software: you can redistribute it and/or modify it under
7     the terms of the GNU Lesser General Public License (LGPL) as published
8     by the Free Software Foundation; either version 2.1 of the License, or
9     (at your option) any later version.  See <http://www.gnu.org/licenses/>.
10 */
11 
12 #define ulong ulongxx /* interferes with system includes */
13 #include <string.h>
14 #undef ulong
15 #define ulong mp_limb_t
16 #include <gmp.h>
17 #include "flint.h"
18 #include "fmpz.h"
19 #include "fmpz_vec.h"
20 #include "fmpz_mod_poly.h"
21 #include "mpn_extras.h"
22 #include "ulong_extras.h"
23 
24 #define DEBUG 0 /* turn on some trace information */
25 
26 #define pp1_mulmod(rxx, axx, bxx, nnn, nxx, ninv, norm)             \
27    flint_mpn_mulmod_preinvn(rxx, axx, bxx, nnn, nxx, ninv, norm)
28 
29 #ifdef FLINT64
30 static
31 ulong pp1_primorial[15] =
32 {
33    UWORD(2), UWORD(6), UWORD(30), UWORD(210), UWORD(2310), UWORD(30030), UWORD(510510), UWORD(9699690),
34    UWORD(223092870), UWORD(6469693230), UWORD(200560490130), UWORD(7420738134810),
35    UWORD(304250263527210), UWORD(13082761331670030), UWORD(614889782588491410)
36 };
37 #define num_primorials 15
38 #else
39 static
40 ulong pp1_primorial[9] =
41 {
42    UWORD(2), UWORD(6), UWORD(30), UWORD(210), UWORD(2310), UWORD(30030), UWORD(510510), UWORD(9699690),
43    UWORD(223092870)
44 };
45 #define num_primorials 9
46 #endif
47 
pp1_set(mp_ptr x1,mp_ptr y1,mp_srcptr x2,mp_srcptr y2,mp_size_t nn)48 void pp1_set(mp_ptr x1, mp_ptr y1,
49               mp_srcptr x2, mp_srcptr y2, mp_size_t nn)
50 {
51    flint_mpn_copyi(x1, x2, nn);
52    flint_mpn_copyi(y1, y2, nn);
53 }
54 
pp1_set_ui(mp_ptr x,mp_size_t nn,ulong norm,ulong c)55 void pp1_set_ui(mp_ptr x, mp_size_t nn, ulong norm, ulong c)
56 {
57    mpn_zero(x, nn);
58    x[0] = (c << norm);
59    if (nn > 1 && norm)
60       x[1] = (c >> (FLINT_BITS - norm));
61 }
62 
pp1_print(mp_srcptr x,mp_srcptr y,mp_size_t nn,ulong norm)63 void pp1_print(mp_srcptr x, mp_srcptr y, mp_size_t nn, ulong norm)
64 {
65    mp_ptr tx = flint_malloc(nn*sizeof(mp_limb_t));
66    mp_ptr ty = flint_malloc(nn*sizeof(mp_limb_t));
67 
68    if (norm)
69    {
70       mpn_rshift(tx, x, nn, norm);
71       mpn_rshift(ty, y, nn, norm);
72    } else
73    {
74        flint_mpn_copyi(tx, x, nn);
75        flint_mpn_copyi(ty, y, nn);
76    }
77 
78    flint_printf("["), gmp_printf("%Nd", tx, nn), flint_printf(", "), gmp_printf("%Nd", ty, nn), flint_printf("]");
79 
80    flint_free(tx);
81    flint_free(ty);
82 }
83 
pp1_2k(mp_ptr x,mp_ptr y,mp_size_t nn,mp_srcptr n,mp_srcptr ninv,mp_srcptr x0,ulong norm)84 void pp1_2k(mp_ptr x, mp_ptr y, mp_size_t nn, mp_srcptr n,
85             mp_srcptr ninv, mp_srcptr x0, ulong norm)
86 {
87    pp1_mulmod(y, y, x, nn, n, ninv, norm);
88    if (mpn_sub_n(y, y, x0, nn))
89       mpn_add_n(y, y, n, nn);
90 
91    pp1_mulmod(x, x, x, nn, n, ninv, norm);
92    if (mpn_sub_1(x, x, nn, UWORD(2) << norm))
93       mpn_add_n(x, x, n, nn);
94 }
95 
pp1_2kp1(mp_ptr x,mp_ptr y,mp_size_t nn,mp_srcptr n,mp_srcptr ninv,mp_srcptr x0,ulong norm)96 void pp1_2kp1(mp_ptr x, mp_ptr y, mp_size_t nn, mp_srcptr n,
97               mp_srcptr ninv, mp_srcptr x0, ulong norm)
98 {
99    pp1_mulmod(x, x, y, nn, n, ninv, norm);
100    if (mpn_sub_n(x, x, x0, nn))
101       mpn_add_n(x, x, n, nn);
102 
103    pp1_mulmod(y, y, y, nn, n, ninv, norm);
104    if (mpn_sub_1(y, y, nn, UWORD(2) << norm))
105       mpn_add_n(y, y, n, nn);
106 }
107 
pp1_pow_ui(mp_ptr x,mp_ptr y,mp_size_t nn,ulong exp,mp_srcptr n,mp_srcptr ninv,ulong norm)108 void pp1_pow_ui(mp_ptr x, mp_ptr y, mp_size_t nn,
109                 ulong exp, mp_srcptr n, mp_srcptr ninv, ulong norm)
110 {
111    mp_limb_t t[30];
112    mp_ptr x0 = t;
113    ulong bit = ((UWORD(1) << FLINT_BIT_COUNT(exp)) >> 2);
114 
115    if (nn > 30)
116       x0 = flint_malloc(nn*sizeof(mp_limb_t));
117    flint_mpn_copyi(x0, x, nn);
118 
119    pp1_mulmod(y, x, x, nn, n, ninv, norm);
120    if (mpn_sub_1(y, y, nn, UWORD(2) << norm))
121       mpn_add_n(y, y, n, nn);
122 
123    while (bit)
124    {
125       if (exp & bit)
126          pp1_2kp1(x, y, nn, n, ninv, x0, norm);
127       else
128          pp1_2k(x, y, nn, n, ninv, x0, norm);
129 
130       bit >>= 1;
131    }
132 
133    if (nn > 30)
134       flint_free(x0);
135 }
136 
pp1_factor(mp_ptr factor,mp_srcptr n,mp_srcptr x,mp_size_t nn,ulong norm)137 mp_size_t pp1_factor(mp_ptr factor, mp_srcptr n,
138                      mp_srcptr x, mp_size_t nn, ulong norm)
139 {
140    mp_size_t ret = 0, xn = nn;
141 
142    mp_ptr n2 = flint_malloc(nn*sizeof(mp_limb_t));
143    mp_ptr x2 = flint_malloc(nn*sizeof(mp_limb_t));
144 
145    if (norm)
146       mpn_rshift(n2, n, nn, norm);
147    else
148       flint_mpn_copyi(n2, n, nn);
149 
150    if (norm)
151       mpn_rshift(x2, x, nn, norm);
152    else
153       flint_mpn_copyi(x2, x, nn);
154 
155    if (mpn_sub_1(x2, x2, nn, 2))
156       mpn_add_n(x2, x2, n2, nn);
157 
158    MPN_NORM(x2, xn);
159 
160    if (xn == 0)
161       goto cleanup;
162 
163    ret = flint_mpn_gcd_full(factor, n2, nn, x2, xn);
164 
165 cleanup:
166 
167    flint_free(n2);
168    flint_free(x2);
169 
170    return ret;
171 }
172 
pp1_find_power(mp_ptr factor,mp_ptr x,mp_ptr y,mp_size_t nn,ulong p,mp_srcptr n,mp_srcptr ninv,ulong norm)173 mp_size_t pp1_find_power(mp_ptr factor, mp_ptr x, mp_ptr y, mp_size_t nn,
174                           ulong p, mp_srcptr n, mp_srcptr ninv, ulong norm)
175 {
176    mp_size_t ret;
177 
178    do
179    {
180       pp1_pow_ui(x, y, nn, p, n, ninv, norm);
181       ret = pp1_factor(factor, n, x, nn, norm);
182    } while (ret == 1 && factor[0] == 1);
183 
184    return ret;
185 }
186 
fmpz_factor_pp1(fmpz_t fac,const fmpz_t n_in,ulong B1,ulong B2sqrt,ulong c)187 int fmpz_factor_pp1(fmpz_t fac, const fmpz_t n_in, ulong B1, ulong B2sqrt, ulong c)
188 {
189    slong i, j;
190    int ret = 0;
191    mp_size_t nn = fmpz_size(n_in), r;
192    mp_ptr x, y, oldx, oldy, n, ninv, factor, ptr_0, ptr_1, ptr_2, ptr_k;
193    ulong pr, oldpr, sqrt, bits0, norm;
194    n_primes_t iter;
195 
196    if (fmpz_is_even(n_in))
197    {
198       fmpz_set_ui(fac, 2);
199       return 1;
200    }
201 
202 #if DEBUG
203    flint_printf("starting stage 1\n");
204 #endif
205 
206    n_primes_init(iter);
207 
208    sqrt = n_sqrt(B1);
209    bits0 = FLINT_BIT_COUNT(B1);
210 
211    x      = flint_malloc(nn*sizeof(mp_limb_t));
212    y      = flint_malloc(nn*sizeof(mp_limb_t));
213    oldx   = flint_malloc(nn*sizeof(mp_limb_t));
214    oldy   = flint_malloc(nn*sizeof(mp_limb_t));
215    n      = flint_malloc(nn*sizeof(mp_limb_t));
216    ninv   = flint_malloc(nn*sizeof(mp_limb_t));
217    factor = flint_malloc(nn*sizeof(mp_limb_t));
218 
219    if (nn == 1)
220    {
221       n[0] = fmpz_get_ui(n_in);
222       count_leading_zeros(norm, n[0]);
223       n[0] <<= norm;
224    } else
225    {
226       mp_ptr np = COEFF_TO_PTR(*n_in)->_mp_d;
227       count_leading_zeros(norm, np[nn - 1]);
228       if (norm)
229          mpn_lshift(n, np, nn, norm);
230       else
231          flint_mpn_copyi(n, np, nn);
232    }
233 
234    flint_mpn_preinvn(ninv, n, nn);
235 
236    pp1_set_ui(x, nn, norm, c);
237 
238    /* mul by various prime powers */
239    pr = 0;
240    oldpr = 0;
241 
242    for (i = 0; pr < B1; )
243    {
244       j = i + 1024;
245       oldpr = pr;
246       pp1_set(oldx, oldy, x, y, nn);
247       for ( ; i < j; i++)
248       {
249          pr = n_primes_next(iter);
250          if (pr < sqrt)
251          {
252             ulong bits = FLINT_BIT_COUNT(pr);
253             ulong exp = bits0 / bits;
254             pp1_pow_ui(x, y, nn, n_pow(pr, exp), n, ninv, norm);
255          } else
256             pp1_pow_ui(x, y, nn, pr, n, ninv, norm);
257       }
258 
259       r = pp1_factor(factor, n, x, nn, norm);
260       if (r == 0)
261          break;
262       if (r != 1 || factor[0] != 1)
263       {
264          ret = 1;
265          goto cleanup;
266       }
267    }
268 
269    if (pr < B1) /* factor = 0 */
270    {
271       n_primes_jump_after(iter, oldpr);
272       pp1_set(x, y, oldx, oldy, nn);
273 
274       do
275       {
276          pr = n_primes_next(iter);
277          pp1_set(oldx, oldy, x, y, nn);
278          if (pr < sqrt)
279          {
280             ulong bits = FLINT_BIT_COUNT(pr);
281             ulong exp = bits0 / bits;
282             pp1_pow_ui(x, y, nn, n_pow(pr, exp), n, ninv, norm);
283          } else
284             pp1_pow_ui(x, y, nn, pr, n, ninv, norm);
285 
286          r = pp1_factor(factor, n, x, nn, norm);
287          if (r == 0)
288             break;
289          if (r != 1 || factor[0] != 1)
290          {
291             ret = 1;
292             goto cleanup;
293          }
294       } while (1);
295 
296       /* factor is still 0 */
297       ret = pp1_find_power(factor, oldx, oldy, nn, pr, n, ninv, norm);
298    } else /* stage 2 */
299    {
300       double quot;
301       int num;
302       char * sieve = flint_malloc(32768);
303       slong * sieve_index = flint_malloc(32768*sizeof(slong));
304       mp_ptr diff = flint_malloc(16384*nn*sizeof(mp_limb_t));
305       ulong offset[15], num_roots;
306       slong k, index = 0, s;
307       fmpz * roots, * roots2, * evals;
308       fmpz_poly_struct ** tree, ** tree2;
309 
310 #if DEBUG
311       ulong primorial;
312       flint_printf("starting stage 2\n");
313 #endif
314 
315       /* find primorial <= B2sqrt ... */
316       for (num = 1; num < num_primorials; num++)
317       {
318          if (pp1_primorial[num] > B2sqrt)
319             break;
320       }
321       num--;
322 
323       /* ... but not too big */
324       quot = (double) B2sqrt / (double) pp1_primorial[num];
325       if (quot < 1.1 && num > 0)
326          num--;
327 
328 #if DEBUG
329       primorial = pp1_primorial[num];
330       flint_printf("found primorial %wu\n", primorial);
331 #endif
332 
333       /* adjust B2sqrt to multiple of primorial */
334       B2sqrt = (((B2sqrt - 1)/ pp1_primorial[num]) + 1) * pp1_primorial[num];
335 
336 #if DEBUG
337       flint_printf("adjusted B2sqrt %wu\n", B2sqrt);
338 #endif
339 
340       /* compute num roots */
341       num++; /* number of primes is 1 more than primorial index */
342       pr = 2;
343       num_roots = B2sqrt;
344       for (i = 0; i < num; i++)
345       {
346          num_roots = (num_roots*(pr - 1))/pr;
347          pr = n_nextprime(pr, 0);
348       }
349 
350 #if DEBUG
351       flint_printf("computed num_roots %wu\n", num_roots);
352       flint_printf("B2 = %wu\n", num_roots * B2sqrt);
353 #endif
354 
355       /* construct roots */
356       roots = _fmpz_vec_init(num_roots);
357       for (i = 0; i < num_roots; i++)
358       {
359          __mpz_struct * m = _fmpz_promote(roots + i);
360          mpz_realloc(m, nn);
361       }
362 
363       roots2 = _fmpz_vec_init(num_roots);
364       for (i = 0; i < num_roots; i++)
365       {
366          __mpz_struct * m = _fmpz_promote(roots2 + i);
367          mpz_realloc(m, nn);
368       }
369 
370       evals = _fmpz_vec_init(num_roots);
371 
372 #if DEBUG
373       flint_printf("constructed roots\n");
374 #endif
375 
376       /* compute differences table v0, ... */
377       mpn_zero(diff, nn);
378       diff[0] = (UWORD(2) << norm);
379 
380       /* ... v2, ... */
381       pp1_mulmod(diff + nn, x, x, nn, n, ninv, norm);
382       if (mpn_sub_1(diff + nn, diff + nn, nn, UWORD(2) << norm))
383          mpn_add_n(diff + nn, diff + nn, n, nn);
384 
385       /* ... the rest ... v_{k+2} = v_k v_2 - v_{k-2} */
386       k = 2*nn;
387       for (i = 2; i < 16384; i++, k += nn)
388       {
389          pp1_mulmod(diff + k, diff + k - nn, diff + nn, nn, n, ninv, norm);
390          if (mpn_sub_n(diff + k, diff + k, diff + k - 2*nn, nn))
391             mpn_add_n(diff + k, diff + k, n, nn);
392       }
393 
394 #if DEBUG
395       flint_printf("conputed differences table\n");
396 #endif
397 
398       /* initial positions */
399       pr = 2;
400       for (i = 0; i < num; i++)
401       {
402          offset[i] = pr/2;
403          pr = n_nextprime(pr, 0);
404       }
405 
406       s = 0;
407       while (2*s + 1 < B2sqrt)
408       {
409          /* sieve */
410          memset(sieve, 1, 32768);
411          pr = 3;
412          for (i = 1; i < num; i++)
413          {
414             j = offset[i];
415             while (j < 32768)
416                sieve[j] = 0, j += pr;
417 
418             /* store offset for start of next sieve run */
419             offset[i] = j - 32768;
420             pr = n_nextprime(pr, 0);
421          }
422 
423          /* compute roots */
424          for (i = 0; i < 32768 && 2*(s + i) + 1 < B2sqrt; i++)
425          {
426             if (sieve[i])
427             {
428                ptr_2 = COEFF_TO_PTR(roots[index])->_mp_d;
429                k = (i + 1)/2;
430                for (j = i - 1; j >= k; j--)
431                {
432                   if (sieve[j] && sieve[2*j - i])
433                   {
434                      /* V_{n+k} = V_n V_k - V_{n-k} */
435                      ptr_0 = COEFF_TO_PTR(roots[sieve_index[2*j - i]])->_mp_d;
436                      ptr_1 = COEFF_TO_PTR(roots[sieve_index[j]])->_mp_d;
437                      ptr_k = diff + (i - j)*nn;
438                      pp1_mulmod(ptr_2, ptr_1, ptr_k, nn, n, ninv, norm);
439                      if (mpn_sub_n(ptr_2, ptr_2, ptr_0, nn))
440                         mpn_add_n(ptr_2, ptr_2, n, nn);
441                      break;
442                   }
443                }
444 
445                if (j < k) /* pair not found, compute using pow_ui */
446                {
447                   flint_mpn_copyi(ptr_2, x, nn);
448                   pp1_pow_ui(ptr_2, y, nn, 2*(s + i) + 1, n, ninv, norm);
449                }
450 
451                sieve_index[i] = index;
452                index++;
453             }
454          }
455 
456          s += 32768;
457       }
458 
459 #if DEBUG
460       flint_printf("roots computed %wd\n", index);
461 #endif
462 
463       /* v_1 */
464       flint_mpn_copyi(oldx, x, nn);
465       pp1_pow_ui(oldx, y, nn, B2sqrt, n, ninv, norm);
466       ptr_0 = COEFF_TO_PTR(roots2[0])->_mp_d;
467       flint_mpn_copyi(ptr_0, oldx, nn);
468 
469       /* v_2 */
470       ptr_1 = COEFF_TO_PTR(roots2[1])->_mp_d;
471       pp1_mulmod(ptr_1, ptr_0, ptr_0, nn, n, ninv, norm);
472       if (mpn_sub_1(ptr_1, ptr_1, nn, UWORD(2) << norm))
473          mpn_add_n(ptr_1, ptr_1, n, nn);
474 
475       for (i = 2; i < num_roots; i++)
476       {
477          /* V_{k+n} = V_k V_n - V_{k-n} */
478          ptr_2 = COEFF_TO_PTR(roots2[i])->_mp_d;
479 
480          pp1_mulmod(ptr_2, ptr_1, oldx, nn, n, ninv, norm);
481          if (mpn_sub_n(ptr_2, ptr_2, ptr_0, nn))
482              mpn_add_n(ptr_2, ptr_2, n, nn);
483 
484          ptr_0 = ptr_1;
485          ptr_1 = ptr_2;
486       }
487 
488 #if DEBUG
489       flint_printf("roots2 computed %wu\n", num_roots);
490 #endif
491 
492       for (i = 0; i < num_roots; i++)
493       {
494          mp_size_t sn;
495          __mpz_struct * m1 = COEFF_TO_PTR(roots[i]);
496          __mpz_struct * m2 = COEFF_TO_PTR(roots2[i]);
497 
498          ptr_1 = m1->_mp_d;
499          ptr_2 = m2->_mp_d;
500 
501          if (norm)
502 	 {
503 	     mpn_rshift(ptr_1, ptr_1, nn, norm);
504              mpn_rshift(ptr_2, ptr_2, nn, norm);
505 	 }
506 
507          sn = nn;
508          MPN_NORM(ptr_1, sn);
509          m1->_mp_size = sn;
510 
511          sn = nn;
512          MPN_NORM(ptr_2, sn);
513          m2->_mp_size = sn;
514 
515          _fmpz_demote_val(roots + i);
516          _fmpz_demote_val(roots2 + i);
517       }
518 
519 #if DEBUG
520       flint_printf("normalised roots\n");
521 #endif
522 
523       tree = _fmpz_mod_poly_tree_alloc(num_roots);
524       _fmpz_mod_poly_tree_build(tree, roots, num_roots, n_in);
525 
526       tree2 = _fmpz_mod_poly_tree_alloc(num_roots);
527       _fmpz_mod_poly_tree_build(tree2, roots2, num_roots, n_in);
528 
529       fmpz_poly_mul(tree2[FLINT_CLOG2(num_roots)], tree2[FLINT_CLOG2(num_roots)-1], tree2[FLINT_CLOG2(num_roots)-1]+1);
530 
531 #if DEBUG
532       flint_printf("built trees\n");
533 #endif
534 
535       _fmpz_mod_poly_evaluate_fmpz_vec_fast_precomp(evals, tree2[FLINT_CLOG2(num_roots)]->coeffs, tree2[FLINT_CLOG2(num_roots)]->length, tree, num_roots, n_in);
536       _fmpz_mod_poly_tree_free(tree, num_roots);
537       _fmpz_mod_poly_tree_free(tree2, num_roots);
538 
539 #if DEBUG
540       flint_printf("evaluated at roots\n");
541 #endif
542 
543       for (i = 0; i < num_roots; i++)
544       {
545          fmpz_gcd(fac, n_in, evals + i);
546          if (!fmpz_is_zero(fac) && !fmpz_is_one(fac))
547          {
548             ret = 1;
549             break;
550          }
551       }
552 
553       _fmpz_vec_clear(evals, num_roots);
554       _fmpz_vec_clear(roots, num_roots);
555       _fmpz_vec_clear(roots2, num_roots);
556       flint_free(sieve);
557       flint_free(sieve_index);
558       flint_free(diff);
559 
560       if (i < num_roots)
561          goto cleanup2;
562    }
563 
564 #if DEBUG
565    flint_printf("done stage2\n");
566 #endif
567 
568 cleanup:
569 
570    if (ret)
571    {
572       __mpz_struct * fm = _fmpz_promote(fac);
573       mpz_realloc(fm, r);
574       flint_mpn_copyi(fm->_mp_d, factor, r);
575       fm->_mp_size = r;
576       _fmpz_demote_val(fac);
577    }
578 
579 cleanup2:
580 
581    flint_free(x);
582    flint_free(y);
583    flint_free(oldx);
584    flint_free(oldy);
585    flint_free(n);
586    flint_free(ninv);
587    flint_free(factor);
588 
589    n_primes_clear(iter);
590 
591    return ret;
592 }
593