1 #include "cado.h" // IWYU pragma: keep
2 
3 #include <cctype>          // for isspace, isdigit
4 #include <cerrno>          // for errno
5 #include <climits>         // for UINT_MAX
6 #include <cstdlib>         // for exit, EXIT_FAILURE, strtoul
7 #include <cstring>         // for strerror
8 #include <cstdarg>         // IWYU pragma: keep
9 #include <algorithm>       // for find, min
10 #include <memory>          // for allocator_traits<>::value_type
11 #include <vector>          // for vector, vector<>::iterator
12 #include <sstream>
13 #include <thread>                         // for thread
14 #include <gmp.h>           // for mpz_set, mpz_cmp_ui, mpz_t, mpz_cmp, gmp_r...
15 #include "gmp_aux.h"
16 #include "las-todo-list.hpp"
17 #include "las-galois.hpp"  // for skip_galois_roots
18 #include "macros.h"        // for ASSERT_ALWAYS
19 #include "mpz_poly.h"
20 #include "rootfinder.h" // mpz_poly_roots_ulong
21 #include "verbose.h"    // verbose_output_print
22 #include "params.h"
23 
24 /* Put in r the smallest legitimate special-q value that it at least
25  * s + diff (note that if s+diff is already legitimate, then r = s+diff
26  * will result.
27  * In case of composite sq, also store the factorization of r in fac_r
28  */
29 
next_legitimate_specialq(cxx_mpz & r,std::vector<uint64_t> & fac_r,cxx_mpz const & s,const unsigned long diff,las_todo_list const & L)30 static void next_legitimate_specialq(cxx_mpz & r, std::vector<uint64_t> & fac_r, cxx_mpz const & s, const unsigned long diff, las_todo_list const & L)
31 {
32     if (L.allow_composite_q) {
33         unsigned long tfac[64];
34         int nf = next_mpz_with_factor_constraints(r, tfac,
35                 s, diff, L.qfac_min, L.qfac_max);
36         fac_r.assign(tfac, tfac + nf);
37     } else {
38         mpz_add_ui(r, s, diff);
39         /* mpz_nextprime() returns a prime *greater than* its input argument,
40            which we don't always want, so we subtract 1 first. */
41         mpz_sub_ui(r, r, 1);
42         mpz_nextprime(r, r);
43     }
44 }
45 
next_legitimate_specialq(cxx_mpz & r,cxx_mpz const & s,const unsigned long diff,las_todo_list const & L)46 static void next_legitimate_specialq(cxx_mpz & r, cxx_mpz const & s, const unsigned long diff, las_todo_list const & L)
47 {
48     std::vector<uint64_t> t;
49     next_legitimate_specialq(r, t, s, diff, L);
50 }
51 
configure_switches(cxx_param_list & pl)52 void las_todo_list::configure_switches(cxx_param_list & pl)
53 {
54     param_list_configure_switch(pl, "-allow-compsq", NULL);
55     param_list_configure_switch(pl, "-print-todo-list", NULL);
56 }
57 
declare_usage(cxx_param_list & pl)58 void las_todo_list::declare_usage(cxx_param_list & pl)
59 {
60     param_list_decl_usage(pl, "sqside", "put special-q on this side");
61     param_list_decl_usage(pl, "q0",   "left bound of special-q range");
62     param_list_decl_usage(pl, "q1",   "right bound of special-q range");
63     param_list_decl_usage(pl, "rho",  "sieve only root r mod q0");
64     param_list_decl_usage(pl, "random-sample", "Sample this number of special-q's at random, within the range [q0,q1]");
65     param_list_decl_usage(pl, "nq", "Process this number of special-q's and stop");
66     param_list_decl_usage(pl, "todo", "provide file with a list of special-q to sieve instead of qrange");
67     param_list_decl_usage(pl, "allow-compsq", "allows composite special-q");
68     param_list_decl_usage(pl, "qfac-min", "factors of q must be at least that");
69     param_list_decl_usage(pl, "qfac-max", "factors of q must be at most that");
70     param_list_decl_usage(pl, "print-todo-list", "only print the special-q's to be sieved");
71 }
72 
~las_todo_list()73 las_todo_list::~las_todo_list()
74 {
75     if (todo_list_fd) {
76         fclose(todo_list_fd);
77         todo_list_fd = NULL;
78     }
79 }
80 
las_todo_list(cxx_cado_poly const & cpoly,cxx_param_list & pl)81 las_todo_list::las_todo_list(cxx_cado_poly const & cpoly, cxx_param_list & pl)
82     : cpoly(cpoly)
83 {
84     nq_pushed = 0;
85     nq_max = UINT_MAX;
86     random_sampling = 0;
87     if (param_list_parse_uint(pl, "random-sample", &nq_max)) {
88         random_sampling = 1;
89     } else if (param_list_parse_uint(pl, "nq", &nq_max)) {
90         if (param_list_lookup_string(pl, "rho")) {
91             fprintf(stderr, "Error: argument -nq is incompatible with -rho\n");
92             exit(EXIT_FAILURE);
93         }
94         if (param_list_lookup_string(pl, "q1"))
95             verbose_output_print(0, 1, "# Warning: argument -nq takes priority over -q1 ; -q1 ignored\n");
96     }
97 
98     sqside = 1;
99     if (!param_list_parse_int(pl, "sqside", &sqside)) {
100         verbose_output_print(0, 1, "# Warning: sqside not given, "
101                 "assuming side 1 for backward compatibility.\n");
102     }
103 
104     /* Init and parse info regarding work to be done by the siever */
105     /* Actual parsing of the command-line fragments is done within
106      * las_todo_feed, but this is an admittedly contrived way to work */
107     const char * filename = param_list_lookup_string(pl, "todo");
108     if (filename) {
109         todo_list_fd = fopen(filename, "r");
110         if (todo_list_fd == NULL) {
111             fprintf(stderr, "%s: %s\n", filename, strerror(errno));
112             /* There's no point in proceeding, since it would really change
113              * the behaviour of the program to do so */
114             exit(EXIT_FAILURE);
115         }
116     } else {
117         todo_list_fd = NULL;
118     }
119 
120     /* composite special-q ? Note: this block is present both in
121      * las-todo-list.cpp and las-info.cpp */
122     if ((allow_composite_q = param_list_parse_switch(pl, "-allow-compsq"))) {
123         /* defaults are set in the class description */
124         param_list_parse_uint64(pl, "qfac-min", &qfac_min);
125         param_list_parse_uint64(pl, "qfac-max", &qfac_max);
126     }
127 
128     galois = param_list_lookup_string(pl, "galois");
129 
130     if (allow_composite_q && galois) {
131         fprintf(stderr, "-galois and -allow-compsq are incompatible options at the moment");
132         exit(EXIT_FAILURE);
133     }
134 
135     print_todo_list_flag = param_list_parse_switch(pl, "-print-todo-list");
136 
137     /* It's not forbidden to miss -q0 */
138     param_list_parse_mpz(pl, "q0", q0);
139     param_list_parse_mpz(pl, "q1", q1);
140 
141     if (mpz_cmp_ui(q0, 0) == 0) {
142         if (!todo_list_fd) {
143             fprintf(stderr, "Error: Need either -todo or -q0\n");
144             exit(EXIT_FAILURE);
145         }
146         return;
147     }
148 
149     gmp_randstate_t rstate;
150     gmp_randinit_default(rstate);
151 
152     if (mpz_cmp_ui(q1, 0) != 0) {
153         next_legitimate_specialq(q0, q0, 0, *this);
154     } else {
155         /* We don't have -q1. If we have -rho, we sieve only <q0,
156          * rho>. */
157         cxx_mpz t;
158         if (param_list_parse_mpz(pl, "rho", (mpz_ptr) t)) {
159             cxx_mpz q0_cmdline = q0;
160             std::vector<uint64_t> fac_q;
161             next_legitimate_specialq(q0, fac_q, q0, 0, *this);
162             if (mpz_cmp(q0, q0_cmdline) != 0) {
163                 fprintf(stderr, "Error: q0 is not a legitimate special-q\n");
164                 exit(EXIT_FAILURE);
165             }
166             std::vector<cxx_mpz> roots = mpz_poly_roots(cpoly->pols[sqside], q0, fac_q, rstate);
167             if (std::find(roots.begin(), roots.end(), t) == roots.end()) {
168                 fprintf(stderr, "Error: rho is not a root modulo q0\n");
169                 exit(EXIT_FAILURE);
170             }
171             push_unlocked(q0, t, sqside);
172             /* Set empty interval [q0 + 1, q0] as special-q interval */
173             mpz_set(q1, q0);
174             mpz_add_ui (q0, q0, 1);
175         } else {
176             /* If we don't have -rho, we sieve only q0, but all roots of it.
177                If -q0 does not give a legitimate special-q value, advance to the
178                next legitimate one. */
179             mpz_set(t, q0);
180             next_legitimate_specialq(q0, q0, 0, *this);
181             mpz_set(q1, q0);
182         }
183     }
184 
185     if (random_sampling) {
186         if (mpz_cmp_ui(q0, 0) == 0 || mpz_cmp_ui(q1, 0) == 0) {
187             fprintf(stderr, "Error: --random-sample requires -q0 and -q1\n");
188             exit(EXIT_FAILURE);
189         }
190         /* For random sampling, it's important that for all integers in
191          * the range [q0, q1[, their nextprime() is within the range, and
192          * that at least one such has roots mod f. Make sure that
193          * this is the case.
194          */
195         cxx_mpz q, q1_orig = q1;
196         /* we need to know the limit of the q range */
197         for(unsigned long i = 1 ; ; i++) {
198             mpz_sub_ui(q, q1, i);
199             std::vector<uint64_t> fac_q;
200             next_legitimate_specialq(q, fac_q, q, 0, *this);
201             if (mpz_cmp(q, q1) >= 0)
202                 continue;
203             if (!mpz_poly_roots(cpoly->pols[sqside], q, fac_q, rstate).empty())
204                 break;
205             /* small optimization: avoid redoing root finding
206              * several times */
207             mpz_set (q1, q);
208             i = 1;
209         }
210         /* now q is the largest prime < q1 with f having roots mod q */
211         mpz_add_ui (q1, q, 1);
212 
213         /* so now if we pick x an integer in [q0, q1[, then nextprime(x-1)
214          * will be in [q0, q1_orig[, which is what we look for,
215          * really.
216          */
217         if (mpz_cmp(q0, q1) > 0) {
218             gmp_fprintf(stderr, "Error: range [%Zd,%Zd[ contains no prime with roots mod f\n",
219                     (mpz_srcptr) q0,
220                     (mpz_srcptr) q1_orig);
221             exit(EXIT_FAILURE);
222         }
223     }
224 
225     gmp_randclear(rstate);
226 }
227 
228 /* {{{ Populating the todo list */
229 /* See below in main() for documentation about the q-range and q-list
230  * modes */
231 /* These functions return non-zero if the todo list is not empty.
232  * Note: contrary to the qlist mode, here the q-range will be pushed at
233  * once (but the caller doesn't need to know that).
234  *
235  * Note: the random state is used by both the random sampler **AND** the
236  * rootfinder.
237  */
feed_qrange(gmp_randstate_t rstate)238 bool las_todo_list::feed_qrange(gmp_randstate_t rstate)
239 {
240     /* If we still have entries in the stack, don't add more now */
241     if (!super::empty())
242         return true;
243 
244     mpz_poly_ptr f = cpoly->pols[sqside];
245 
246     std::vector<uint64_t> fac_q;
247 
248     if (!random_sampling) {
249         /* We're going to process the sq's and put them into the list
250            The loop processes all special-q in [q0, q1]. On loop entry,
251            the value in q0 is known to be a legitimate special-q. Its
252            factorization is lost, so we recompute it. */
253 
254         /* handy aliases */
255         cxx_mpz & q = q0;
256         next_legitimate_specialq(q, fac_q, q, 0, *this);
257 
258         struct q_r_pair {
259             cxx_mpz q;
260             cxx_mpz r;
261             q_r_pair(const mpz_t _q, const mpz_t _r) {
262                 mpz_set(q, _q);
263                 mpz_set(r, _r);
264             }
265         };
266 
267         std::vector<q_r_pair> my_list;
268 
269         int nb_no_roots = 0;
270         int nb_rootfinding = 0;
271         /* If nq_max is specified, then q1 has no effect, even though it
272          * has been set equal to q */
273         for ( ; (nq_max < UINT_MAX || mpz_cmp(q, q1) < 0) &&
274                 nq_pushed + my_list.size() < nq_max ; )
275         {
276             std::vector<cxx_mpz> roots = mpz_poly_roots(f, q, fac_q, rstate);
277 
278             nb_rootfinding++;
279             if (roots.empty()) nb_no_roots++;
280 
281             if (galois) {
282                 size_t nroots = skip_galois_roots(roots.size(), q, (mpz_t*)roots.data(), galois);
283                 roots.erase(roots.begin() + nroots, roots.end());
284             }
285 
286             for (auto const & r : roots)
287                 my_list.push_back({q, r});
288 
289             next_legitimate_specialq(q, fac_q, q, 1, *this);
290         }
291 
292         if (nb_no_roots) {
293             verbose_output_vfprint(0, 1, gmp_vfprintf, "# polynomial has no roots for %d of the %d primes that were tried\n", nb_no_roots, nb_rootfinding);
294         }
295 
296         // Truncate to nq_max if necessary and push the sq in reverse
297         // order, because they are processed via a stack (required for
298         // the descent).
299         int push_here = my_list.size();
300         if (nq_max < UINT_MAX)
301             push_here = std::min(push_here, int(nq_max - nq_pushed));
302         for(int i = 0 ; i < push_here ; i++) {
303             nq_pushed++;
304             int ind = push_here-i-1;
305             push_unlocked(my_list[ind].q, my_list[ind].r, sqside);
306         }
307     } else { /* random sampling case */
308         /* we care about being uniform here */
309         cxx_mpz q;
310         cxx_mpz diff;
311         mpz_sub(diff, q1, q0);
312         ASSERT_ALWAYS(nq_pushed == 0 || nq_pushed == nq_max);
313 	unsigned long n = nq_max;
314         unsigned int spin = 0;
315         for ( ; nq_pushed < n ; ) {
316             /* try in [q0 + k * (q1-q0) / n, q0 + (k+1) * (q1-q0) / n[ */
317             cxx_mpz q0l, q1l;
318 	    /* we use k = n-1-nq_pushed instead of k=nq_pushed so that
319 	       special-q's are sieved in increasing order */
320 	    unsigned long k = n - 1 - nq_pushed;
321             mpz_mul_ui(q0l, diff, k);
322             mpz_mul_ui(q1l, diff, k + 1);
323             mpz_fdiv_q_ui(q0l, q0l, n);
324             mpz_fdiv_q_ui(q1l, q1l, n);
325             mpz_add(q0l, q0, q0l);
326             mpz_add(q1l, q0, q1l);
327 
328             mpz_sub(q, q1l, q0l);
329             mpz_urandomm(q, rstate, q);
330             mpz_add(q, q, q0l);
331             next_legitimate_specialq(q, fac_q, q, 0, *this);
332             std::vector<cxx_mpz> roots = mpz_poly_roots(f, q, fac_q, rstate);
333             if (roots.empty()) {
334                 spin++;
335                 if (spin >= 1000) {
336                     fprintf(stderr, "Error: cannot find primes with roots in one of the sub-ranges for random sampling\n");
337                     exit(EXIT_FAILURE);
338                 }
339                 continue;
340             }
341             spin = 0;
342             if (galois) {
343                 size_t nroots = skip_galois_roots(roots.size(), q, (mpz_t*)roots.data(), galois);
344                 roots.erase(roots.begin() + nroots, roots.end());
345             }
346             nq_pushed++;
347             push_unlocked(q, roots[gmp_urandomm_ui(rstate, roots.size())], sqside);
348         }
349     }
350 
351     return !super::empty();
352 }
353 
354 /* Format of a file with a list of special-q (-todo option):
355  *   - Comments are allowed (start line with #)
356  *   - Blank lines are ignored
357  *   - Each valid line must have the form
358  *       s q r
359  *     where s is the side (0 or 1) of the special q, and q and r are as usual.
360  */
feed_qlist()361 bool las_todo_list::feed_qlist()
362 {
363     if (!super::empty())
364         return true;
365 
366     char line[1024];
367     char * x;
368     for( ; ; ) {
369         /* We should consider the blocking case as well. (e.g. we're
370          * reading from a pipe.)  */
371         x = fgets(line, sizeof(line), todo_list_fd);
372         /* Tolerate comments and blank lines */
373         if (x == NULL) return 0;
374         if (*x == '#') continue;
375         for( ; *x && isspace(*x) ; x++) ;
376         if (!*x) continue;
377         break;
378     }
379 
380     /* We have a new entry to parse */
381     cxx_mpz p, r;
382     int side = -1;
383     int rc;
384     switch(*x++) {
385         case '0':
386         case '1':
387         case '2':
388         case '3':
389         case '4':
390         case '5':
391         case '6':
392         case '7':
393         case '8':
394         case '9':
395                    x--;
396                    errno = 0;
397                    side = strtoul(x, &x, 0);
398                    ASSERT_ALWAYS(!errno);
399                    ASSERT_ALWAYS(side < 2);
400                    break;
401         default:
402                    fprintf(stderr,
403                            "parse error in todo file, while reading: %s\n",
404                            line);
405                    /* We may as well default on the command-line switch */
406                    exit(EXIT_FAILURE);
407     }
408 
409     int nread1 = 0;
410     int nread2 = 0;
411 
412     gmp_randstate_t rstate;
413     gmp_randinit_default(rstate);
414     mpz_set_ui(r, 0);
415     for( ; *x && !isdigit(*x) ; x++) ;
416     rc = gmp_sscanf(x, "%Zi%n %Zi%n", (mpz_ptr) p, &nread1, (mpz_ptr) r, &nread2);
417     ASSERT_ALWAYS(rc == 1 || rc == 2); /* %n does not count */
418     x += (rc==1) ? nread1 : nread2;
419     {
420         mpz_poly_ptr f = cpoly->pols[side];
421         /* specifying the rational root as <0
422          * means that it must be recomputed. Putting 0 does not have this
423          * effect, since it is a legitimate value after all.
424          */
425         if (rc < 2 || (f->deg == 1 && rc == 2 && mpz_cmp_ui(r, 0) < 0)) {
426             // For rational side, we can compute the root easily.
427             ASSERT_ALWAYS(f->deg == 1);
428             /* ugly cast, yes */
429             int nroots = mpz_poly_roots ((mpz_t*) &r, f, p, rstate);
430             ASSERT_ALWAYS(nroots == 1);
431         }
432     }
433     gmp_randclear(rstate);
434 
435     for( ; *x ; x++) ASSERT_ALWAYS(isspace(*x));
436     push_unlocked(p, r, side);
437     return true;
438 }
439 
440 
feed(gmp_randstate_t rstate)441 bool las_todo_list::feed(gmp_randstate_t rstate)
442 {
443     std::lock_guard<std::mutex> foo(mm);
444     if (!super::empty())
445         return true;
446     if (todo_list_fd)
447         return feed_qlist();
448     else
449         return feed_qrange(rstate);
450 }
451 
452 /* This exists because of the race condition between feed() and pop()
453  */
feed_and_pop(gmp_randstate_t rstate)454 las_todo_entry * las_todo_list::feed_and_pop(gmp_randstate_t rstate)
455 {
456     std::lock_guard<std::mutex> foo(mm);
457     if (super::empty()) {
458         if (todo_list_fd)
459             feed_qlist();
460         else
461             feed_qrange(rstate);
462     }
463     if (super::empty())
464         return nullptr;
465     las_todo_entry doing = super::top();
466     super::pop();
467     history.push_back(doing);
468     return &history.back();
469 }
470 /* }}} */
471 
print_todo_list(cxx_param_list & pl,gmp_randstate_ptr rstate,int nthreads) const472 void las_todo_list::print_todo_list(cxx_param_list & pl, gmp_randstate_ptr rstate, int nthreads) const
473 {
474 
475     if (random_sampling) {
476         /* Then we cannot print the todo list in a multithreaded way
477          * without breaking the randomness predictability. It's a bit
478          * annoying, yes. On the other hand, it's highly likely in this
479          * case that the number of q's to pick is small enough anyway !
480          */
481         nthreads = 1;
482     }
483 
484     std::vector<std::vector<las_todo_entry>> lists(nthreads);
485     std::vector<std::thread> subjobs;
486 
487     auto segment = [&, this](unsigned int i) {
488         cxx_param_list pl2 = pl;
489         cxx_mpz tmp;
490         mpz_sub(tmp, q1, q0);
491         mpz_mul_ui(tmp, tmp, i);
492         mpz_fdiv_q_ui(tmp, tmp, nthreads);
493         mpz_add(tmp, q0, tmp);
494         {
495             std::ostringstream os;
496             os << tmp;
497             param_list_add_key(pl2, "q0", os.str().c_str(), PARAMETER_FROM_CMDLINE);
498         }
499 
500         mpz_sub(tmp, q1, q0);
501         mpz_mul_ui(tmp, tmp, i + 1);
502         mpz_fdiv_q_ui(tmp, tmp, nthreads);
503         mpz_add(tmp, q0, tmp);
504         {
505             std::ostringstream os;
506             os << tmp;
507             param_list_add_key(pl2, "q1", os.str().c_str(), PARAMETER_FROM_CMDLINE);
508         }
509         las_todo_list todo2(cpoly, pl2);
510         for(;;) {
511             las_todo_entry * doing_p = todo2.feed_and_pop(rstate);
512             if (!doing_p) break;
513             las_todo_entry& doing(*doing_p);
514             if (nthreads == 1) {
515                 verbose_output_vfprint(0, 1, gmp_vfprintf,
516                         "%d %Zd %Zd\n",
517                         doing.side,
518                         (mpz_srcptr) doing.p,
519                         (mpz_srcptr) doing.r);
520             } else {
521                 lists[i].push_back(doing);
522             }
523         }
524     };
525 
526     if (nthreads > 1) {
527         verbose_output_vfprint(0, 1, gmp_vfprintf,
528                 "# Collecting the todo list in memory from q0=%Zd to q1=%Zd using %d threads\n",
529                 (mpz_srcptr) q0, (mpz_srcptr) q1, nthreads);
530     }
531 
532     for(int subjob = 0 ; subjob < nthreads ; ++subjob)
533         subjobs.push_back(std::thread(segment, subjob));
534     for(auto & t : subjobs) t.join();
535     for(auto const & v : lists) {
536         for(auto const & doing : v) {
537             verbose_output_vfprint(0, 1, gmp_vfprintf,
538                     "%d %Zd %Zd\n",
539                     doing.side,
540                     (mpz_srcptr) doing.p,
541                     (mpz_srcptr) doing.r);
542         }
543     }
544 }
545