/*****************************************************************
*                Functions for the factor base                  *
*****************************************************************/

#ifndef FB_HPP_
#define FB_HPP_

#include "cado_config.h"               // for HAVE_GLIBC_VECTOR_INTERNALS

#include <cstddef>                     // for size_t, NULL
#include <cstdio>                      // for fprintf, FILE
#include <algorithm>                   // for sort, max
#include <array>                       // for array
#include <iosfwd>                      // for ostream
#include <limits>                      // for numeric_limits
#include <list>                        // for list
#include <map>                         // for map, operator!=, _Rb_tree_iter...
#include <mutex>                       // for lock_guard, mutex
#include <utility>                     // for pair
#include <vector>                      // for vector

#include "macros.h"                    // for ASSERT_ALWAYS, ASSERT, MAYBE_U...
#include "mpz_poly.h"                  // for cxx_mpz, cxx_mpz_poly
#include "cado_poly.h"                 // for MAX_DEGREE

#include "fb-types.h"                  // for fbprime_t, slice_index_t, fbro...
#include "las-base.hpp"                // for _padded_pod
#include "las-config.h"                // for FB_MAX_PARTS
#include "lock_guarded_container.hpp"  // for lock_guarded_container
#include "mmap_allocator.hpp"          // for mmap_allocator
#ifdef HAVE_GLIBC_VECTOR_INTERNALS
#include "mmappable_vector.hpp"
#else
/* yes, it's a hack */
#define mmappable_vector std::vector
#endif
#include "multityped_array.hpp"        // for multityped_array_foreach, mult...
struct qlattice_basis;
struct cxx_param_list;


/* The *factor base* is made of *entries*. We have several vectors of
 * entries, one for each possible number of roots: all primes in a given
 * vector split into the same number of roots.
 *
 * For each set of *thresholds* and each *scale* value, we define a
 * *slicing*. A slicing is a division of the factor base into *parts*.
 * Each part then contains several vectors of *slices* (mirroring the
 * several vectors of entries in the factor base). A slice is essentially
 * a pair of iterators delimiting a range in the vector of entries that
 * have this number of roots.
 *
 * The factor base also contains auxiliary data attached to each vector
 * of entries, namely the cumulative distribution function of the weight.
 * This is used to subdivide slices into pieces of equal weight when
 * needed.
 */
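
/* A quick usage sketch (illustration only, not part of the declarations
 * below; the key values and the reporting functor are made up): given a
 * (non-const) fb_factorbase fb, one builds a key, asks for the
 * corresponding slicing, and walks its slices through the
 * fb_slice_interface base.
 *
 *   fb_factorbase::key_type K;     // thresholds, td_thresh, skipped,
 *                                  // scale, nb_threads set by the caller
 *   fb_factorbase::slicing const & S = fb[K];
 *   struct report {
 *       void operator()(fb_slice_interface const & sl) const {
 *           printf("slice %u: %zu entries, weight %.3f\n",
 *                  (unsigned) sl.get_index(), sl.size(), sl.get_weight());
 *       }
 *   } R;
 *   for (int i = 1; i < FB_MAX_PARTS; i++)
 *       S.get_part(i).foreach_slice(R);
 *
 * (per the comment further down, parts[0] is a meaningless placeholder
 * for the small sieve, hence the loop may start at 1.)
 */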

/* {{{ fb entries: sets of prime ideals above one single rational prime p. */
/* Forward declaration so fb_entry_general can use it in constructors */
template <int Nr_roots>
class fb_entry_x_roots;

/* A root modulo a prime power q. q is specified externally */
struct fb_general_root {
  /* exp and oldexp are maximal such that:
     if the root is not projective and a == b*r (mod p^k),    then p^exp    | F(a,b),
     and if                           a == b*r (mod p^(k-1)), then p^oldexp | F(a,b).
     If the root is projective, the same holds with a*r == b instead. */
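  /* A small worked example (this particular F, p, r is chosen purely for
     illustration): with F(x,y) = x^2 - 2*y^2, p = 7 and r = 3, r is a root
     since 3^2 == 2 (mod 7). Whenever a == 3*b (mod 7), we get
     F(a,b) == (9 - 2)*b^2 == 0 (mod 7), so for this non-projective root
     exp = 1 and oldexp = 0. */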
  fbroot_t r;
  bool proj;
  unsigned char exp, oldexp;

  fb_general_root (){}
  fb_general_root (const fbroot_t r, const unsigned char nexp,
                   const unsigned char oldexp, const bool proj=false) :
                   r(r), proj(proj), exp(nexp), oldexp(oldexp) {}
  fb_general_root (const fbroot_t r)
                   : r(r)
                   , proj(false)
                   , exp(1)
                   , oldexp(0) {}
  /* Create a root from a linear polynomial */
  static fb_general_root fb_linear_root (fbprime_t q, cxx_mpz_poly const & poly,
          const unsigned char nexp, const unsigned char oldexp);

  /* Constructor from the old format of storing projective roots, which has q
     added to the root if the root is projective */
  fb_general_root (const fb_root_p1 R,
                   const unsigned char nexp=1, const unsigned char oldexp=0) :
                   r(R.r), proj(R.proj),
                   exp(nexp), oldexp(oldexp) {}

  /* A root is simple if it is not projective and the exp goes from 0 to 1 */
  bool is_simple() const {return exp == 1 && oldexp == 0 && !proj;}

  /* Convert a root to the old format of storing projective roots with q added */
  unsigned long long to_old_format(const fbprime_t q) const {
    return (unsigned long long) r + (proj ? q : 0);
  }

  /* Print one root. Projective roots are printed as r+q */
  void fprint(FILE *out, const fbprime_t q) const {
    fprintf(out, "%llu", to_old_format(q));
    if (oldexp != 0 || this->exp != 1)
      fprintf(out, ":%hhu:%hhu", oldexp, this->exp);
  }

  void transform(fb_general_root &result, const fbprime_t q,
                 const redc_invp_t invq,
                 const qlattice_basis &basis) const;
};

/* General entries are anything that needs auxiliary information:
   Prime powers, projective roots, ramified primes where exp != oldexp + 1,
   etc. They could, of course, also store the simple cases, but for those we
   use the simple struct to conserve memory and to decide algorithms (batch
   inversion, etc.) statically. */
class fb_entry_general : public _padded_pod<fb_entry_general> {
  void read_roots (const char *, unsigned char, unsigned char, unsigned long);
public:
  typedef fb_entry_general transformed_entry_t;
  fbprime_t q, p; /* q = p^k */
  redc_invp_t invq; /* invq = -1/q (mod 2^32), or (mod 2^64), depending on
                       the size of redc_invp_t */
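  /* Not necessarily the code used here, just a sketch of one standard way
   * to obtain such an inverse for odd q (Newton/Hensel lifting, which
   * doubles the number of correct bits at each step):
   *
   *   redc_invp_t x = q;          // inverse of q mod 2^3, since q is odd
   *   for (int i = 0; i < 5; i++) // 3 -> 6 -> 12 -> 24 -> 48 -> 96 bits
   *       x *= 2 - q * x;         // arithmetic wraps mod 2^(8*sizeof(x))
   *   invq = -x;                  // -1/q mod the word size
   */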
  fb_general_root roots[MAX_DEGREE];
  unsigned char k, nr_roots;
  /* Static class members to allow fb_vector<> to distinguish between and
     operate on both kinds of entries */
  static const bool is_general_type = true;
  static const unsigned char fixed_nr_roots = 0;
  inline int get_nr_roots() const { return nr_roots; }

  fb_entry_general() {}
  template <int Nr_roots>
  fb_entry_general (const fb_entry_x_roots<Nr_roots> &e);
  fbprime_t get_q() const {return q;}
  fbroot_t get_r(const size_t i) const {return roots[i].r;}
  fbroot_t get_proj(const size_t i) const {return roots[i].proj;}
  void parse_line (const char *line, unsigned long linenr);
  void merge (const fb_entry_general &);
  void fprint(FILE *out) const;
  bool is_simple() const;
  void transform_roots(transformed_entry_t &, const qlattice_basis &) const;
  double weight() const {return 1./q * nr_roots;}
  /* Allow sorting by p */
  bool operator<(const fb_entry_general &other) const {return this->p < other.p;}
  bool operator>(const fb_entry_general &other) const {return this->p > other.p;}
  struct sort_byq {
    bool operator()(fb_entry_general const & a, fb_entry_general const & b) const { return a.get_q() < b.get_q(); }
  };
};

template <int Nr_roots>
class fb_transformed_entry_x_roots {
public:
  fbprime_t p;
  std::array<fbroot_t, Nr_roots> roots;
  std::array<bool, Nr_roots> proj;
  static const unsigned char k = 1, nr_roots = Nr_roots;
  /* Static class members to allow fb_vector<> to distinguish between and
     operate on both kinds of entries */
  static const bool is_general_type = false;
  static const unsigned char fixed_nr_roots = Nr_roots;
  fbprime_t get_q() const {return p;}
  fbroot_t get_r(const size_t i) const {return roots[i];}
  fbroot_t get_proj(const size_t i) const {return proj[i];}
};

/* "Simple" factor base entries. We imply q=p, k=1, oldexp=0, exp=1,
   and projective=false for all roots. */
template <int Nr_roots>
class fb_entry_x_roots : public _padded_pod<fb_entry_x_roots<Nr_roots> > {
public:
  typedef fb_transformed_entry_x_roots<Nr_roots> transformed_entry_t;
  fbprime_t p;
  redc_invp_t invq; /* invq = -1/q (mod 2^32), or (mod 2^64), depending on
                       the size of redc_invp_t */
  std::array<fbroot_t, Nr_roots> roots;
  /* Static class members to allow fb_vector<> to distinguish between and
     operate on both kinds of entries */
  static const unsigned char k = 1, nr_roots = Nr_roots;
  static const bool is_general_type = false;
  static const unsigned char fixed_nr_roots = Nr_roots;
  inline int get_nr_roots() const { return Nr_roots; }
  // fb_entry_x_roots() {};
  fb_entry_x_roots(fbprime_t p, redc_invp_t invq, fbroot_t * roots) : p(p), invq(invq) {
    for (int i = 0; i < Nr_roots; i++)
      this->roots[i] = roots[i];
  }
  /* Allow assignment-construction from general entries */
  fb_entry_x_roots(const fb_entry_general &e) : p(e.p), invq(e.invq) {
    ASSERT_ALWAYS(Nr_roots == e.nr_roots);
    ASSERT(e.is_simple());
    for (int i = 0; i < Nr_roots; i++)
      roots[i] = e.roots[i].r;
  }
  fbprime_t get_q() const {return p;}
  fbroot_t get_r(const size_t i) const {return roots[i];}
  fbroot_t get_proj(const size_t i MAYBE_UNUSED) const {return false;}
  double weight() const {return 1./p * Nr_roots;}
  /* Allow sorting by p */
  bool operator<(const fb_entry_x_roots<Nr_roots> &other) const {return this->p < other.p;}
  bool operator>(const fb_entry_x_roots<Nr_roots> &other) const {return this->p > other.p;}
  void fprint(FILE *) const;
  void transform_roots(transformed_entry_t &, const qlattice_basis &) const;
};

/* }}} */

class fb_slice_interface {
    public:
        fb_slice_interface() = default;
        virtual ~fb_slice_interface(){}
        virtual size_t size() const = 0;
        virtual unsigned char get_logp() const = 0;
        virtual fbprime_t get_prime(slice_offset_t offset) const = 0;
        virtual unsigned char get_k(slice_offset_t offset) const = 0;
        /* global index across all fb parts */
        virtual slice_index_t get_index() const = 0;
        virtual double get_weight() const = 0;
        virtual bool is_general() const = 0;
        virtual int get_nr_roots() const = 0;
};

/* see fb_slice_weight.hpp */
template<typename FB_ENTRY_TYPE>
class fb_slice_weight_estimator;

template<typename FB_ENTRY_TYPE>
class fb_slice : public fb_slice_interface {
    friend class fb_slice_weight_estimator<FB_ENTRY_TYPE>;
    typedef mmappable_vector<FB_ENTRY_TYPE> fb_entry_vector;
    typename fb_entry_vector::const_iterator _begin, _end;
    unsigned char logp;
    slice_index_t index; /* global index across all fb parts */
    double weight;
    friend struct helper_functor_subdivide_slices;
    fb_slice(typename fb_entry_vector::const_iterator it, unsigned char logp) : _begin(it), _end(it), logp(logp), index(0), weight(0) {}
    fb_slice(typename fb_entry_vector::const_iterator it, typename fb_entry_vector::const_iterator jt, unsigned char logp) : _begin(it), _end(jt), logp(logp), index(0), weight(0) {}
    public:
    typedef FB_ENTRY_TYPE entry_t;
    inline typename fb_entry_vector::const_iterator begin() const { return _begin; }
    inline typename fb_entry_vector::const_iterator end() const { return _end; }
    inline size_t size() const override { return _end - _begin; }
    unsigned char get_logp() const override { return logp; }
    fbprime_t get_prime(slice_offset_t offset) const override {
        /* While it may make sense to manipulate slices that exceed the
         * expected max size during the work to construct them, it should
         * be pretty clear that we'll never ever try to use get_prime,
         * which is limited by its offset type, on a slice that has more
         * prime ideals than this function can address */
        ASSERT(size() <= std::numeric_limits<slice_offset_t>::max());
        return _begin[offset].p;
    }
    unsigned char get_k(slice_offset_t offset) const override {
        return _begin[offset].k;
        /* prime power exponent. Well, most often it's a constant! We do
         * need to access it from the virtual base though, which is why
         * we're not folding it into a template access. */
    }
    /* global index across all fb parts */
    slice_index_t get_index() const override {return index;}
    double get_weight() const override {return weight;}
    bool is_general() const override { return FB_ENTRY_TYPE::is_general_type; }
    /* get_nr_roots() on an fb_slice returns zero for slices of general
     * type! */
    int get_nr_roots() const override { return FB_ENTRY_TYPE::fixed_nr_roots;}
};

/* entries and general_entries: we declare general entries, as well
 * as entries for all numbers of roots between 1 and MAX_ROOTS
 *
 * (notationally, general_entries corresponds to -1 as a number of
 * roots).
 * */

template<typename T> struct entries_and_cdf {
    typedef mmappable_vector<T> container_type;
    typedef mmappable_vector<double> weight_container_type;
    struct type : public container_type {
        typedef typename entries_and_cdf<T>::container_type container_type;
        typedef typename entries_and_cdf<T>::weight_container_type weight_container_type;
        /* cumulative distribution function. This is set up by
         * helper_functor_append. We use it to split into slices.
         * weight_cdf[i] is \sum_{j < i} super[j].weight
         *
         * Thus we always need a lone 0 to initialize. See
         * helper_functor_put_first_0, used in the fb_factorbase ctor.
         */
        /* We *might* want to consider the cdf only for several entries
         * at a time (say, 16), so as to minimize the cost of finding the
         * split points */
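        /* For instance (weights made up purely for illustration): if the
         * stored entries have weights 0.5, 0.25 and 0.125, then weight_cdf
         * is { 0, 0.5, 0.75, 0.875 }, and weight_delta(1, 3) below returns
         * 0.875 - 0.5 = 0.375, the combined weight of entries 1 and 2. */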
        weight_container_type weight_cdf;
        inline weight_container_type::const_iterator weight_begin() const {
            return weight_cdf.begin();
        }
        inline weight_container_type::const_iterator weight_end() const {
            return weight_cdf.end();
        }
        inline weight_container_type::size_type weight_size() const {
            return weight_cdf.size();
        }
        double weight_delta(size_t a, size_t b) const {
            return weight_cdf[b] - weight_cdf[a];
        }
        double weight_delta(typename container_type::const_iterator a, typename container_type::const_iterator b) const {
            return weight_cdf[b-container_type::begin()] - weight_cdf[a-container_type::begin()];
        }
        double weight_cdf_at(size_t a) const {
            return weight_cdf[a];
        }
        double weight_cdf_at(typename container_type::const_iterator a) const {
            return weight_cdf[a-container_type::begin()];
        }
    };
};
template<int n> struct fb_entries_factory {
    typedef typename entries_and_cdf<fb_entry_x_roots<n>>::type type;
};
template<> struct fb_entries_factory<-1> {
    typedef typename entries_and_cdf<fb_entry_general>::type type;
};
template<int n> struct fb_slices_factory {
    typedef std::vector<fb_slice<fb_entry_x_roots<n>>> type;
};
template<> struct fb_slices_factory<-1> {
    typedef std::vector<fb_slice<fb_entry_general>> type;
};

class fb_factorbase {
    public:
        /* Has to be <= MAX_DEGREE */
    static const int MAX_ROOTS = MAX_DEGREE;

    private:

    cxx_mpz_poly f;
    int side;
    unsigned long lim;
    unsigned long powlim;

    public:
    bool empty() const { return lim == 0; }

    private:
    typedef multityped_array<fb_entries_factory, -1, MAX_ROOTS+1> entries_t;
    entries_t entries;

    public:
    typedef std::array<size_t, MAX_ROOTS+2> threshold_pos;
    threshold_pos get_threshold_pos(fbprime_t) const;

    private:
    /* The threshold position cache is used to accelerate the creation of
     * slicings. */
    mutable std::map<fbprime_t, threshold_pos> threshold_pos_cache;

    /* {{{ append. This inserts a pool of entries into the factor base. We
     * do it with many entries in a row so as to avoid looping MAX_ROOTS
     * times for each root, and so that we don't feel sorry to use
     * multityped_array_foreach (even though in truth, it's quite
     * probably fine)
     */
    private:
    struct helper_functor_append;
    public:
    void append(std::list<fb_entry_general> &pool);
    /* }}} */

    /* {{{ Various ways to count primes, prime ideals, and hit ratio
     * ("weight") of entries in the whole factor base, or in subranges
     */
    private:
    struct helper_functor_count_primes;
    struct helper_functor_count_prime_ideals;
    struct helper_functor_count_weight;
    struct helper_functor_count_combined;
    struct helper_functor_count_primes_interval;
    struct helper_functor_count_prime_ideals_interval;
    struct helper_functor_count_weight_interval;
    struct helper_functor_count_combined_interval;
    public:
    size_t count_primes() const;
    size_t count_prime_ideals() const;
    size_t count_weight() const;
    /* }}} */


    /* the key type lists the things with respect to which we're willing
     * to divide the views on our factor base.
     */
        struct key_type {
            std::array<fbprime_t, FB_MAX_PARTS> thresholds;
            fbprime_t td_thresh;
            fbprime_t skipped;
            double scale;
            /* This might seem non-obvious, but this parameter controls
             * the size of the slices, because we want to enforce some
             * relatively even division. It's not entirely clear that we
             * want it here, but we definitely want it somewhere. */
            unsigned int nb_threads;

            bool operator<(key_type const& x) const {
                int r;
                for(int i = 0; i < FB_MAX_PARTS; ++i) {
                    r = (thresholds[i] > x.thresholds[i]) - (x.thresholds[i] > thresholds[i]);
                    if (r) return r < 0;
                }
                r = (td_thresh > x.td_thresh) - (x.td_thresh > td_thresh);
                if (r) return r < 0;
                r = (skipped > x.skipped) - (x.skipped > skipped);
                if (r) return r < 0;
                return scale < x.scale;
            }
        };

    public:
    class slicing {
        public:
        struct stats_type {
            /* explicit-initializing as below forces zero-initialization
             * of members */
            std::array<size_t, FB_MAX_PARTS> primes {{}};
            std::array<size_t, FB_MAX_PARTS> ideals {{}};
            std::array<double, FB_MAX_PARTS> weight {{}};
        };
        stats_type stats;

        class part {
            /* slices and general_slices: actually general_slices is
             * the slices vector for number of roots -1, and that is
             * always a vector of length 1. And we have slices (vectors
             * of fb_slice objects) for all numbers of roots between 1 and
             * MAX_ROOTS.
             */
            typedef multityped_array<fb_slices_factory, -1, MAX_ROOTS+1> slices_t;
            friend struct helper_functor_subdivide_slices;
            slices_t slices;
            slice_index_t first_slice_index = 0;
            public:
            template<int n>
                typename fb_slices_factory<n>::type& get_slices_vector_for_nroots() {
                    return slices.get<n>();
                }
            private:

            /* the general vector (the one with index -1 in the
             * multityped array) is made of "anything that is not
             * simple" (as per the logic that used to be in
             * fb_part::append and fb_entry_general::is_simple). That means:
             *  - all powers go to the general vector
             *  - everything below bkthresh, too (but see below).
             *  - primes with multiple roots as well (but to be honest,
             *    it's not entirely clear to me why -- after all, there's
             *    no special treatment to speak of).
             */

            friend class slicing;

            /* We use this to access our arrays called slices and
             * general_slices.
             *
             * 0 <= i < slicesG.size()
             *        -->  return &(slicesG[i])
             * slicesG.size() <= i < slicesG.size() + slices0.size()
             *        -->  return &(slices0[i - slicesG.size()])
             * slicesG.size() + slices0.size() <= i < slicesG.size() + slices0.size() + slices1.size()
             *        -->  return &(slices1[i - slicesG.size() - slices0.size()])
             * and so on.
             */
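            /* For instance (sizes made up purely for illustration): with
             * slicesG.size() == 1 and slices0.size() == 3, i == 0 maps to
             * slicesG[0], i in 1..3 maps to slices0[0..2], and the entries
             * of slices1 start at i == 4. */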
            struct helper_functor_get {
                typedef fb_slice_interface const * type;
                typedef slice_index_t key_type;
                template<typename T>
                    type operator()(T const & x, slice_index_t & k) {
                        if ((size_t) k < x.size()) {
                            return &(x[k]);
                        } else {
                            k -= x.size();
                            return NULL;
                        }
                    }
            };

            /* index is the global index across all fb parts */
            fb_slice_interface const * get(slice_index_t index) const {
                /* used to be in fb_vector::get_slice
                 *
                 * and in fb_part::get_slice for the lookup of the
                 * vector.
                 *
                 * TODO: dichotomy, perhaps ? Can we do the dichotomy
                 * elegantly ?
                 */
                if (index < first_slice_index) return NULL;
                slice_index_t idx = index - first_slice_index;
                if (idx >= nslices()) {
                    // index = idx - nslices();
                    return NULL;
                }
                const fb_slice_interface * res = multityped_array_locate<helper_functor_get>()(slices, idx);
                ASSERT_ALWAYS(res);
                ASSERT_ALWAYS(res->get_index() == index);
                return res;
            }

            /* {{{ use caching for the number of slices, as it's a handy
             * thing to query */
            mutable slice_index_t _nslices = std::numeric_limits<slice_index_t>::max();
            struct helper_functor_nslices {
                template<typename T>
                    slice_index_t operator()(slice_index_t t, T const & x) const {
                        return t + x.size();
                    }
            };
            public:
            slice_index_t nslices() const {
                if (_nslices != std::numeric_limits<slice_index_t>::max())
                    return _nslices;
                helper_functor_nslices N;
                return _nslices = multityped_array_fold(N, 0, slices);
            }
            /* }}} */

            private:
            template<typename F>
            struct foreach_slice_s {
                F & f;
                template<typename T>
                void operator()(T & x) {
                    for(auto & a : x)
                        f(a);
                }
                template<typename T>
                void operator()(T const & x) {
                    for(auto const & a : x)
                        f(a);
                }
            };
            public:
            template<typename F>
            void foreach_slice(F & f) {
                foreach_slice_s<F> FF { f };
                multityped_array_foreach(FF, slices);
            }
            template<typename F>
            void foreach_slice(F & f) const {
                foreach_slice_s<F> FF { f };
                multityped_array_foreach(FF, slices);
            }
            /*
             * old g++ seems to have difficulties with this variant, and
             * is puzzled by the apparent ambiguity -- newer g++ groks it
             * correctly, as does clang
            template<typename F>
            void foreach_slice(F && f) {
                multityped_array_foreach(foreach_slice_s<F> { f }, slices);
            }
            template<typename F>
            void foreach_slice(F && f) const {
                multityped_array_foreach(foreach_slice_s<F> { f }, slices);
            }
            */

            /* index: global index across all fb parts */
            fb_slice_interface const & operator[](slice_index_t index) const {
                /* This bombs out at runtime if get returns NULL, but
                 * then it should be an indication of a programmer
                 * mistake.
                 */
                return *get(index);
            }
        };

        private:

        /* We have "parts" that correspond to the various layers of the
         * bucket sieving
         *
         *  ==> THERE IS NO "part" FOR THE SMALL SIEVE <==
         *
         * This means that parts[0] will always be a meaningless
         * placeholder. Indeed, the small sieve does not care about
         * slices!
         */
        std::array<part, FB_MAX_PARTS> parts;

        /* toplevel is set by the ctor */
        int toplevel = 0;

        /* index: global index across all fb parts */
        fb_slice_interface const * get(slice_index_t index) const {
            for (auto const & p : parts) {
                if (index < p.first_slice_index + p.nslices())
                    return p.get(index);
            }
            return NULL;
        }
        public:

        part const & get_part(int i) const { return parts[i]; }

        inline int get_toplevel() const { return toplevel; }

        /* This accesses the *fb_slice* with this index. Not the vector of
         * slices!
         * index: global index across all fb parts */
        fb_slice_interface const & operator[](slice_index_t index) const {
            return *get(index);
        }

        public:

        /* Here, when computing the slices, we stow away the small
         * primes, and arrange them according to the internal logic of
         * the small sieve. In particular, we want to keep track of where
         * the resieved primes are.
         *
         * Note that the data that we prepare here is still not attached
         * to any special-q. We're replacing what used to be done in
         * init_fb_smallsieved, but not in small_sieve_init.
         *
         * Primes below the bucket-sieve threshold are small-sieved. Some
         * will be treated specially when cofactoring:
         *
         *  - primes such that nb_roots / p >= 1/td_thresh will be
         *    trial-divided.
         *  - primes (ideals) that are not trial-divided and that are not
         *    powers are resieved.
         *
         * And of course, depending on the special-q, small-sieved prime
         * ideals become projective, and therefore escape the general
         * treatment.
         *
         * Note that the condition on trial division is not clear-cut
         * w.r.t. p. The small sieve code is somewhat dependent on the
         * number of hits per row, which is I/p. And it matters to have
         * small-sieved primes sorted according to this value. Hence, as
         * p increases, we might have:
         *  - small p's that are trial-divided because 1/p >= 1/td_thresh.
         *  - some p's with p <= 2*td_thresh, among which some are
         *    trial-divided if 2 roots or more lie above them, and the
         *    others are resieved.
         *  - some p's with p <= 3*td_thresh, again with a distinction...
         *  - and so on up to p > d*td_thresh (d being the polynomial
         *    degree). Above this bound we're sure that we no longer see
         *    any trial-divided prime.
         *
         * For each slicing, we elect to compute two copies of the lists
         * of prime ideals below bkthresh:
         *  - first, prime ideals that we know will be resieved. Those
         *    form the largest part of the list.
         *  - second, prime ideals above primes that will be trial-divided.
         */
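
        /* A numeric illustration of the trial-division rule (the value of
         * td_thresh is made up): with td_thresh = 1024, p = 1009 with one
         * root above it is trial-divided (1009 <= 1*1024); p = 1500 with
         * one root is resieved (1500 > 1024), but with two roots above it,
         * it is trial-divided (1500 <= 2*1024). */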

        /* This contains all factor base prime ideals below the bucket
         * threshold.  */
        struct small_sieve_entries_t {
            /* general ones. Not powers, not trial-divided ones. */
            std::vector<fb_entry_general> resieved;
            /* the rest. some are powers, others are trial-divided */
            std::vector<fb_entry_general> rest;
            /* from "rest" above, we can infer the list of trial-divided
             * primes by merely restricting to entries with k==1 */

            /* this is sort of a trash can. We won't use these primes for
             * the small sieve, but they do matter for trial division */
            std::vector<unsigned long> skipped;
        };
        small_sieve_entries_t small_sieve_entries;
        /* TODO: I doubt that the struct above will stay. Seems awkward.
         * We'd better have a single vector, and the position of the
         * "end-of-resieved" thing.
         */

        template<typename T>
        void foreach_slice(T & f) {
            for (auto & p : parts) {
                p.foreach_slice(f);
            }
        }
        template<typename T>
        void foreach_slice(T && f) {
            for (auto & p : parts) {
                p.foreach_slice(f);
            }
        }

        slicing() = default;
        slicing(fb_factorbase const & fb, key_type const & K);
    };

    private:
        lock_guarded_container<std::map<key_type, slicing>> cache;
        int read(const char * const filename);

    public:
        /* accessors.
         * As in the std::map case, the non-const accessor is allowed to
         * create stuff. */

        inline slicing & operator[](key_type const& K) {
            std::lock_guard<std::mutex> foo(cache.mutex());
            auto it = cache.find(K);
            if (it != cache.end())
                return it->second;

            /* create a new slot */
            slicing & res(cache[K] = slicing(*this, K));
            return res;
        }

        inline slicing const & operator[](key_type const& K) const {
            return cache.at(K);
        }

    private:
        void make_linear();
        void make_linear_threadpool (unsigned int nb_threads);

    public:
        /* Note that the intent here is to read the factor base once and
         * for all. In the descent context, where we have several
         * different fb lim parameters, we should get away with different
         * slicings, instead of trying to redo the factor base
         * initialization.
         */
        fb_factorbase(cxx_cado_poly const & cpoly, int side, cxx_param_list & pl, const char * fbc_filename, int nthreads = 1);
        fb_factorbase() = default;
        fb_factorbase(fb_factorbase &&) = default;
        fb_factorbase& operator=(fb_factorbase &&) = default;

    private:
        struct sorter {
            template<typename T>
            void operator()(T & x) {
                /* not entirely clear to me. Shall we sort by q or by p ?
                 */
                typedef typename T::value_type X;
                auto by_q = [](X const & a, X const & b) { return a.get_q() < b.get_q(); };
                /*
                auto by_p_then_q = [](X const & a, X const & b) {
                    return
                        a.get_p() < b.get_p() ||
                        a.get_p() == b.get_p() &&
                        a.get_q() < b.get_q();
                };
                */
                std::sort(x.begin(), x.end(), by_q);
            }
        };

    public:
        void finalize() {
            sorter S;
            multityped_array_foreach(S, entries);
        }
};

std::ostream& operator<<(std::ostream& o, fb_factorbase::key_type const &);

unsigned char   fb_log (double x, double y, double z);
unsigned char   fb_log_delta (fbprime_t, unsigned long, unsigned long, double);
fbprime_t       fb_is_power (fbprime_t, unsigned long *);

#endif  /* FB_HPP_ */