1 /***************************************************************** 2 * Functions for the factor base * 3 *****************************************************************/ 4 5 #ifndef FB_HPP_ 6 #define FB_HPP_ 7 8 #include "cado_config.h" // for HAVE_GLIBC_VECTOR_INTERNALS 9 10 #include <cstddef> // for size_t, NULL 11 #include <cstdio> // for fprintf, FILE 12 #include <algorithm> // for sort, max 13 #include <array> // for array 14 #include <iosfwd> // for ostream 15 #include <limits> // for numeric_limits 16 #include <list> // for list 17 #include <map> // for map, operator!=, _Rb_tree_iter... 18 #include <mutex> // for lock_guard, mutex 19 #include <utility> // for pair 20 #include <vector> // for vector 21 22 #include "macros.h" // for ASSERT_ALWAYS, ASSERT, MAYBE_U... 23 #include "mpz_poly.h" // for cxx_mpz, cxx_mpz_poly 24 #include "cado_poly.h" // for MAX_DEGREE 25 26 #include "fb-types.h" // for fbprime_t, slice_index_t, fbro... 27 #include "las-base.hpp" // for _padded_pod 28 #include "las-config.h" // for FB_MAX_PARTS 29 #include "lock_guarded_container.hpp" // for lock_guarded_container 30 #include "mmap_allocator.hpp" // for mmap_allocator 31 #ifdef HAVE_GLIBC_VECTOR_INTERNALS 32 #include "mmappable_vector.hpp" 33 #else 34 /* yes, it's a hack */ 35 #define mmappable_vector std::vector 36 #endif 37 #include "multityped_array.hpp" // for multityped_array_foreach, mult... 38 struct qlattice_basis; 39 struct cxx_param_list; 40 41 42 /* The *factor base* is made of *entries*. We have several vectors of 43 * entries, each with primes splitting in the same number of roots. 44 * 45 * For each set of *thresholds* and each *scale* value, we define a 46 * *slicing*. A slicing is a division of the factor base into *parts*. 47 * Then each part contains several vectors of *slices* (in the same 48 * manner as we have several vectors of entries in the factor base). A 49 * slice is formed by basically two pointers into the vector of entries 50 * in the factor base that have this number of roots. 51 * 52 * The factor base also contains auxiliary data attached to each vector 53 * of entries, namely the cumulative distribution function of the weight. 54 * This is used to subdivide slices into pieces of equal weight when 55 * needed. 56 */ 57 58 /* {{{ fb entries: sets of prime ideals above one single rational prime p. */ 59 /* Forward declaration so fb_entry_general can use it in constructors */ 60 template <int Nr_roots> 61 class fb_entry_x_roots; 62 63 /* A root modulo a prime power q. q is specified externally */ 64 struct fb_general_root { 65 /* exp and oldexp are maximal such that: 66 If not projective and a == br (mod p^k), then p^exp | F(a,b) 67 -"- a == br (mod p^(k-1)), then p^oldexp | F(a,b) 68 If projective and ar == b -"- */ 69 fbroot_t r; 70 bool proj; 71 unsigned char exp, oldexp; 72 fb_general_rootfb_general_root73 fb_general_root (){} fb_general_rootfb_general_root74 fb_general_root (const fbroot_t r, const unsigned char nexp, 75 const unsigned char oldexp, const bool proj=false) : 76 r(r), proj(proj), exp(nexp), oldexp(oldexp) {} fb_general_rootfb_general_root77 fb_general_root (const fbroot_t r) 78 : r(r) 79 , proj(false) 80 , exp(1) 81 , oldexp(0) {} 82 /* Create a root from a linear polynomial */ 83 static fb_general_root fb_linear_root (fbprime_t q, cxx_mpz_poly const & poly, 84 const unsigned char nexp, const unsigned char oldexp); 85 86 /* Constructor from the old format of storing projective roots, which has q 87 added to the root if the root is projective */ fb_general_rootfb_general_root88 fb_general_root (const fb_root_p1 R, 89 const unsigned char nexp=1, const unsigned char oldexp=0) : 90 r(R.r), proj(R.proj), 91 exp(nexp), oldexp(oldexp) {} 92 93 /* A root is simple if it is not projective and the exp goes from 0 to 1 */ is_simplefb_general_root94 bool is_simple() const {return exp == 1 && oldexp == 0 && !proj;} 95 96 /* Convert a root to the old format of storing projective roots with q added */ to_old_formatfb_general_root97 unsigned long long to_old_format(const fbprime_t q) const { 98 return (unsigned long long) r + (proj ? q : 0); 99 } 100 101 /* Print one root. Projective roots are printed as r+q */ fprintfb_general_root102 void fprint(FILE *out, const fbprime_t q) const { 103 fprintf(out, "%llu", to_old_format(q)); 104 if (oldexp != 0 || this->exp != 1) 105 fprintf(out, ":%hhu:%hhu", oldexp, this->exp); 106 } 107 108 void transform(fb_general_root &result, const fbprime_t q, 109 const redc_invp_t invq, 110 const qlattice_basis &basis) const; 111 }; 112 113 /* General entries are anything that needs auxiliary information: 114 Prime powers, projective roots, ramified primes where exp != oldexp + 1, 115 etc. They could, of course, also store the simple cases, but for those we 116 use the simple struct to conserve memory and to decide algorithms (batch 117 inversion, etc.) statically. */ 118 class fb_entry_general : public _padded_pod<fb_entry_general> { 119 void read_roots (const char *, unsigned char, unsigned char, unsigned long); 120 public: 121 typedef fb_entry_general transformed_entry_t; 122 fbprime_t q, p; /* q = p^k */ 123 redc_invp_t invq; /* invq = -1/q (mod 2^32), or (mod 2^64), depending on 124 the size of redc_invp_t */ 125 fb_general_root roots[MAX_DEGREE]; 126 unsigned char k, nr_roots; 127 /* Static class members to allow fb_vector<> to distinguish between and 128 operate on both kind of entries */ 129 static const bool is_general_type = true; 130 static const unsigned char fixed_nr_roots = 0; get_nr_roots() const131 inline int get_nr_roots() const { return nr_roots; } 132 fb_entry_general()133 fb_entry_general() {} 134 template <int Nr_roots> 135 fb_entry_general (const fb_entry_x_roots<Nr_roots> &e); get_q() const136 fbprime_t get_q() const {return q;} get_r(const size_t i) const137 fbroot_t get_r(const size_t i) const {return roots[i].r;}; get_proj(const size_t i) const138 fbroot_t get_proj(const size_t i) const {return roots[i].proj;}; 139 void parse_line (const char *line, unsigned long linenr); 140 void merge (const fb_entry_general &); 141 void fprint(FILE *out) const; 142 bool is_simple() const; 143 void transform_roots(transformed_entry_t &, const qlattice_basis &) const; weight() const144 double weight() const {return 1./q * nr_roots;} 145 /* Allow sorting by p */ operator <(const fb_entry_general & other) const146 bool operator<(const fb_entry_general &other) const {return this->p < other.p;} operator >(const fb_entry_general & other) const147 bool operator>(const fb_entry_general &other) const {return this->p > other.p;} 148 struct sort_byq { operator ()fb_entry_general::sort_byq149 bool operator()(fb_entry_general const & a, fb_entry_general const & b) const { return a.get_q() < b.get_q(); }; 150 }; 151 }; 152 153 template <int Nr_roots> 154 class fb_transformed_entry_x_roots { 155 public: 156 fbprime_t p; 157 std::array<fbroot_t, Nr_roots> roots; 158 std::array<bool, Nr_roots> proj; 159 static const unsigned char k = 1, nr_roots = Nr_roots; 160 /* Static class members to allow fb_vector<> to distinguish between and 161 operate on both kind of entries */ 162 static const bool is_general_type = false; 163 static const unsigned char fixed_nr_roots = Nr_roots; get_q() const164 fbprime_t get_q() const {return p;} get_r(const size_t i) const165 fbroot_t get_r(const size_t i) const {return roots[i];}; get_proj(const size_t i) const166 fbroot_t get_proj(const size_t i) const {return proj[i];}; 167 }; 168 169 /* "Simple" factor base entries. We imply q=p, k=1, oldexp=0, exp=1, 170 and projective=false for all roots. */ 171 template <int Nr_roots> 172 class fb_entry_x_roots : public _padded_pod<fb_entry_x_roots<Nr_roots> > { 173 public: 174 typedef fb_transformed_entry_x_roots<Nr_roots> transformed_entry_t; 175 fbprime_t p; 176 redc_invp_t invq; /* invq = -1/q (mod 2^32), or (mod 2^64), depending on 177 the size of redc_invp_t */ 178 std::array<fbroot_t, Nr_roots> roots; 179 /* Static class members to allow fb_vector<> to distinguish between and 180 operate on both kind of entries */ 181 static const unsigned char k = 1, nr_roots = Nr_roots; 182 static const bool is_general_type = false; 183 static const unsigned char fixed_nr_roots = Nr_roots; get_nr_roots() const184 inline int get_nr_roots() const { return Nr_roots; } 185 // fb_entry_x_roots() {}; fb_entry_x_roots(fbprime_t p,redc_invp_t invq,fbroot_t * roots)186 fb_entry_x_roots(fbprime_t p, redc_invp_t invq, fbroot_t * roots) : p(p), invq(invq) { 187 for (int i = 0; i < Nr_roots; i++) 188 this->roots[i] = roots[i]; 189 } 190 /* Allow assignment-construction from general entries */ fb_entry_x_roots(const fb_entry_general & e)191 fb_entry_x_roots(const fb_entry_general &e) : p(e.p), invq(e.invq) { 192 ASSERT_ALWAYS(Nr_roots == e.nr_roots); 193 ASSERT(e.is_simple()); 194 for (int i = 0; i < Nr_roots; i++) 195 roots[i] = e.roots[i].r; 196 } get_q() const197 fbprime_t get_q() const {return p;} get_r(const size_t i) const198 fbroot_t get_r(const size_t i) const {return roots[i];} get_proj(const size_t i MAYBE_UNUSED) const199 fbroot_t get_proj(const size_t i MAYBE_UNUSED) const {return false;} weight() const200 double weight() const {return 1./p * Nr_roots;} 201 /* Allow sorting by p */ operator <(const fb_entry_x_roots<Nr_roots> & other) const202 bool operator<(const fb_entry_x_roots<Nr_roots> &other) const {return this->p < other.p;} operator >(const fb_entry_x_roots<Nr_roots> & other) const203 bool operator>(const fb_entry_x_roots<Nr_roots> &other) const {return this->p > other.p;} 204 void fprint(FILE *) const; 205 void transform_roots(transformed_entry_t &, const qlattice_basis &) const; 206 }; 207 208 /* }}} */ 209 210 class fb_slice_interface { 211 public: 212 fb_slice_interface() = default; ~fb_slice_interface()213 virtual ~fb_slice_interface(){} 214 virtual size_t size() const = 0; 215 virtual unsigned char get_logp() const = 0; 216 virtual fbprime_t get_prime(slice_offset_t offset) const = 0; 217 virtual unsigned char get_k(slice_offset_t offset) const = 0; 218 /* global index across all fb parts */ 219 virtual slice_index_t get_index() const = 0; 220 virtual double get_weight() const = 0; 221 virtual bool is_general() const = 0; 222 virtual int get_nr_roots() const = 0; 223 }; 224 225 /* see fb_slice_weight.hpp */ 226 template<typename FB_ENTRY_TYPE> 227 class fb_slice_weight_estimator; 228 229 template<typename FB_ENTRY_TYPE> 230 class fb_slice : public fb_slice_interface { 231 friend class fb_slice_weight_estimator<FB_ENTRY_TYPE>; 232 typedef mmappable_vector<FB_ENTRY_TYPE> fb_entry_vector; 233 typename fb_entry_vector::const_iterator _begin, _end; 234 unsigned char logp; 235 slice_index_t index; /* global index across all fb parts */ 236 double weight; 237 friend struct helper_functor_subdivide_slices; fb_slice(typename fb_entry_vector::const_iterator it,unsigned char logp)238 fb_slice(typename fb_entry_vector::const_iterator it, unsigned char logp) : _begin(it), _end(it), logp(logp), index(0), weight(0) {} fb_slice(typename fb_entry_vector::const_iterator it,typename fb_entry_vector::const_iterator jt,unsigned char logp)239 fb_slice(typename fb_entry_vector::const_iterator it, typename fb_entry_vector::const_iterator jt, unsigned char logp) : _begin(it), _end(jt), logp(logp), index(0), weight(0) {} 240 public: 241 typedef FB_ENTRY_TYPE entry_t; begin() const242 inline typename fb_entry_vector::const_iterator begin() const { return _begin; } end() const243 inline typename fb_entry_vector::const_iterator end() const { return _end; } size() const244 inline size_t size() const override { return _end - _begin; } get_logp() const245 unsigned char get_logp() const override { return logp; } get_prime(slice_offset_t offset) const246 fbprime_t get_prime(slice_offset_t offset) const override { 247 /* While it may make sense to manipulate slices that exceed the 248 * expected max size during the work to construct them, it should 249 * be pretty clear that we'll never ever try to use get_prime, 250 * which is limited in its type, on a slice that has more prime 251 * ideals that this function can address */ 252 ASSERT(size() <= std::numeric_limits<slice_offset_t>::max()); 253 return _begin[offset].p; 254 } get_k(slice_offset_t offset) const255 unsigned char get_k(slice_offset_t offset) const override { 256 return _begin[offset].k; 257 /* power. Well, most often it's a constant ! We need to 258 * access it from the virtual base though. This is way we're 259 * not folding it to a template access. */ 260 } 261 /* global index across all fb parts */ get_index() const262 slice_index_t get_index() const override {return index;} get_weight() const263 double get_weight() const override {return weight;} is_general() const264 bool is_general() const override { return FB_ENTRY_TYPE::is_general_type; } 265 /* get_nr_roots() on a fb_slice returns zero for slices of general 266 * type ! */ get_nr_roots() const267 int get_nr_roots() const override { return FB_ENTRY_TYPE::fixed_nr_roots;} 268 }; 269 270 /* entries and general_entries: we declare general entries, as well 271 * as entries for all numbers of roots between 1 and MAX_ROOTS 272 * 273 * (notationally, general_entries corresponds to -1 as a number of 274 * roots). 275 * */ 276 277 template<typename T> struct entries_and_cdf { 278 typedef mmappable_vector<T> container_type; 279 typedef mmappable_vector<double> weight_container_type; 280 struct type : public container_type { 281 typedef typename entries_and_cdf<T>::container_type container_type; 282 typedef typename entries_and_cdf<T>::weight_container_type weight_container_type; 283 /* cumulative distribution function. This is set up by 284 * helper_functor_append. We use it to split into slices. 285 * weight_cdf[i] is \sum_{j < i} super[j].weight 286 * 287 * Thus we always need a lone 0 to initialize. See 288 * helper_functor_put_first_0, used in the fb_factorbase ctor. 289 */ 290 /* We *might* want to consider the cdf only for several entries 291 * at a time (say, 16), so as to minimize the cost of finding the 292 * split points */ 293 weight_container_type weight_cdf; weight_beginentries_and_cdf::type294 inline weight_container_type::const_iterator weight_begin() const { 295 return weight_cdf.begin(); 296 } weight_endentries_and_cdf::type297 inline weight_container_type::const_iterator weight_end() const { 298 return weight_cdf.end(); 299 } weight_sizeentries_and_cdf::type300 inline weight_container_type::size_type weight_size() const { 301 return weight_cdf.size(); 302 } weight_deltaentries_and_cdf::type303 double weight_delta(size_t a, size_t b) const { 304 return weight_cdf[b] - weight_cdf[a]; 305 } weight_deltaentries_and_cdf::type306 double weight_delta(typename container_type::const_iterator a, typename container_type::const_iterator b) const { 307 return weight_cdf[b-container_type::begin()] - weight_cdf[a-container_type::begin()]; 308 } weight_cdf_atentries_and_cdf::type309 double weight_cdf_at(size_t a) const { 310 return weight_cdf[a]; 311 } weight_cdf_atentries_and_cdf::type312 double weight_cdf_at(typename container_type::const_iterator a) const { 313 return weight_cdf[a-container_type::begin()]; 314 } 315 }; 316 }; 317 template<int n> struct fb_entries_factory { 318 typedef typename entries_and_cdf<fb_entry_x_roots<n>>::type type; 319 }; 320 template<> struct fb_entries_factory<-1> { 321 typedef typename entries_and_cdf<fb_entry_general>::type type; 322 }; 323 template<int n> struct fb_slices_factory { 324 typedef std::vector<fb_slice<fb_entry_x_roots<n>>> type; 325 }; 326 template<> struct fb_slices_factory<-1> { 327 typedef std::vector<fb_slice<fb_entry_general>> type; 328 }; 329 330 class fb_factorbase { 331 public: 332 /* Has to be <= MAX_DEGREE */ 333 static const int MAX_ROOTS = MAX_DEGREE; 334 335 private: 336 337 cxx_mpz_poly f; 338 int side; 339 unsigned long lim; 340 unsigned long powlim; 341 342 public: empty() const343 bool empty() const { return lim == 0; } 344 345 private: 346 typedef multityped_array<fb_entries_factory, -1, MAX_ROOTS+1> entries_t; 347 entries_t entries; 348 349 public: 350 typedef std::array<size_t, MAX_ROOTS+2> threshold_pos; 351 threshold_pos get_threshold_pos(fbprime_t) const; 352 353 private: 354 /* The threshold position cache is used to accelerate the creation of 355 * slicings. */ 356 mutable std::map<fbprime_t, threshold_pos> threshold_pos_cache; 357 358 /* {{{ append. This inserts a pool of entries to the factor base. We 359 * do it with many entries in a row so as to avoid looping MAX_ROOTS 360 * times for each root, and so that we don't feel sorry to use 361 * multityped_array_foreach (even though in truth, it's quite 362 * probably fine) 363 */ 364 private: 365 struct helper_functor_append; 366 public: 367 void append(std::list<fb_entry_general> &pool); 368 /* }}} */ 369 370 /* {{{ Various ways to count primes, prime ideals, and hit ratio 371 * ("weight") of entries in the whole factor base, or in subranges 372 */ 373 private: 374 struct helper_functor_count_primes; 375 struct helper_functor_count_prime_ideals; 376 struct helper_functor_count_weight; 377 struct helper_functor_count_combined; 378 struct helper_functor_count_primes_interval; 379 struct helper_functor_count_prime_ideals_interval; 380 struct helper_functor_count_weight_interval; 381 struct helper_functor_count_combined_interval; 382 public: 383 size_t count_primes() const; 384 size_t count_prime_ideals() const; 385 size_t count_weight() const; 386 /* }}} */ 387 388 389 /* the key type lists the things with respect to which we're willing 390 * to divide the views on our factor base. 391 */ 392 struct key_type { 393 std::array<fbprime_t, FB_MAX_PARTS> thresholds; 394 fbprime_t td_thresh; 395 fbprime_t skipped; 396 double scale; 397 /* This might seem non obvious, but this parameters controls 398 * the size of the slices, because we want to enforce some 399 * relatively-even division. It's not entirely clear that we 400 * want it here, but we definitely want it somewhere. */ 401 unsigned int nb_threads; 402 operator <fb_factorbase::key_type403 bool operator<(key_type const& x) const { 404 int r; 405 for(int i = 0; i < FB_MAX_PARTS; ++i) { 406 r = (thresholds[i] > x.thresholds[i]) - (x.thresholds[i] > thresholds[i]); 407 if (r) return r < 0; 408 } 409 r = (td_thresh > x.td_thresh) - (x.td_thresh > td_thresh); 410 if (r) return r < 0; 411 r = (skipped > x.skipped) - (x.skipped > skipped); 412 if (r) return r < 0; 413 return scale < x.scale; 414 } 415 }; 416 417 public: 418 class slicing { 419 public: 420 struct stats_type { 421 /* explicit-initializing as below forces zero-initialization 422 * of members */ 423 std::array<size_t, FB_MAX_PARTS> primes {{}}; 424 std::array<size_t, FB_MAX_PARTS> ideals {{}}; 425 std::array<double, FB_MAX_PARTS> weight {{}}; 426 }; 427 stats_type stats; 428 429 class part { 430 /* slices and general_slices: actually general_slices is 431 * slices for number of roots -1, and that is 432 * always a vector of length 1. And we have slices (vectors 433 * of fb_slice objects) for all numbers of roots between 1 and 434 * MAX_ROOTS. 435 */ 436 typedef multityped_array<fb_slices_factory, -1, MAX_ROOTS+1> slices_t; 437 friend struct helper_functor_subdivide_slices; 438 slices_t slices; 439 slice_index_t first_slice_index = 0; 440 public: 441 template<int n> get_slices_vector_for_nroots()442 typename fb_slices_factory<n>::type& get_slices_vector_for_nroots() { 443 return slices.get<n>(); 444 } 445 private: 446 447 /* the general vector (the one with index -1 in the 448 * multityped array) is made of "anything that is not 449 * simple" (as per the logic that used to be in 450 * fb_part::append and fb_entry_general::is_simple). That means: 451 * - all powers go to the general vector 452 * - everything below bkthresh, too (but see below). 453 * - primes with multiple roots as well (but to be honest, 454 * it's not entirely clear to me why -- after all, there's 455 * no special treatment to speak of). 456 */ 457 458 friend class slicing; 459 460 /* We use this to access our arrays called slices and 461 * general_slices. 462 * 463 * 0 <= i <slicesG.size() --> return &(slicesG[i]) 464 * slicesG.size() <= i < slices0.size() --> return &(slices0[i-slicesG.size()]) 465 * slices0.size() <= i < slices1.size() --> return &(slices1[i-slicesG.size()-slices0.size()]) 466 * and so on. 467 */ 468 struct helper_functor_get { 469 typedef fb_slice_interface const * type; 470 typedef slice_index_t key_type; 471 template<typename T> operator ()fb_factorbase::slicing::part::helper_functor_get472 type operator()(T const & x, slice_index_t & k) { 473 if ((size_t) k < x.size()) { 474 return &(x[k]); 475 } else { 476 k -= x.size(); 477 return NULL; 478 } 479 } 480 }; 481 482 /* index is the global index across all fb parts */ get(slice_index_t index) const483 fb_slice_interface const * get(slice_index_t index) const { 484 /* used to be in fb_vector::get_slice 485 * 486 * and in fb_part::get_slice for the lookup of the 487 * vector. 488 * 489 * TODO: dichotomy, perhaps ? Can we do the dichotomy 490 * elegantly ? 491 */ 492 if (index < first_slice_index) return NULL; 493 slice_index_t idx = index - first_slice_index; 494 if (idx >= nslices()) { 495 // index = idx - nslices(); 496 return NULL; 497 } 498 const fb_slice_interface * res = multityped_array_locate<helper_functor_get>()(slices, idx); 499 ASSERT_ALWAYS(res); 500 ASSERT_ALWAYS(res->get_index() == index); 501 return res; 502 } 503 504 /* {{{ use caching for the number of slices, as it's a handy 505 * thing to query */ 506 mutable slice_index_t _nslices = std::numeric_limits<slice_index_t>::max(); 507 struct helper_functor_nslices { 508 template<typename T> operator ()fb_factorbase::slicing::part::helper_functor_nslices509 slice_index_t operator()(slice_index_t t, T const & x) const { 510 return t + x.size(); 511 } 512 }; 513 public: nslices() const514 slice_index_t nslices() const { 515 if (_nslices != std::numeric_limits<slice_index_t>::max()) 516 return _nslices; 517 helper_functor_nslices N; 518 return _nslices = multityped_array_fold(N, 0, slices); 519 } 520 /* }}} */ 521 522 private: 523 template<typename F> 524 struct foreach_slice_s { 525 F & f; 526 template<typename T> operator ()fb_factorbase::slicing::part::foreach_slice_s527 void operator()(T & x) { 528 for(auto & a : x) 529 f(a); 530 } 531 template<typename T> operator ()fb_factorbase::slicing::part::foreach_slice_s532 void operator()(T const & x) { 533 for(auto const & a : x) 534 f(a); 535 } 536 }; 537 public: 538 template<typename F> foreach_slice(F & f)539 void foreach_slice(F & f) { 540 foreach_slice_s<F> FF { f }; 541 multityped_array_foreach(FF, slices); 542 } 543 template<typename F> foreach_slice(F & f) const544 void foreach_slice(F & f) const { 545 foreach_slice_s<F> FF { f }; 546 multityped_array_foreach(FF, slices); 547 } 548 /* 549 * old g++ seems to have difficulties with this variant, and 550 * is puzzled by the apparent ambiguity -- newer g++ groks it 551 * correctly, as does clang 552 template<typename F> 553 void foreach_slice(F && f) { 554 multityped_array_foreach(foreach_slice_s<F> { f }, slices); 555 } 556 template<typename F> 557 void foreach_slice(F && f) const { 558 multityped_array_foreach(foreach_slice_s<F> { f }, slices); 559 } 560 */ 561 562 /* index: global index across all fb parts */ operator [](slice_index_t index) const563 fb_slice_interface const & operator[](slice_index_t index) const { 564 /* This bombs out at runtime if get returns NULL, but 565 * then it should be an indication of a programmer 566 * mistake. 567 */ 568 return *get(index); 569 } 570 }; 571 572 private: 573 574 /* We have "parts" that correspond to the various layers of the 575 * bucket sieving 576 * 577 * ==> THERE IS NO "part" FOR THE SMALL SIEVE <== 578 * 579 * This means that parts[0] will always be a meaningless 580 * placeholder. Indeed, the small sieve does not care about 581 * slices! 582 */ 583 std::array<part, FB_MAX_PARTS> parts; 584 585 /* toplevel is set by the ctor */ 586 int toplevel = 0; 587 588 /* index: global index across all fb parts */ get(slice_index_t index) const589 fb_slice_interface const * get(slice_index_t index) const { 590 for (auto const & p : parts) { 591 if (index < p.first_slice_index + p.nslices()) 592 return p.get(index); 593 } 594 return NULL; 595 } 596 public: 597 get_part(int i) const598 part const & get_part(int i) const { return parts[i]; } 599 get_toplevel() const600 inline int get_toplevel() const { return toplevel; } 601 602 /* This accesses the *fb_slice* with this index. Not the vector of 603 * slices ! 604 * index: global index across all fb parts */ operator [](slice_index_t index) const605 fb_slice_interface const & operator[](slice_index_t index) const { 606 return *get(index); 607 } 608 609 public: 610 611 /* Here, when computing the slices, we stow away the small 612 * primes, and arrange them according to the internal logic of 613 * the small sieve. In particular, we want to keep track of where 614 * the resieved primes are. 615 * 616 * Note that the data that we prepare here is still not attached 617 * to any special-q. We're replacing what used to be done in 618 * init_fb_smallsieved, but not in small_sieve_init. 619 * 620 * Primes below the bucket-sieve threshold are small-sieved. Some 621 * will be treated specially when cofactoring: 622 * 623 * - primes such that nb_roots / p >= 1/tdhresh will be 624 * trial-divided. 625 * - primes (ideals) that are not trial divided and that are not 626 * powers are resieved. 627 * 628 * And of course, depending on the special-q, small-sieved prime 629 * ideals become projective, and therefore escape the general 630 * treatment. 631 * 632 * Note that the condition on trial division is not clear-cut 633 * w.r.t p. The small sieve code is somewhat dependent on the 634 * number of hits per row, which is I/p. And it matters to have 635 * small-sieved primes sorted according to this value. Hence, as 636 * p increases, we might have: 637 * small p's that are trial-divided because 1/p >= 638 * 1/td_thresh. 639 * some p's with p<=2*td_thresh, among which some are trial 640 * divided if 2 roots or more are above, otherwise they're 641 * resieved. 642 * some p's with p<=3*td_thresh, again with a distinction... 643 * and so on up to p>d*tdshresh, (d being the polynomial 644 * degree). Above this bound we're sure that we no longer see 645 * any trial-divided prime. 646 * 647 * For each slicing, we elect to compute two copies of the lists 648 * of prime ideals below bkthresh: 649 * - first, prime ideals that we know will be resieved. Those 650 * form the largest part of the list. 651 * - second, prime ideals above primes that will be trial-divided. 652 * 653 */ 654 655 /* This contains all factor base prime ideals below the bucket 656 * threshold. */ 657 struct small_sieve_entries_t { 658 /* general ones. Not powers, not trial-divided ones. */ 659 std::vector<fb_entry_general> resieved; 660 /* the rest. some are powers, others are trial-divided */ 661 std::vector<fb_entry_general> rest; 662 /* from "rest" above, we can infer the list of trial-divided 663 * primes by merely restricting to entries with k==1 */ 664 665 /* this is sort of a trash can. We won't use these primes for 666 * the small sieve, but they do matter for trial division */ 667 std::vector<unsigned long> skipped; 668 }; 669 small_sieve_entries_t small_sieve_entries; 670 /* TODO: I doubt that the struct above will stay. Seems awkward. 671 * We'd better have a single vector, and the position of the 672 * "end-of-resieved" thing. 673 */ 674 675 template<typename T> foreach_slice(T & f)676 void foreach_slice(T & f) { 677 for (auto & p : parts) { 678 p.foreach_slice(f); 679 } 680 } 681 template<typename T> foreach_slice(T && f)682 void foreach_slice(T && f) { 683 for (auto & p : parts) { 684 p.foreach_slice(f); 685 } 686 } 687 688 slicing() = default; 689 slicing(fb_factorbase const & fb, key_type const & K); 690 }; 691 692 private: 693 lock_guarded_container<std::map<key_type, slicing>> cache; 694 int read(const char * const filename); 695 696 public: 697 /* accessors. 698 * As in the std::map case, the non-const accessor is allowed to 699 * create stuff. */ 700 operator [](key_type const & K)701 inline slicing & operator[](key_type const& K) { 702 std::lock_guard<std::mutex> foo(cache.mutex()); 703 auto it = cache.find(K); 704 if (it != cache.end()) 705 return it->second; 706 707 /* create a new slot */ 708 slicing & res(cache[K] = slicing(*this, K)); 709 return res; 710 } 711 operator [](key_type const & K) const712 inline slicing const & operator[](key_type const& K) const { 713 return cache.at(K); 714 } 715 716 private: 717 void make_linear(); 718 void make_linear_threadpool (unsigned int nb_threads); 719 720 public: 721 /* Note that the intent here is to read the factor base once and 722 * for all. In the descent context, where we have several 723 * different fb lim parameters, we should get away with different 724 * slicings, instead of trying to redo the factor base 725 * initialization. 726 */ 727 fb_factorbase(cxx_cado_poly const & cpoly, int side, cxx_param_list & pl, const char * fbc_filename, int nthreads = 1); 728 fb_factorbase() = default; 729 fb_factorbase(fb_factorbase &&) = default; 730 fb_factorbase& operator=(fb_factorbase &&) = default; 731 732 private: 733 struct sorter { 734 template<typename T> operator ()fb_factorbase::sorter735 void operator()(T & x) { 736 /* not entirely clear to me. Shall we sort by q or by p ? 737 */ 738 typedef typename T::value_type X; 739 auto by_q = [](X const & a, X const & b) { return a.get_q() < b.get_q(); }; 740 /* 741 auto by_p_then_q = [](X const & a, X const & b) { 742 return 743 a.get_p() < b.get_p() || 744 a.get_p() == b.get_p() && 745 a.get_q() < b.get_q(); 746 }; 747 */ 748 std::sort(x.begin(), x.end(), by_q); 749 } 750 }; 751 752 public: finalize()753 void finalize() { 754 sorter S; 755 multityped_array_foreach(S, entries); 756 } 757 }; 758 759 std::ostream& operator<<(std::ostream& o, fb_factorbase::key_type const &); 760 761 unsigned char fb_log (double x, double y, double z); 762 unsigned char fb_log_delta (fbprime_t, unsigned long, unsigned long, double); 763 fbprime_t fb_is_power (fbprime_t, unsigned long *); 764 765 #endif /* FB_HPP_ */ 766