1 /*
2
3 We want a function that checks whether a relation is, with high probability,
4 a duplicate of an earlier relation.
5
6 "Earlier" requires that we define some ordering on relations. Note that we may
7 sieve special-q on both sides for some factorisations, in particular SNFS.
8 It is possible that the same prime occurs on both sides if it divides the
9 two polynomials' resultant, which likewise may occur for SNFS.
10
11 We define the sieving order as sieving special-q on the algebraic side of
12 norm n if n is in the special-q range for the algebraic side,
13 then on the rational side of norm n if n is in the special-q range for the
14 rational side, then increasing n by 1.
15
16 Let Q_0 be the primes in the special-q range on the algebraic side,
17 and Q_1 on the rational side. If we sieve only one side i \in {0,1},
18 then the set Q_{1-i} is empty.
19
20 When we sieve special-q on only one side i, a relation is "earlier"
21 simply if it contains a prime on side i that is in Q_i and smaller than
22 the current q. (1)
23
24 If we sieve both sides, a relation is earlier if
25 - (1) holds, or
26 - we sieve q on the algebraic side and it contains a prime q' on the rational
27 side in Q_1 less than q
28 - we sieve q on the rational side and it contains a prime q' on the algebraic
29 side in Q_0 less than or equal to q
30
31 NOTE: this strategy for the case where we sieve two sides is not implemented.
32 Currently, las has no way to know about it.
33
34 Then we must go through the various steps the siever does to identify
35 relations and check the conditions that cause the relation to be found or
36 missed.
37
38 The relation is a dupe if:
39 - It may have been found by an "earlier" special-q $q'$
40 - The relation must be in the I,J sieving area that was used when sieving in
41 the $q'$-lattice
42 - The estimated norm, minus the log of the FB primes, is within the report
43 threshold. This is tricky, as there are many shortcuts to norm estimation,
44 and the contribution of FB primes depends on the choice of log scale and
45 on limits on prime powers that are sieved
46 - The cofactors of the two norms, without q' but with q on the special-q side,
47 are within the cofactor bounds mfb[ar]
48 - The cofactors on both sides can be factored by the cofactorization routines
49 that were used when sieving q'.
50
51 Thus the function to check for duplicates needs the following information:
52
53 - The relation with fully factored norms
54 - The special-q sieve range on the two sides
55 - For each q' that was sieved "earlier"
56 - The parameters used for generating the special-q' lattice (skewness)
57 - The I,J values
58 - The log scale
59 - The cofactor bounds mfb[ar]
60 - The cofactorization parameters
61
62 */
63
64 #include "cado.h" // IWYU pragma: keep
65
66 #include <algorithm>
67 #include <cinttypes> // for PRId8, PRId64, PRIu64
68 #include <climits> // for ULONG_MAX
69 #include <cstdio> // for size_t
70 #include <array> // for array, array<>::value_type
71 #include <cstdint> // for uint64_t, uint8_t, int64_t, uin...
72 #include <iosfwd> // for ostringstream, ostream
73 #include <memory> // for allocator_traits<>::value_type
74 #include <string> // for basic_string
75 #include <vector> // for vector
76 #include <cstdarg> // IWYU pragma: keep
77 #include <gmp.h> // for mpz_srcptr, gmp_vfprintf, mpz_g...
78
79 #include "cado_poly.h"
80 #include "cxx_mpz.hpp"
81 #include "las-duplicate.hpp"
82 #include "ecm/facul.hpp" // for facul_strategies_t
83 #include "fb.hpp" // for fb_log
84 #include "gmp_aux.h"
85 #include "las-choose-sieve-area.hpp" // for choose_sieve_area
86 #include "las-cofactor.hpp" // for check_leftover_norm, factor_bot...
87 #include "las-coordinates.hpp" // for ABToIJ
88 #include "las-norms.hpp" // for lognorm_smart
89 #include "las-qlattice.hpp" // for operator<<, qlattice_basis
90 #include "las-siever-config.hpp" // for siever_config::side_config, sie...
91 #include "las-todo-entry.hpp" // for las_todo_entry
92 #include "macros.h" // for ASSERT_ALWAYS
93 #include "mpz_poly.h"
94 #include "relation.hpp" // for relation
95 #include "verbose.h"
96
97
98 /* default verbose level of # DUPECHECK lines */
99 #define VERBOSE_LEVEL 2
100
101 struct sq_with_fac {
102 uint64_t q;
103 std::vector<uint64_t> facq;
104 };
105
106 // Warning: the entry we create here does not know whether sq is prime
107 // or composite (it assumes prime). This is fragile!!!
108 //
109 // XXX errr. I don't get it. If r=a/b mod all divisors of q, then this
110 // ought to hold mod q as well, right ?
111 //
112 // if we want to deal with the case where b is not coprime to q, the
113 // problem we have is that of doing a CRT on the projective line. Which
114 // is doable (well, to start with, the projective line element we're
115 // looking for is (a:b) anyway). But
116 // down the line, we stumble on the fact that just a plain integer isn't
117 // appropriate to represent elements of P1(Z/qZ) when q is composite.
118 //
special_q_from_ab(const int64_t a,const uint64_t b,sq_with_fac const & sq,int side)119 static las_todo_entry special_q_from_ab(const int64_t a, const uint64_t b, sq_with_fac const & sq, int side)
120 {
121 cxx_mpz p, r;
122
123 mpz_set_ui(p, sq.q);
124 mpz_set_uint64(r, b);
125 int ret = mpz_invert(r, r, p);
126 ASSERT_ALWAYS(ret);
127 mpz_mul_int64(r, r, a);
128 mpz_mod(r, r, p);
129
130 return las_todo_entry(p, r, side);
131 }
132
133 /* If e == 0, returns 1. Otherwise, if b > lim, returns b.
134 Otherwise returns the largest b^k with k <= e and b^k <= lim. */
135 static unsigned long
bounded_pow(const unsigned long b,const unsigned long e,const unsigned long lim)136 bounded_pow(const unsigned long b, const unsigned long e, const unsigned long lim)
137 {
138 if (e == 0) {
139 return 1;
140 }
141 if (b > lim) {
142 return b;
143 }
144 unsigned long r = b;
145 /* overflow is the largest ulong that can be multiplied by b without
146 overflowing */
147 unsigned long overflow = ULONG_MAX / b;
148 for (unsigned long i = 1; i < e && r <= overflow; i++) {
149 unsigned long t = r * b;
150 if (t > lim) {
151 break;
152 }
153 r = t;
154 }
155 return r;
156 }
157
158 /*
159 * We subtract the sieve contribution of the factor base primes.
160 * I.e., for each p^k || F(a,b) with p <= fbb:
161 * if k > 1 then we reduce k if necessary s.t. p^k <= powlim, but never to k < 1
162 * set l := l - log_b(p^k)
163 */
164 static unsigned char
subtract_fb_log(const unsigned char lognorm,double scale,unsigned long lim,unsigned long powlim,relation const & rel,int side,las_todo_entry const & doing)165 subtract_fb_log(const unsigned char lognorm,
166 double scale,
167 unsigned long lim, unsigned long powlim,
168 relation const& rel,
169 int side,
170 las_todo_entry const & doing)
171 {
172 unsigned char new_lognorm = lognorm;
173
174 for (unsigned int i = 0; i < rel.sides[side].size(); i++) {
175 const unsigned long p = mpz_get_ui(rel.sides[side][i].p);
176 int e = rel.sides[side][i].e;
177 ASSERT_ALWAYS(e > 0);
178 if (p >= lim)
179 continue;
180
181 // log(sq) is already subtracted from the lognorm we've received on
182 // input.
183 // For prime factors of sq, we need to count a smaller valuation
184 // because powlim doesn't have to be that large.
185 // (note that the list is empty if sq is prime).
186 if (side == doing.side)
187 for (auto const & f : doing.prime_factors)
188 e -= (p == f);
189
190 if (e == 0)
191 continue;
192
193 const unsigned long p_pow = bounded_pow(p, e, powlim);
194 const unsigned char p_pow_log = fb_log(p_pow, scale, 0);
195
196 if (p_pow_log > new_lognorm) {
197 new_lognorm = 0;
198 } else {
199 new_lognorm -= p_pow_log;
200 }
201 }
202 return new_lognorm;
203 }
204
205 /* Return true if the relation is probably a duplicate of a relation found
206 * when sieving [doing].
207 * Return false if it is probably not a duplicate */
208 static bool
sq_finds_relation(las_info const & las,las_todo_entry const & doing,siever_config const & conf,qlattice_basis const & Q,uint32_t J,relation const & rel,bool must,bool talk)209 sq_finds_relation(las_info const & las,
210 las_todo_entry const & doing,
211 siever_config const & conf,
212 qlattice_basis const & Q,
213 uint32_t J,
214 relation const& rel,
215 bool must,
216 bool talk)
217 {
218 int logI = conf.logI;
219
220 if (talk) { // Print some info
221 verbose_output_vfprint(0, VERBOSE_LEVEL, gmp_vfprintf,
222 "# DUPECHECK Checking if relation"
223 " (a,b) = (%" PRId64 ",%" PRIu64 ")"
224 " is a dupe of sieving special-q -q0 %Zd -rho %Zd\n",
225 rel.a, rel.b,
226 (mpz_srcptr) doing.p,
227 (mpz_srcptr) doing.r);
228 // should stay consistent with "Sieving side" line printed in
229 // do_one_special_q()
230 std::ostringstream os;
231 os << Q;
232 verbose_output_print(0, VERBOSE_LEVEL,
233 "# DUPECHECK Trying %s; I=%u; J=%d;\n",
234 os.str().c_str(),
235 1u << conf.logI, J);
236 }
237
238 /* Compute i,j-coordinates of this relation in the special-q lattice when
239 p was used as the special-q value. */
240 int i;
241 unsigned int j;
242 {
243 int ok;
244 ok = ABToIJ(i, j, rel.a, rel.b, Q);
245 if (!must && !ok) return false;
246 ASSERT_ALWAYS(ok);
247 }
248
249 /* If the coordinate is outside the i,j-region used when sieving
250 the special-q described in [doing], then it's not a duplicate */
251 if (j >= J || (i < -(1L << (logI-1))) || (i >= (1L << (logI-1))))
252 {
253 if (talk) verbose_output_print(0, VERBOSE_LEVEL,
254 "# DUPECHECK (i,j) = (%d, %u) is outside sieve region\n", i, j);
255 return false;
256 }
257
258 uint8_t remaining_lognorm[2];
259 bool is_dupe = true;
260 for (int side = 0; side < 2; side++) {
261 lognorm_smart L(conf, las.cpoly, side, Q, conf.logI, J);
262
263 /* This lognorm is the evaluation of fij(i,j)/q on the side with the
264 * special-q, so we don't have to subtract it.
265 */
266 const uint8_t lognorm = L.lognorm(i,j);
267
268 remaining_lognorm[side] = subtract_fb_log(lognorm, L.scale,
269 conf.sides[side].lim,
270 conf.sides[side].powlim,
271 rel,
272 side,
273 doing);
274
275 if (remaining_lognorm[side] > L.bound) {
276 if (talk) verbose_output_print(0, VERBOSE_LEVEL, "# DUPECHECK On side %d, remaining lognorm = %" PRId8 " > bound = %" PRId8 "\n",
277 side, remaining_lognorm[side], L.bound);
278 is_dupe = false;
279 }
280 }
281 if (talk) verbose_output_print(0, VERBOSE_LEVEL,
282 "# DUPECHECK relation had i=%d, j=%u, remaining lognorms %" PRId8 ", %" PRId8 "\n",
283 i, j, remaining_lognorm[0], remaining_lognorm[1]);
284 if (!is_dupe) {
285 return false;
286 }
287
288 /* Compute the exact cofactor on each side. This is similar to what we
289 * do in subtract_fb_log -- except that now we do the part *above* the
290 * factor base. */
291 std::array<cxx_mpz, 2> cof;
292 for (int side = 0; side < 2; side++) {
293 mpz_set_ui(cof[side], 1);
294 unsigned long lim = conf.sides[side].lim;
295
296 for (unsigned int i = 0; i < rel.sides[side].size(); i++) {
297 const unsigned long p = mpz_get_ui(rel.sides[side][i].p);
298 int e = rel.sides[side][i].e;
299 ASSERT_ALWAYS(e > 0);
300 if (p <= lim) {
301 continue;
302 }
303
304 // Remove one occurrence of each factor of sq on side sq_side.
305 if (side == doing.side)
306 for (auto const & f : doing.prime_factors)
307 e -= (p == f);
308
309 for (int i = 0; i < e; ++i) {
310 mpz_mul_ui(cof[side], cof[side], p);
311 }
312 }
313 }
314
315 /* Check that the cofactors are within the mfb bound */
316 for (int side = 0; side < 2; ++side) {
317 if (!check_leftover_norm (cof[side], conf.sides[side])) {
318 verbose_output_vfprint(0, VERBOSE_LEVEL, gmp_vfprintf,
319 "# DUPECHECK cofactor %Zd is outside bounds\n",
320 (__mpz_struct*) cof[side]);
321 return false;
322 }
323 }
324
325 std::array<std::vector<cxx_mpz>, 2> f;
326 facul_strategies_t const * strategies = las.get_strategies(conf);
327 int pass = factor_both_leftover_norms(cof, f, {{conf.sides[0].lim, conf.sides[1].lim}}, strategies);
328
329 if (pass <= 0) {
330 if (talk) verbose_output_vfprint(0, VERBOSE_LEVEL, gmp_vfprintf,
331 "# DUPECHECK norms not both smooth, left over factors: %Zd, %Zd\n",
332 (mpz_srcptr) cof[0], (mpz_srcptr) cof[1]);
333 return false;
334 }
335
336 return true;
337 }
338
339 bool
sq_finds_relation(las_info const & las,las_todo_entry const & doing,siever_config const & conf,qlattice_basis const & Q,uint32_t J,relation const & rel)340 sq_finds_relation(las_info const & las,
341 las_todo_entry const & doing,
342 siever_config const & conf,
343 qlattice_basis const & Q,
344 uint32_t J,
345 relation const& rel)
346 {
347 return sq_finds_relation(las, doing, conf, Q, J, rel, false, false);
348 }
349
350 static bool
sq_finds_relation(las_info const & las,las_todo_entry const & doing,relation const & rel,bool must,bool talk)351 sq_finds_relation(las_info const & las,
352 las_todo_entry const & doing,
353 relation const& rel,
354 bool must,
355 bool talk)
356 {
357 siever_config conf;
358 qlattice_basis Q;
359 uint32_t J;
360 if (!choose_sieve_area(las, doing, conf, Q, J)) {
361 if (talk) verbose_output_print(0, VERBOSE_LEVEL, "# DUPECHECK q-lattice discarded\n");
362 return false;
363 }
364 return sq_finds_relation(las, doing, conf, Q, J, rel, must, talk);
365 }
366
367
368 /* This function decides whether the given (sq,side) was previously
369 * sieved (compared to the current special-q stored in [doing]).
370 * This takes qmin and qmax into account, on the side of the sq. The side
371 * doing.side is irrelevant here.
372 */
373 static int
sq_was_previously_sieved(las_info const & las,const uint64_t sq,int side,las_todo_entry const & doing)374 sq_was_previously_sieved (las_info const & las, const uint64_t sq, int side, las_todo_entry const & doing)
375 {
376 /* we use <= and not < since this function is also called with the
377 * current special-q */
378 if (mpz_cmp_uint64(doing.p, sq) <= 0)
379 return 0;
380
381 return sq >= las.dupqmin[side] && sq < las.dupqmax[side];
382 }
383
384 // Warning: this function works with side effects:
385 // - it is recursive
386 // - it destroys its argument along the way
387 // - it allocates the result it returns; the caller must free it.
388 // Keep only products that fit in 64 bits.
389 static std::vector<sq_with_fac>
all_multiples(std::vector<uint64_t> & prime_list)390 all_multiples(std::vector<uint64_t> & prime_list) {
391 if (prime_list.empty()) {
392 std::vector<sq_with_fac> res;
393 return res;
394 }
395
396 uint64_t p = prime_list.back();
397 prime_list.pop_back();
398
399 if (prime_list.empty()) {
400 std::vector<sq_with_fac> res;
401 res.push_back(sq_with_fac { 1, std::vector<uint64_t> () });
402 res.push_back(sq_with_fac { p, std::vector<uint64_t> (1, p) });
403 return res;
404 }
405
406 std::vector<sq_with_fac> res = all_multiples(prime_list);
407
408 size_t L = res.size();
409 cxx_mpz pp;
410 mpz_set_uint64(pp, p);
411 for (size_t i = 0; i < L; ++i) {
412 std::vector<uint64_t> facq;
413 facq = res[i].facq;
414 facq.push_back(p);
415
416 cxx_mpz prod;
417 mpz_mul_uint64(prod, pp, res[i].q);
418 if (mpz_fits_uint64_p(prod)) {
419 uint64_t q = mpz_get_uint64(prod);
420 struct sq_with_fac entry = {q, facq};
421 res.push_back(entry);
422 }
423 }
424 return res;
425 }
426
427 /* Return 1 if the relation, obtained from sieving [doing], is probably a
428 * duplicate of a relation found "earlier", and 0 if it is probably not a
429 * duplicate
430 *
431 * We may imagine having different cofactoring strategies depending on q,
432 * in which case the code below is incorrect. However, for normal
433 * (non-dlp-descent) sieving, there does not seem to be a compelling
434 * argument for that.
435 */
436 int
relation_is_duplicate(relation const & rel,las_todo_entry const & doing,las_info const & las)437 relation_is_duplicate(relation const& rel,
438 las_todo_entry const & doing,
439 las_info const& las)
440 {
441 /* If the special-q does not fit in an unsigned long, we assume it's not a
442 duplicate and just move on */
443 if (doing.is_prime() && !mpz_fits_uint64_p(doing.p)) {
444 return false;
445 }
446
447 /* If any large prime doesn't fit in a 64-bit integer, then we assume
448 * that we're doing the dlp desecent, in which case we couldn't care
449 * less about duplicates check anyway.
450 */
451 for(int side = 0 ; side < 2 ; side++) {
452 for(unsigned int i = 0 ; i < rel.sides[side].size() ; i++) {
453 if (!mpz_fits_uint64_p(rel.sides[side][i].p))
454 return false;
455 }
456 }
457
458 for(int side = 0 ; side < 2 ; side++) {
459 /* It is allowed to have special-q on both sides, and we wish to
460 * check "cross" special-q combinations.
461 */
462
463 // Step 1: prepare a list of potential special-q on this side.
464 // They involve only prime factors within the allowed bounds
465 std::vector<uint64_t> prime_list;
466
467 for(unsigned int i = 0 ; i < rel.sides[side].size() ; i++) {
468 uint64_t p = mpz_get_uint64(rel.sides[side][i].p);
469
470 // can this p be part of valid sq ?
471 if (! las.allow_composite_q) {
472 /* this check is also done in sq_was_previously_sieved,
473 * but it's easy enough to skip the divisibility test
474 * when we know there's no point.
475 */
476 if ((p < las.dupqmin[side]) || (p >= las.dupqmax[side])) {
477 continue;
478 }
479 } else {
480 if (!las.is_in_qfac_range(p))
481 continue;
482 }
483
484 /* projective primes are currently not allowed for composite
485 * special-q */
486 if (mpz_divisible_ui_p(mpz_poly_lc(las.cpoly->pols[side]), p))
487 continue;
488
489 // push it in the list of potential factors of sq
490 prime_list.push_back(p);
491 }
492
493 for (auto const & sq : all_multiples(prime_list)) {
494 // keep sq only if it was sieved before [doing]
495 if (!sq_was_previously_sieved(las, sq.q, side, doing))
496 continue;
497
498 // emulate sieving for the valid sq, and check if it finds our
499 // relation.
500 las_todo_entry other = special_q_from_ab(rel.a, rel.b, sq, side);
501
502 bool is_dupe = sq_finds_relation(las, other, rel, true, true);
503 verbose_output_print(0, VERBOSE_LEVEL,
504 "# DUPECHECK relation is probably%s a dupe\n",
505 is_dupe ? "" : " not");
506 if (is_dupe) return true;
507 }
508 }
509
510 return false;
511 }
512