1 /*
2 
3 We want a function that checks whether a relation is, with high probability,
4 a duplicate of an earlier relation.
5 
6 "Earlier" requires that we define some ordering on relations. Note that we may
7 sieve special-q on both sides for some factorisations, in particular SNFS.
8 It is possible that the same prime occurs on both sides if it divides the
9 two polynomials' resultant, which likewise may occur for SNFS.
10 
11 We define the sieving order as sieving special-q on the algebraic side of
12 norm n if n is in the special-q range for the algebraic side,
13 then on the rational side of norm n if n is in the special-q range for the
14 rational side, then increasing n by 1.
15 
16 Let Q_0 be the primes in the special-q range on the algebraic side,
17 and Q_1 on the rational side. If we sieve only one side i \in {0,1},
18 then the set Q_{1-i} is empty.
19 
20 When we sieve special-q on only one side i, a relation is "earlier"
21 simply if it contains a prime on side i that is in Q_i and smaller than
22 the current q. (1)
23 
24 If we sieve both sides, a relation is earlier if
25 - (1) holds, or
26 - we sieve q on the algebraic side and it contains a prime q' on the rational
27   side in Q_1 less than q
28 - we sieve q on the rational side and it contains a prime q' on the algebraic
29   side in Q_0 less than or equal to q
30 
31 NOTE: this strategy for the case where we sieve two sides is not implemented.
32 Currently, las has no way to know about it.
33 
34 Then we must go through the various steps the siever does to identify
35 relations and check the conditions that cause the relation to be found or
36 missed.
37 
38 The relation is a dupe if:
39 - It may have been found by an "earlier" special-q $q'$
40 - The relation must be in the I,J sieving area that was used when sieving in
41   the $q'$-lattice
42 - The estimated norm, minus the log of the FB primes, is within the report
43   threshold. This is tricky, as there are many shortcuts to norm estimation,
44   and the contribution of FB primes depends on the choice of log scale and
45   on limits on prime powers that are sieved
46 - The cofactors of the two norms, without q' but with q on the special-q side,
47   are within the cofactor bounds mfb[ar]
48 - The cofactors on both sides can be factored by the cofactorization routines
49   that were used when sieving q'.
50 
51 Thus the function to check for duplicates needs the following information:
52 
53 - The relation with fully factored norms
54 - The special-q sieve range on the two sides
55 - For each q' that was sieved "earlier"
56   - The parameters used for generating the special-q' lattice (skewness)
57   - The I,J values
58   - The log scale
59   - The cofactor bounds mfb[ar]
60   - The cofactorization parameters
61 
62 */
63 
64 #include "cado.h" // IWYU pragma: keep
65 
66 #include <algorithm>
67 #include <cinttypes>                  // for PRId8, PRId64, PRIu64
68 #include <climits>                    // for ULONG_MAX
69 #include <cstdio>                     // for size_t
70 #include <array>                      // for array, array<>::value_type
71 #include <cstdint>                    // for uint64_t, uint8_t, int64_t, uin...
72 #include <iosfwd>                     // for ostringstream, ostream
73 #include <memory>                     // for allocator_traits<>::value_type
74 #include <string>                     // for basic_string
75 #include <vector>                     // for vector
76 #include <cstdarg>             // IWYU pragma: keep
77 #include <gmp.h>                      // for mpz_srcptr, gmp_vfprintf, mpz_g...
78 
79 #include "cado_poly.h"
80 #include "cxx_mpz.hpp"
81 #include "las-duplicate.hpp"
82 #include "ecm/facul.hpp"                  // for facul_strategies_t
83 #include "fb.hpp"                     // for fb_log
84 #include "gmp_aux.h"
85 #include "las-choose-sieve-area.hpp"  // for choose_sieve_area
86 #include "las-cofactor.hpp"           // for check_leftover_norm, factor_bot...
87 #include "las-coordinates.hpp"        // for ABToIJ
88 #include "las-norms.hpp"              // for lognorm_smart
89 #include "las-qlattice.hpp"           // for operator<<, qlattice_basis
90 #include "las-siever-config.hpp"      // for siever_config::side_config, sie...
91 #include "las-todo-entry.hpp"         // for las_todo_entry
92 #include "macros.h"                   // for ASSERT_ALWAYS
93 #include "mpz_poly.h"
94 #include "relation.hpp"               // for relation
95 #include "verbose.h"
96 
97 
98 /* default verbose level of # DUPECHECK lines */
99 #define VERBOSE_LEVEL 2
100 
101 struct sq_with_fac {
102   uint64_t q;
103   std::vector<uint64_t> facq;
104 };
105 
106 // Warning: the entry we create here does not know whether sq is prime
107 // or composite (it assumes prime). This is fragile!!!
108 //
109 // XXX errr. I don't get it. If r=a/b mod all divisors of q, then this
110 // ought to hold mod q as well, right ?
111 //
112 // if we want to deal with the case where b is not coprime to q, the
113 // problem we have is that of doing a CRT on the projective line. Which
114 // is doable (well, to start with, the projective line element we're
115 // looking for is (a:b) anyway). But
116 // down the line, we stumble on the fact that just a plain integer isn't
117 // appropriate to represent elements of P1(Z/qZ) when q is composite.
118 //
special_q_from_ab(const int64_t a,const uint64_t b,sq_with_fac const & sq,int side)119 static las_todo_entry special_q_from_ab(const int64_t a, const uint64_t b, sq_with_fac const & sq, int side)
120 {
121     cxx_mpz p, r;
122 
123     mpz_set_ui(p, sq.q);
124     mpz_set_uint64(r, b);
125     int ret = mpz_invert(r, r, p);
126     ASSERT_ALWAYS(ret);
127     mpz_mul_int64(r, r, a);
128     mpz_mod(r, r, p);
129 
130     return las_todo_entry(p, r, side);
131 }
132 
133 /* If e == 0, returns 1. Otherwise, if b > lim, returns b.
134    Otherwise returns the largest b^k with k <= e and b^k <= lim. */
135 static unsigned long
bounded_pow(const unsigned long b,const unsigned long e,const unsigned long lim)136 bounded_pow(const unsigned long b, const unsigned long e, const unsigned long lim)
137 {
138   if (e == 0) {
139     return 1;
140   }
141   if (b > lim) {
142     return b;
143   }
144   unsigned long r = b;
145   /* overflow is the largest ulong that can be multiplied by b without
146      overflowing */
147   unsigned long overflow = ULONG_MAX / b;
148   for (unsigned long i = 1; i < e && r <= overflow; i++) {
149     unsigned long t = r * b;
150     if (t > lim) {
151       break;
152     }
153     r = t;
154   }
155   return r;
156 }
157 
158 /*
159  * We subtract the sieve contribution of the factor base primes.
160  * I.e., for each p^k || F(a,b) with p <= fbb:
161  *   if k > 1 then we reduce k if necessary s.t. p^k <= powlim, but never to k < 1
162  *   set l := l - log_b(p^k)
163 */
164 static unsigned char
subtract_fb_log(const unsigned char lognorm,double scale,unsigned long lim,unsigned long powlim,relation const & rel,int side,las_todo_entry const & doing)165 subtract_fb_log(const unsigned char lognorm,
166         double scale,
167         unsigned long lim, unsigned long powlim,
168         relation const& rel,
169         int side,
170         las_todo_entry const & doing)
171 {
172   unsigned char new_lognorm = lognorm;
173 
174   for (unsigned int i = 0; i < rel.sides[side].size(); i++) {
175     const unsigned long p = mpz_get_ui(rel.sides[side][i].p);
176     int e = rel.sides[side][i].e;
177     ASSERT_ALWAYS(e > 0);
178     if (p >= lim)
179       continue;
180 
181     // log(sq) is already subtracted from the lognorm we've received on
182     // input.
183     // For prime factors of sq, we need to count a smaller valuation
184     // because powlim doesn't have to be that large.
185     // (note that the list is empty if sq is prime).
186     if (side == doing.side)
187         for (auto const & f : doing.prime_factors)
188             e -= (p == f);
189 
190     if (e == 0)
191       continue;
192 
193     const unsigned long p_pow = bounded_pow(p, e, powlim);
194     const unsigned char p_pow_log = fb_log(p_pow, scale, 0);
195 
196     if (p_pow_log > new_lognorm) {
197       new_lognorm = 0;
198     } else {
199       new_lognorm -= p_pow_log;
200     }
201   }
202   return new_lognorm;
203 }
204 
205 /* Return true if the relation is probably a duplicate of a relation found
206  * when sieving [doing].
207  * Return false if it is probably not a duplicate */
208 static bool
sq_finds_relation(las_info const & las,las_todo_entry const & doing,siever_config const & conf,qlattice_basis const & Q,uint32_t J,relation const & rel,bool must,bool talk)209 sq_finds_relation(las_info const & las,
210         las_todo_entry const & doing,
211         siever_config const & conf,
212         qlattice_basis const & Q,
213         uint32_t J,
214         relation const& rel,
215         bool must,
216         bool talk)
217 {
218   int logI = conf.logI;
219 
220   if (talk) {   // Print some info
221       verbose_output_vfprint(0, VERBOSE_LEVEL, gmp_vfprintf,
222               "# DUPECHECK Checking if relation"
223               " (a,b) = (%" PRId64 ",%" PRIu64 ")"
224               " is a dupe of sieving special-q -q0 %Zd -rho %Zd\n",
225               rel.a, rel.b,
226               (mpz_srcptr) doing.p,
227               (mpz_srcptr) doing.r);
228       // should stay consistent with "Sieving side" line printed in
229       // do_one_special_q()
230       std::ostringstream os;
231       os << Q;
232       verbose_output_print(0, VERBOSE_LEVEL,
233               "# DUPECHECK Trying %s; I=%u; J=%d;\n",
234               os.str().c_str(),
235               1u << conf.logI, J);
236   }
237 
238   /* Compute i,j-coordinates of this relation in the special-q lattice when
239      p was used as the special-q value. */
240   int i;
241   unsigned int j;
242   {
243     int ok;
244     ok = ABToIJ(i, j, rel.a, rel.b, Q);
245     if (!must && !ok) return false;
246     ASSERT_ALWAYS(ok);
247   }
248 
249   /* If the coordinate is outside the i,j-region used when sieving
250      the special-q described in [doing], then it's not a duplicate */
251   if (j >= J || (i < -(1L << (logI-1))) || (i >= (1L << (logI-1))))
252   {
253     if (talk) verbose_output_print(0, VERBOSE_LEVEL,
254         "# DUPECHECK (i,j) = (%d, %u) is outside sieve region\n", i, j);
255     return false;
256   }
257 
258   uint8_t remaining_lognorm[2];
259   bool is_dupe = true;
260   for (int side = 0; side < 2; side++) {
261     lognorm_smart L(conf, las.cpoly, side, Q, conf.logI, J);
262 
263     /* This lognorm is the evaluation of fij(i,j)/q on the side with the
264      * special-q, so we don't have to subtract it.
265      */
266     const uint8_t lognorm = L.lognorm(i,j);
267 
268     remaining_lognorm[side] = subtract_fb_log(lognorm, L.scale,
269             conf.sides[side].lim,
270             conf.sides[side].powlim,
271             rel,
272             side,
273             doing);
274 
275     if (remaining_lognorm[side] > L.bound) {
276       if (talk) verbose_output_print(0, VERBOSE_LEVEL, "# DUPECHECK On side %d, remaining lognorm = %" PRId8 " > bound = %" PRId8 "\n",
277           side, remaining_lognorm[side], L.bound);
278       is_dupe = false;
279     }
280   }
281   if (talk) verbose_output_print(0, VERBOSE_LEVEL,
282       "# DUPECHECK relation had i=%d, j=%u, remaining lognorms %" PRId8 ", %" PRId8 "\n",
283       i, j, remaining_lognorm[0], remaining_lognorm[1]);
284   if (!is_dupe) {
285     return false;
286   }
287 
288   /* Compute the exact cofactor on each side. This is similar to what we
289    * do in subtract_fb_log -- except that now we do the part *above* the
290    * factor base. */
291   std::array<cxx_mpz, 2> cof;
292   for (int side = 0; side < 2; side++) {
293     mpz_set_ui(cof[side], 1);
294     unsigned long lim = conf.sides[side].lim;
295 
296     for (unsigned int i = 0; i < rel.sides[side].size(); i++) {
297       const unsigned long p = mpz_get_ui(rel.sides[side][i].p);
298       int e = rel.sides[side][i].e;
299       ASSERT_ALWAYS(e > 0);
300       if (p <= lim) {
301         continue;
302       }
303 
304       // Remove one occurrence of each factor of sq on side sq_side.
305       if (side == doing.side)
306         for (auto const & f : doing.prime_factors)
307             e -= (p == f);
308 
309       for (int i = 0; i < e; ++i) {
310         mpz_mul_ui(cof[side], cof[side], p);
311       }
312     }
313   }
314 
315   /* Check that the cofactors are within the mfb bound */
316   for (int side = 0; side < 2; ++side) {
317     if (!check_leftover_norm (cof[side], conf.sides[side])) {
318       verbose_output_vfprint(0, VERBOSE_LEVEL, gmp_vfprintf,
319           "# DUPECHECK cofactor %Zd is outside bounds\n",
320           (__mpz_struct*) cof[side]);
321       return false;
322     }
323   }
324 
325   std::array<std::vector<cxx_mpz>, 2> f;
326   facul_strategies_t const * strategies = las.get_strategies(conf);
327   int pass = factor_both_leftover_norms(cof, f, {{conf.sides[0].lim, conf.sides[1].lim}}, strategies);
328 
329   if (pass <= 0) {
330     if (talk) verbose_output_vfprint(0, VERBOSE_LEVEL, gmp_vfprintf,
331         "# DUPECHECK norms not both smooth, left over factors: %Zd, %Zd\n",
332         (mpz_srcptr) cof[0], (mpz_srcptr) cof[1]);
333     return false;
334   }
335 
336   return true;
337 }
338 
339 bool
sq_finds_relation(las_info const & las,las_todo_entry const & doing,siever_config const & conf,qlattice_basis const & Q,uint32_t J,relation const & rel)340 sq_finds_relation(las_info const & las,
341         las_todo_entry const & doing,
342         siever_config const & conf,
343         qlattice_basis const & Q,
344         uint32_t J,
345         relation const& rel)
346 {
347     return sq_finds_relation(las, doing, conf, Q, J, rel, false, false);
348 }
349 
350 static bool
sq_finds_relation(las_info const & las,las_todo_entry const & doing,relation const & rel,bool must,bool talk)351 sq_finds_relation(las_info const & las,
352         las_todo_entry const & doing,
353         relation const& rel,
354         bool must,
355         bool talk)
356 {
357   siever_config conf;
358   qlattice_basis Q;
359   uint32_t J;
360   if (!choose_sieve_area(las, doing, conf, Q, J)) {
361     if (talk) verbose_output_print(0, VERBOSE_LEVEL, "# DUPECHECK q-lattice discarded\n");
362     return false;
363   }
364   return sq_finds_relation(las, doing, conf, Q, J, rel, must, talk);
365 }
366 
367 
368 /* This function decides whether the given (sq,side) was previously
369  * sieved (compared to the current special-q stored in [doing]).
370  * This takes qmin and qmax into account, on the side of the sq. The side
371  * doing.side is irrelevant here.
372  */
373 static int
sq_was_previously_sieved(las_info const & las,const uint64_t sq,int side,las_todo_entry const & doing)374 sq_was_previously_sieved (las_info const & las, const uint64_t sq, int side, las_todo_entry const & doing)
375 {
376     /* we use <= and not < since this function is also called with the
377      * current special-q */
378     if (mpz_cmp_uint64(doing.p, sq) <= 0)
379         return 0;
380 
381     return sq >= las.dupqmin[side] && sq < las.dupqmax[side];
382 }
383 
384 // Warning: this function works with side effects:
385 //   - it is recursive
386 //   - it destroys its argument along the way
387 //   - it allocates the result it returns; the caller must free it.
388 // Keep only products that fit in 64 bits.
389 static std::vector<sq_with_fac>
all_multiples(std::vector<uint64_t> & prime_list)390 all_multiples(std::vector<uint64_t> & prime_list) {
391   if (prime_list.empty()) {
392     std::vector<sq_with_fac> res;
393     return res;
394   }
395 
396   uint64_t p = prime_list.back();
397   prime_list.pop_back();
398 
399   if (prime_list.empty()) {
400     std::vector<sq_with_fac> res;
401     res.push_back(sq_with_fac { 1, std::vector<uint64_t> () });
402     res.push_back(sq_with_fac { p, std::vector<uint64_t> (1, p) });
403     return res;
404   }
405 
406   std::vector<sq_with_fac> res = all_multiples(prime_list);
407 
408   size_t L = res.size();
409   cxx_mpz pp;
410   mpz_set_uint64(pp, p);
411   for (size_t i = 0; i < L; ++i) {
412     std::vector<uint64_t> facq;
413     facq = res[i].facq;
414     facq.push_back(p);
415 
416     cxx_mpz prod;
417     mpz_mul_uint64(prod, pp, res[i].q);
418     if (mpz_fits_uint64_p(prod)) {
419       uint64_t q = mpz_get_uint64(prod);
420       struct sq_with_fac entry = {q, facq};
421       res.push_back(entry);
422     }
423   }
424   return res;
425 }
426 
427 /* Return 1 if the relation, obtained from sieving [doing], is probably a
428  * duplicate of a relation found "earlier", and 0 if it is probably not a
429  * duplicate
430  *
431  * We may imagine having different cofactoring strategies depending on q,
432  * in which case the code below is incorrect. However, for normal
433  * (non-dlp-descent) sieving, there does not seem to be a compelling
434  * argument for that.
435  */
436 int
relation_is_duplicate(relation const & rel,las_todo_entry const & doing,las_info const & las)437 relation_is_duplicate(relation const& rel,
438         las_todo_entry const & doing,
439         las_info const& las)
440 {
441     /* If the special-q does not fit in an unsigned long, we assume it's not a
442        duplicate and just move on */
443     if (doing.is_prime() && !mpz_fits_uint64_p(doing.p)) {
444         return false;
445     }
446 
447     /* If any large prime doesn't fit in a 64-bit integer, then we assume
448      * that we're doing the dlp desecent, in which case we couldn't care
449      * less about duplicates check anyway.
450      */
451     for(int side = 0 ; side < 2 ; side++) {
452         for(unsigned int i = 0 ; i < rel.sides[side].size() ; i++) {
453             if (!mpz_fits_uint64_p(rel.sides[side][i].p))
454                 return false;
455         }
456     }
457 
458     for(int side = 0 ; side < 2 ; side++) {
459         /* It is allowed to have special-q on both sides, and we wish to
460          * check "cross" special-q combinations.
461          */
462 
463         // Step 1: prepare a list of potential special-q on this side.
464         // They involve only prime factors within the allowed bounds
465         std::vector<uint64_t> prime_list;
466 
467         for(unsigned int i = 0 ; i < rel.sides[side].size() ; i++) {
468             uint64_t p = mpz_get_uint64(rel.sides[side][i].p);
469 
470             // can this p be part of valid sq ?
471             if (! las.allow_composite_q) {
472                 /* this check is also done in sq_was_previously_sieved,
473                  * but it's easy enough to skip the divisibility test
474                  * when we know there's no point.
475                  */
476                 if ((p < las.dupqmin[side]) || (p >= las.dupqmax[side])) {
477                     continue;
478                 }
479             } else {
480                 if (!las.is_in_qfac_range(p))
481                     continue;
482             }
483 
484             /* projective primes are currently not allowed for composite
485              * special-q */
486             if (mpz_divisible_ui_p(mpz_poly_lc(las.cpoly->pols[side]), p))
487                 continue;
488 
489             // push it in the list of potential factors of sq
490             prime_list.push_back(p);
491         }
492 
493         for (auto const & sq : all_multiples(prime_list)) {
494             // keep sq only if it was sieved before [doing]
495             if (!sq_was_previously_sieved(las, sq.q, side, doing))
496                 continue;
497 
498             // emulate sieving for the valid sq, and check if it finds our
499             // relation.
500             las_todo_entry other = special_q_from_ab(rel.a, rel.b, sq, side);
501 
502             bool is_dupe = sq_finds_relation(las, other, rel, true, true);
503             verbose_output_print(0, VERBOSE_LEVEL,
504                     "# DUPECHECK relation is probably%s a dupe\n",
505                     is_dupe ? "" : " not");
506             if (is_dupe) return true;
507         }
508     }
509 
510     return false;
511 }
512