1 #include "cado.h" // IWYU pragma: keep
2 
3 /* This compilation units reacts to TRACK_CODE_PATH and uses macros
4  * such as WHERE_AM_I_UPDATE.
5  * This compilation unit _must_ produce different object files depending
6  * on the value of TRACK_CODE_PATH.
7  * The WHERE_AM_I_UPDATE macro itself is defined in las-where-am-i.hpp
8  */
9 
10 #include <algorithm>        // for max
11 #include <climits>          // for UINT_MAX
12 #include <cstdint>          // for uint32_t
13 #include <cstdlib>          // for abs, abort, size_t, NULL
14 #include <cstring>          // for memcpy, memset
15 #include <vector>           // for vector
16 #include "fb-types.h"       // for sublat_t
17 #include "gcd.h"       // for bin_gcd_int64_safe
18 #include "gpf.h"        // gpf_init
19 #include "las-unsieve.hpp"  // for unsieve_data, unsieve_data::pattern_t
20 #include "macros.h"         // for ASSERT_ALWAYS, no_break, MAYBE_UNUSED
21 #include "ularith.h"        // for ularith_invmod
22 #ifdef TRACE_K
23 #include "las-where-am-i.hpp"             // for where_am_I, WHERE_AM_I_UPDATE
24 #include "las-output.hpp"   // IWYU pragma: keep
25 #include "verbose.h"    // verbose_output_print
26 #endif
27 
28 static const int verify_gcd = 0; /* Enable slow but thorough test */
29 
30 /* Set every stride-th byte, starting at index 0, to 255 in an array of
31    stride unsieve_data::pattern_t's, and set all other bytes to 0. */
32 static void
minisieve(unsieve_data::pattern_t * const array,const size_t stride)33 minisieve(unsieve_data::pattern_t * const array, const size_t stride)
34 {
35     memset (array, 0, stride * sizeof(unsieve_data::pattern_t));
36     for (size_t i = 0; i < stride * sizeof(unsieve_data::pattern_t); i += stride)
37       ((unsigned char *) array)[i] = 255;
38 }
39 
unsieve_data()40 unsieve_data::unsieve_data()
41 {
42   entries = NULL;
43   Jmax = 0;
44 }
45 
unsieve_data(int logI,int logA)46 unsieve_data::unsieve_data(int logI, int logA)
47 {
48   ASSERT_ALWAYS(logI >= 0);
49   ASSERT_ALWAYS(logA >= logI);
50   Jmax = 1UL << (logA - logI);
51   /* Store largest prime factor of k in us.lpf[k], 0 for k=0, 1 for k=1 */
52   entries = new entry[Jmax];
53   entries[0] = entry(0,0);
54   entries[1] = entry(1,1);
55   gpf_init(Jmax - 1);
56   for (unsigned int k = 2U; k < Jmax; k++)
57     {
58       unsigned int p, c;
59       p = gpf_get(k);
60       c = k; do {c /= p;} while (c % p == 0);
61       entries[k] = entry(p, c);
62     }
63 
64     minisieve(pattern3, 3);
65     minisieve(pattern5, 5);
66     minisieve(pattern7, 7);
67 }
68 
unsieve_data(unsieve_data const & o)69 unsieve_data::unsieve_data(unsieve_data const & o) : Jmax(o.Jmax)
70 {
71     entries = NULL;
72     if (Jmax == 0) return;
73     entries = new entry[Jmax];
74     memcpy(entries, o.entries, Jmax * sizeof(entry));
75     memcpy(pattern3, o.pattern3, sizeof(pattern3));
76     memcpy(pattern5, o.pattern5, sizeof(pattern5));
77     memcpy(pattern7, o.pattern7, sizeof(pattern7));
78 }
79 
operator =(unsieve_data const & o)80 unsieve_data & unsieve_data::operator=(unsieve_data const & o)
81 {
82     if (Jmax) delete[] entries;
83     Jmax = o.Jmax;
84     entries = new entry[Jmax];
85     memcpy(entries, o.entries, Jmax * sizeof(entry));
86     memcpy(pattern3, o.pattern3, sizeof(pattern3));
87     memcpy(pattern5, o.pattern5, sizeof(pattern5));
88     memcpy(pattern7, o.pattern7, sizeof(pattern7));
89     return *this;
90 }
91 
~unsieve_data()92 unsieve_data::~unsieve_data()
93 {
94     if (Jmax == 0) return;
95     delete[] entries;
96 }
97 
98 static inline void
unsieve_one_prime(unsigned char * line_start,const unsigned int p,const unsigned int j,const unsigned int start_idx,const unsigned int I)99 unsieve_one_prime (unsigned char *line_start, const unsigned int p,
100                    const unsigned int j, const unsigned int start_idx,
101                    const unsigned int I)
102 {
103   unsigned int x, np = p; /* if 2|j, np=2p, else np=p */
104 
105   x = start_idx;
106   if (j % 2U == 0U)
107     {
108       np += p;
109       if (x % 2U == 0U)
110         x += p;
111     }
112   for ( ; x < I; x += np)
113     line_start[x] = 255;
114 }
115 
116 
117 static inline void
unsieve_3(unsigned char * line_start,const unsigned int start_idx,const unsigned int I,unsieve_data const & us)118 unsieve_3(unsigned char *line_start, const unsigned int start_idx,
119           const unsigned int I, unsieve_data const & us)
120 {
121   const unsigned int I_upt  = I / sizeof (unsieve_data::pattern_t);
122   unsigned int i, pattern_idx;
123   unsieve_data::pattern_t p0, p1, p2;
124   unsieve_data::pattern_t * ul_line_start = (unsieve_data::pattern_t *) line_start;
125 
126   if (sizeof(unsieve_data::pattern_t) == 4) {
127     /* -4^(-1) == 2 (mod 3) */
128     pattern_idx = (2 * start_idx) % 3;
129   } else if (sizeof(unsieve_data::pattern_t) == 8) {
130     /* -8^(-1) == 1 (mod 3) */
131     pattern_idx = start_idx;
132   } else if (sizeof(unsieve_data::pattern_t) == 16) {
133     /* -16^(-1) == 2 (mod 3) */
134     pattern_idx = (2 * start_idx) % 3;
135   } else
136     abort();
137 
138   p0 = us.pattern3[pattern_idx];
139   p1 = us.pattern3[(pattern_idx + 1) % 3];
140   p2 = us.pattern3[(pattern_idx + 2) % 3];
141 
142   ASSERT_ALWAYS(((unsigned char *)&p0)[start_idx] == 255);
143 
144   /* Apply pattern to array */
145   for (i = 0U; i < I_upt - 2U; i += 3U)
146     {
147       UNSIEVE_OR(ul_line_start[i], p0);
148       UNSIEVE_OR(ul_line_start[i + 1], p1);
149       UNSIEVE_OR(ul_line_start[i + 2], p2);
150     }
151   if (i < I_upt)
152     UNSIEVE_OR(ul_line_start[i], p0);
153   if (i + 1 < I_upt)
154     UNSIEVE_OR(ul_line_start[i + 1], p1);
155 }
156 
157 
158 static inline void
unsieve_5(unsigned char * line_start,const unsigned int start_idx,const unsigned int I,unsieve_data const & us)159 unsieve_5(unsigned char *line_start, const unsigned int start_idx,
160           const unsigned int I, unsieve_data const & us)
161 {
162   const unsigned int I_upt  = I / sizeof (unsieve_data::pattern_t);
163   unsigned int i;
164   unsieve_data::pattern_t p0, p1, p2, p3, p4;
165   unsieve_data::pattern_t * ul_line_start = (unsieve_data::pattern_t *) line_start;
166   size_t pattern_idx;
167 
168   if (sizeof(unsieve_data::pattern_t) == 4) {
169     /* -4^(-1) == 1 (mod 5) */
170     pattern_idx = start_idx;
171   } else if (sizeof(unsieve_data::pattern_t) == 8) {
172     /* -8^(-1) == 3 (mod 5) */
173     pattern_idx = (3 * start_idx) % 5;
174   } else if (sizeof(unsieve_data::pattern_t) == 16) {
175     /* -16^(-1) == -1 (mod 5) */
176     pattern_idx = (5 - start_idx) % 5;
177   } else
178     abort();
179 
180   p0 = us.pattern5[pattern_idx];
181   p1 = us.pattern5[(pattern_idx + 1) % 5];
182   p2 = us.pattern5[(pattern_idx + 2) % 5];
183   p3 = us.pattern5[(pattern_idx + 3) % 5];
184   p4 = us.pattern5[(pattern_idx + 4) % 5];
185 
186   if (start_idx < sizeof(p0)) {
187       ASSERT_ALWAYS(((unsigned char *)&p0)[start_idx] == 255);
188   } else {
189       ASSERT_ALWAYS(start_idx < 2 * sizeof(p0));
190       ASSERT_ALWAYS(((unsigned char *)&p1)[start_idx - sizeof(p0)] == 255);
191   }
192 
193   /* Apply pattern to array */
194   for (i = 0U; i < I_upt - 4U; i += 5U)
195     {
196       UNSIEVE_OR(ul_line_start[i], p0);
197       UNSIEVE_OR(ul_line_start[i + 1], p1);
198       UNSIEVE_OR(ul_line_start[i + 2], p2);
199       UNSIEVE_OR(ul_line_start[i + 3], p3);
200       UNSIEVE_OR(ul_line_start[i + 4], p4);
201     }
202   if (i < I_upt)
203     UNSIEVE_OR(ul_line_start[i], p0);
204   if (i + 1 < I_upt)
205     UNSIEVE_OR(ul_line_start[i + 1], p1);
206   if (i + 2 < I_upt)
207     UNSIEVE_OR(ul_line_start[i + 2], p2);
208   if (i + 3 < I_upt)
209     UNSIEVE_OR(ul_line_start[i + 3], p3);
210 }
211 
212 static inline void
unsieve_7(unsigned char * line_start,const unsigned int start_idx,const unsigned int I,unsieve_data const & us)213 unsieve_7(unsigned char *line_start, const unsigned int start_idx,
214           const unsigned int I, unsieve_data const & us)
215 {
216   const unsigned int I_upt  = I / sizeof (unsieve_data::pattern_t);
217   unsigned int i;
218   unsieve_data::pattern_t p0, p1, p2, p3, p4, p5, p6;
219   unsieve_data::pattern_t * ul_line_start = (unsieve_data::pattern_t *) line_start;
220   size_t pattern_idx;
221 
222   if (sizeof(unsieve_data::pattern_t) == 4) {
223     /* -4^(-1) == 5 (mod 7) */
224     pattern_idx = (5 * start_idx) % 7;
225   } else if (sizeof(unsieve_data::pattern_t) == 8) {
226     /* -8^(-1) == -1 (mod 7) */
227     pattern_idx = (7 - start_idx) % 7;
228   } else if (sizeof(unsieve_data::pattern_t) == 16) {
229     /* -16^(-1) == 3 (mod 7) */
230     pattern_idx = (3 * start_idx) % 7;
231   } else
232     abort();
233 
234   p0 = us.pattern7[pattern_idx];
235   p1 = us.pattern7[(pattern_idx + 1) % 7];
236   p2 = us.pattern7[(pattern_idx + 2) % 7];
237   p3 = us.pattern7[(pattern_idx + 3) % 7];
238   p4 = us.pattern7[(pattern_idx + 4) % 7];
239   p5 = us.pattern7[(pattern_idx + 5) % 7];
240   p6 = us.pattern7[(pattern_idx + 6) % 7];
241 
242   if (start_idx < sizeof(p0)) {
243       ASSERT_ALWAYS(((unsigned char *)&p0)[start_idx] == 255);
244   } else {
245       ASSERT_ALWAYS(start_idx < 2 * sizeof(p0));
246       ASSERT_ALWAYS(((unsigned char *)&p1)[start_idx - sizeof(p0)] == 255);
247   }
248 
249   /* Apply pattern to array */
250   for (i = 0U; i < I_upt - 6U; i += 7U)
251     {
252       UNSIEVE_OR(ul_line_start[i], p0);
253       UNSIEVE_OR(ul_line_start[i + 1], p1);
254       UNSIEVE_OR(ul_line_start[i + 2], p2);
255       UNSIEVE_OR(ul_line_start[i + 3], p3);
256       UNSIEVE_OR(ul_line_start[i + 4], p4);
257       UNSIEVE_OR(ul_line_start[i + 5], p5);
258       UNSIEVE_OR(ul_line_start[i + 6], p6);
259     }
260   if (i < I_upt)
261     UNSIEVE_OR(ul_line_start[i], p0);
262   if (i + 1 < I_upt)
263     UNSIEVE_OR(ul_line_start[i + 1], p1);
264   if (i + 2 < I_upt)
265     UNSIEVE_OR(ul_line_start[i + 2], p2);
266   if (i + 3 < I_upt)
267     UNSIEVE_OR(ul_line_start[i + 3], p3);
268   if (i + 4 < I_upt)
269     UNSIEVE_OR(ul_line_start[i + 4], p4);
270   if (i + 5 < I_upt)
271     UNSIEVE_OR(ul_line_start[i + 5], p5);
272 }
273 
274 
275 static void
unsieve_not_coprime_line(unsigned char * line_start,unsigned int j,int i0,int i1,unsigned int min_p,unsieve_data const & us)276 unsieve_not_coprime_line(unsigned char * line_start,
277                          unsigned int j,
278                          int i0, int i1,
279                          unsigned int min_p,
280                          unsieve_data const & us)
281 {
282   unsigned int p, c=j;
283   int start_idx;
284 
285   if (j == 0)
286     return;
287 
288   while (c % 2U == 0U)
289     c >>= 1;
290 
291   while (1)
292     {
293       p = us.entries[c].lpf; /* set p to largest prime factor of c */
294       if (p < min_p)
295         return;
296       start_idx = (-i0) % p; if (start_idx < 0) start_idx += p;
297       c = us.entries[c].cof;
298       if (p <= 7)
299         break;
300       unsieve_one_prime (line_start, p, j, start_idx, i1 - i0);
301     }
302 
303   if (p == 7U)
304     {
305       unsieve_7(line_start, start_idx, i1 - i0, us);
306       p = us.entries[c].lpf;
307       start_idx = (-i0) % p; if (start_idx < 0) start_idx += p;
308       c = us.entries[c].cof;
309     }
310 
311   if (p < min_p)
312     return;
313 
314   if (p == 5U)
315     {
316       unsieve_5(line_start, start_idx, i1 - i0, us);
317       p = us.entries[c].lpf;
318       start_idx = (-i0) % p; if (start_idx < 0) start_idx += p;
319       c = us.entries[c].cof;
320     }
321 
322   if (p < min_p)
323     return;
324 
325   if (p == 3U)
326       unsieve_3(line_start, start_idx, i1 - i0, us);
327 
328   ASSERT_ALWAYS(c <= 1);
329 }
330 
j_divisibility_helper(uint32_t J)331 j_divisibility_helper::j_divisibility_helper(uint32_t J)
332 {
333     /* Store largest prime factor of k in j_div[k].p, for 1 < k < J,
334        and store 0 for k=0, 1 for k=1 */
335     ASSERT_ALWAYS(J >= 2);
336     ASSERT_ALWAYS(!(J & (J-1)));        /* J must be a power of two */
337     entries.reserve(J);
338     entries.push_back({ 0U, 0U, 0U, 0U });
339     entries.push_back({ 1U, 1U, 1U, UINT_MAX });
340     for (unsigned int k = 2; k < J; k++) {
341         /* Find largest prime factor of k */
342         unsigned int p, c = k;
343         for (p = 2U; p * p <= c; p += 1U + p % 2U)
344         {
345             while (c % p == 0U)
346                 c /= p;
347             if (c == 1U)
348                 break;
349         }
350         p = (c == 1U) ? p : c;
351         c = k; do {c /= p;} while (c % p == 0);
352         unsigned int inv = p == 2 ? 0U : (unsigned int)ularith_invmod(p);
353         unsigned int bound = UINT_MAX / p;
354         entries.push_back({ p, c, inv, bound});
355     }
356 }
357 
358 static inline int
sieve_info_test_lognorm(const unsigned char C1,const unsigned char C2,const unsigned char S1,const unsigned char S2)359 sieve_info_test_lognorm (const unsigned char C1, const unsigned char C2,
360                          const unsigned char S1, const unsigned char S2)
361 {
362   return S1 <= C1 && S2 <= C2;
363 }
364 
365 /* In SS[2][x_start] ... SS[2][x_start * 2^logI - 1], look for survivors.
366    We test divisibility of the resulting i value by the trial-divided primes.
367    Return the number of survivors found. This function works for all j */
368 MAYBE_UNUSED static void
search_survivors_in_line1(unsigned char * const SS[2],const unsigned char bound[2],unsigned int j,int i0,int i1,int N MAYBE_UNUSED,j_divisibility_helper const & j_div,unsigned int td_max,std::vector<uint32_t> & survivors)369 search_survivors_in_line1(unsigned char * const SS[2],
370         const unsigned char bound[2],
371         unsigned int j,
372         int i0, int i1,
373         int N MAYBE_UNUSED, j_divisibility_helper const & j_div,
374         unsigned int td_max, std::vector<uint32_t> &survivors)
375 {
376     unsigned int div[6][2], nr_div;
377 
378     nr_div = extract_j_div(div, j, j_div, 3, td_max);
379     ASSERT_ALWAYS(nr_div <= 6);
380 
381     for (int x = 0; x < (i1 - i0); x++) {
382         if (!sieve_info_test_lognorm(bound[0], bound[1], SS[0][x], SS[1][x]))
383         {
384             SS[0][x] = 255;
385             continue;
386         }
387 
388         /* The very small prime used in the bound pattern, and unsieving larger
389            primes have not identified this as gcd(i,j) > 1. It remains to check
390            the trial-divided primes. */
391         const unsigned int i = abs (i0 + x);
392         int divides = 0;
393         switch (nr_div) {
394             // coverity[unterminated_case]
395             case 6: divides |= (i * div[5][0] <= div[5][1]);no_break();
396             // coverity[unterminated_case]
397             case 5: divides |= (i * div[4][0] <= div[4][1]);no_break();
398             // coverity[unterminated_case]
399             case 4: divides |= (i * div[3][0] <= div[3][1]);no_break();
400             // coverity[unterminated_case]
401             case 3: divides |= (i * div[2][0] <= div[2][1]);no_break();
402             // coverity[unterminated_case]
403             case 2: divides |= (i * div[1][0] <= div[1][1]);no_break();
404             // coverity[unterminated_case]
405             case 1: divides |= (i * div[0][0] <= div[0][1]);no_break();
406             case 0: break;
407         }
408 
409         if (divides) {
410             if (verify_gcd)
411                 ASSERT_ALWAYS(bin_gcd_int64_safe (i, j) != 1);
412   #ifdef TRACE_K
413             if (trace_on_spot_Nx(N, x)) {
414                 verbose_output_print(TRACE_CHANNEL, 0, "# Slot [%u] in bucket %u has non coprime (i,j)=(%d,%u)\n",
415                         x, N, i, j);
416             }
417   #endif
418             SS[0][x] = 255;
419         } else {
420             survivors.push_back(x);
421             if (verify_gcd)
422                 ASSERT_ALWAYS(bin_gcd_int64_safe (i, j) == 1);
423   #ifdef TRACE_K
424             if (trace_on_spot_Nx(N, x)) {
425                 verbose_output_print(TRACE_CHANNEL, 0, "# Slot [%u] in bucket %u is survivor with coprime (i,j)\n",
426                         x, N);
427             }
428   #endif
429         }
430       }
431 }
432 
433 
434 /* same, but with one side only.
435  *
436  *
437  * XXX I think the overall structure would need a cleanup anyway, but at
438  * least here we can refactor somewhat.
439  */
440 
441 MAYBE_UNUSED static void
search_survivors_in_line1_oneside(unsigned char * Sf,const unsigned char bound,unsigned int j,int i0,int i1,int N MAYBE_UNUSED,j_divisibility_helper const & j_div,unsigned int td_max,std::vector<uint32_t> & survivors)442 search_survivors_in_line1_oneside(unsigned char * Sf,
443         const unsigned char bound,
444         unsigned int j,
445         int i0, int i1,
446         int N MAYBE_UNUSED, j_divisibility_helper const & j_div,
447         unsigned int td_max, std::vector<uint32_t> &survivors)
448 {
449     unsigned int div[6][2], nr_div;
450 
451     nr_div = extract_j_div(div, j, j_div, 3, td_max);
452     ASSERT_ALWAYS(nr_div <= 6);
453 
454     for (int x = 0; x < (i1 - i0); x++) {
455         if (Sf[x] > bound) {
456             Sf[x] = 255;
457             continue;
458         }
459 
460         /* The very small prime used in the bound pattern, and unsieving larger
461            primes have not identified this as gcd(i,j) > 1. It remains to check
462            the trial-divided primes. */
463         const unsigned int i = abs (i0 + x);
464         int divides = 0;
465         switch (nr_div) {
466             case 6: divides |= (i * div[5][0] <= div[5][1]);no_break();
467             case 5: divides |= (i * div[4][0] <= div[4][1]);no_break();
468             case 4: divides |= (i * div[3][0] <= div[3][1]);no_break();
469             case 3: divides |= (i * div[2][0] <= div[2][1]);no_break();
470             case 2: divides |= (i * div[1][0] <= div[1][1]);no_break();
471             case 1: divides |= (i * div[0][0] <= div[0][1]);no_break();
472             case 0: break;
473         }
474 
475         if (divides) {
476             if (verify_gcd)
477                 ASSERT_ALWAYS(bin_gcd_int64_safe (i, j) != 1);
478   #ifdef TRACE_K
479             if (trace_on_spot_Nx(N, x)) {
480                 verbose_output_print(TRACE_CHANNEL, 0, "# Slot [%u] in bucket %u has non coprime (i,j)=(%d,%u)\n",
481                         x, N, i, j);
482             }
483   #endif
484             Sf[x] = 255;
485         } else {
486             survivors.push_back(x);
487             if (verify_gcd)
488                 ASSERT_ALWAYS(bin_gcd_int64_safe (i, j) == 1);
489   #ifdef TRACE_K
490             if (trace_on_spot_Nx(N, x)) {
491                 verbose_output_print(TRACE_CHANNEL, 0, "# Slot [%u] in bucket %u is survivor with coprime (i,j)\n",
492                         x, N);
493             }
494   #endif
495         }
496       }
497 }
498 
499 
500 /* linefragment is used when logI > LOG_BUCKET_REGION ; it is a positive
501  * integer multiple of 2^LOG_BUCKET_REGION ; so the sub-line is actually
502  * for:
503  * -I/2 + linefragment <= i < -I/2 + MIN(2^LOG_BUCKET_REGION, I)
504  */
505 void
search_survivors_in_line(unsigned char * const SS[2],const unsigned char bound[2],unsigned int j,int i0,int i1,int N,j_divisibility_helper const & j_div,unsigned int td_max,unsieve_data const & us,std::vector<uint32_t> & survivors,sublat_t sublat)506 search_survivors_in_line(unsigned char * const SS[2],
507         const unsigned char bound[2],
508         unsigned int j,
509         int i0, int i1,
510         int N, j_divisibility_helper const & j_div,
511         unsigned int td_max, unsieve_data const & us,
512         std::vector<uint32_t> &survivors, sublat_t sublat)
513 {
514     ASSERT_ALWAYS(SS[0] || SS[1]);
515     unsigned char * Sf = SS[0];
516 
517     if (SS[0] && SS[1]) {
518 
519         /* In line j = 0, only the coordinate (i, j) = (-1, 0) may survive */
520         // FIXME: in sublat mode, this is broken!
521         if (j == 0 && (!sublat.m)) {
522             if (i0 <= 0 && i1 > 0) {
523                 unsigned char s0 = SS[0][1-i0];
524                 unsigned char s1 = SS[1][1-i0];
525                 memset(SS[0], 255, i1 - i0);
526                 if (s0 <= bound[0] && s1 <= bound[1]) {
527                     SS[0][1 - i0] = s0;
528                     SS[1][1 - i0] = s1;
529                     survivors.push_back(1 - i0);
530                 }
531             } else {
532                 memset(SS[0], 255, i1 - i0);
533             }
534             return;
535         }
536 
537         // Naive version when we have sublattices, because unsieving is
538         // harder. TODO: implement a fast version
539         if (sublat.m) {
540             for (int x = 0; x < (i1 - i0); x++) {
541                 if (!sieve_info_test_lognorm(bound[0], bound[1], SS[0][x], SS[1][x])) {
542                     SS[0][x] = 255;
543                     continue;
544                 }
545                 const unsigned int i = abs(int(sublat.m)*(i0 + x)+int(sublat.i0));
546                 const unsigned int jj = sublat.m*j+sublat.j0;
547                 if ((((jj % 2) == 0) && ((i % 2) == 0)) ||
548                         (bin_gcd_int64_safe (i, jj) != 1)) {
549                     SS[0][x] = 255;
550                 } else {
551                     survivors.push_back(x);
552                 }
553             }
554             return;
555         }
556 
557         unsieve_not_coprime_line(Sf, j, i0, i1, td_max + 1, us);
558 
559 #if defined(HAVE_SSE2)
560         search_survivors_in_line_sse2(SS, bound, j, i0, i1, N, j_div, td_max,
561                 survivors);
562 #else
563         search_survivors_in_line1(SS, bound, j, i0, i1, N, j_div, td_max,
564                 survivors);
565 #endif
566     } else {
567         unsigned char b = bound[0];
568         if (!Sf) {
569             Sf = SS[1];
570             b = bound[1];
571         }
572         /* ok, here only Sf is non-null. We want the values below b. */
573 
574         /* In line j = 0, only the coordinate (i, j) = (-1, 0) may survive */
575         // FIXME: in sublat mode, this is broken!
576         if (j == 0 && (!sublat.m)) {
577             if (i0 <= 0 && i1 > 0) {
578                 unsigned char s = Sf[1-i0];
579                 memset(Sf, 255, i1 - i0);
580                 if (s <= b) {
581                     Sf[1 - i0] = s;
582                     survivors.push_back(1 - i0);
583                 }
584             } else {
585                 memset(Sf, 255, i1 - i0);
586             }
587             return;
588         }
589 
590         // Naive version when we have sublattices, because unsieving is
591         // harder. TODO: implement a fast version
592         if (sublat.m) {
593             for (int x = 0; x < (i1 - i0); x++) {
594                 if (Sf[x] > b) {
595                     Sf[x] = 255;
596                     continue;
597                 }
598                 const unsigned int i = abs(int(sublat.m)*(i0 + x))+int(sublat.i0);
599                 const unsigned int jj = sublat.m*j+sublat.j0;
600                 if ((((jj % 2) == 0) && ((i % 2) == 0)) ||
601                         (bin_gcd_int64_safe (i, jj) != 1)) {
602                     Sf[x] = 255;
603                 } else {
604                     survivors.push_back(x);
605                 }
606             }
607             return;
608         }
609 
610         unsieve_not_coprime_line(Sf, j, i0, i1, td_max + 1, us);
611 
612 #if defined(HAVE_SSE2)
613         search_survivors_in_line_sse2_oneside(Sf, b, j, i0, i1, N, j_div, td_max,
614                 survivors);
615 #else
616         search_survivors_in_line1_oneside(Sf, b, j, i0, i1, N, j_div, td_max,
617                 survivors);
618 #endif
619     }
620 }
621