1 #ifndef LAS_SIEVE2357_HPP 2 #define LAS_SIEVE2357_HPP 3 4 #include "cado_config.h" // for HAVE_AVX2, HAVE_SSSE3 5 6 #include <cstddef> // for size_t 7 #include <cstdint> // for uint8_t 8 #ifdef HAVE_SSE2 9 /* The base type that we'll use is good ol sse2 __m128i, but we need 10 * SSSE3 instructions to deal with them in the .cpp file */ 11 #include <emmintrin.h> // IWYU pragma: keep 12 #endif 13 #ifdef HAVE_AVX2 14 #include <immintrin.h> 15 #endif 16 #ifdef HAVE_ARM_NEON 17 #include <arm_neon.h> 18 #endif 19 20 #include "fb-types.h" // for fbprime_t 21 struct where_am_I; 22 23 24 class sieve2357base { 25 public: 26 #if defined(HAVE_AVX2) 27 typedef __m256i preferred_simd_type; 28 #elif defined(HAVE_SSSE3) 29 typedef __m128i preferred_simd_type; 30 #elif defined(HAVE_ARM_NEON) 31 typedef uint8x16_t preferred_simd_type; 32 #elif LONG_BIT == 64 /* FIXME: this is false on, e.g., MinGW. What's a better condition here? */ 33 typedef uint64_t preferred_simd_type; 34 #else 35 typedef uint32_t preferred_simd_type; 36 #endif 37 38 /* Define ordering on q which sieve2357 expect: first q=1, then powers of 2, 39 then odd primes in increasing order */ order_lt(fbprime_t q1,fbprime_t q2)40 static inline bool order_lt(fbprime_t q1, fbprime_t q2) { 41 if (q1 == 1 && q2 != 1) 42 return true; 43 if (q1 != 1 && q2 == 1) 44 return false; 45 if (q1 % 2 == 0 && q2 % 2 == 1) 46 return true; 47 if (q1 % 2 == 1 && q2 % 2 == 0) 48 return false; 49 return q1 < q2; 50 } 51 52 /* We hit sievearray[x] for x = idx + i*q with 0 <= x < arraylen */ 53 struct prime_t { 54 fbprime_t q, idx; 55 unsigned char logp; 56 }; 57 58 /* The sieve() function can either write to the sieve array, overwriting any 59 data that was previously there, or add to the sieve array. 60 If sieve2357::sieve() is to write to a sieve array that contains only 61 zeroes, then the former is faster (don't have to zero out the array and 62 sieve() saves one read and add per word). 63 If the sieve array contains non-zero data, then obviously update_add is 64 required. */ 65 enum { 66 update_set, 67 update_add 68 }; 69 }; 70 71 template <typename SIMDTYPE, typename ELEMTYPE> 72 class sieve2357: public sieve2357base { 73 private: 74 static inline void 75 sieve_odd_prime(SIMDTYPE * const result, const ELEMTYPE logp, 76 const fbprime_t stride, const fbprime_t idx, const SIMDTYPE even_mask); 77 static void 78 big_loop_set(SIMDTYPE * __restrict__ sievearray, const SIMDTYPE * const __restrict__ pattern235, 79 const SIMDTYPE * const __restrict__ pattern7, size_t l7); 80 static void 81 big_loop_add(SIMDTYPE * __restrict__ sievearray, const SIMDTYPE * const __restrict__ pattern235, 82 const SIMDTYPE * const __restrict__ pattern7, size_t l7); 83 static SIMDTYPE 84 sieve2(const fbprime_t q, const fbprime_t idx, const uint8_t logp); 85 static SIMDTYPE 86 get_even_mask(const int skip_mod_2); 87 88 public: 89 /* A predicate that tells whether a prime power q = p^k can be sieved by 90 sieve2357 with a given SIMD and element data type */ can_sieve(const fbprime_t q)91 static inline bool can_sieve(const fbprime_t q) 92 { 93 const size_t N = sizeof(SIMDTYPE) / sizeof(ELEMTYPE); 94 return q == 1 || (q % 2 == 0 && N % q == 0) || q == 3 || q == 5 95 || q == 7; 96 } 97 98 static void sieve(SIMDTYPE *sievearray, size_t arraylen, const prime_t *primes, 99 int skip_mod_2, int update_operation, where_am_I & w); 100 101 }; 102 #endif 103