1 ///
2 /// @file  FactorTable.hpp
3 /// @brief The FactorTable class combines the lpf[n] (least prime
4 ///        factor) and mu[n] (Möbius function) lookup tables into a
5 ///        single factor[n] table which furthermore only contains
6 ///        entries for numbers which are not divisible by 2, 3, 5, 7
7 ///        and 11. The factor[n] lookup table uses up to 19.25
8 ///        times less memory than the lpf[n] & mu[n] lookup tables!
9 ///        factor[n] uses only 2 bytes per entry for 32-bit numbers
10 ///        and 4 bytes per entry for 64-bit numbers.
11 ///
12 ///        The factor table concept was devised and implemented by
13 ///        Christian Bau in 2003. Note that Tomás Oliveira e Silva
14 ///        also suggests combining the mu[n] and lpf[n] lookup tables
15 ///        in his paper. However Christian Bau's FactorTable data
16 ///        structure uses only half as much memory and is also
17 ///        slightly more efficient (uses fewer instructions) than the
18 ///        data structure proposed by Tomás Oliveira e Silva.
19 ///
20 ///        What we store in the factor[n] lookup table:
21 ///
22 ///        1) INT_MAX - 1  if n = 1
23 ///        2) INT_MAX      if n is a prime
24 ///        3) 0            if moebius(n) = 0
25 ///        4) lpf - 1      if moebius(n) = 1
26 ///        5) lpf          if moebius(n) = -1
27 ///
28 ///        factor[1] = (INT_MAX - 1) because 1 contributes to the
29 ///        sum of the ordinary leaves S1(x, a) in the
30 ///        Lagarias-Miller-Odlyzko and Deleglise-Rivat algorithms.
31 ///        The values above allow to replace the 1st if statement
32 ///        below used in the S1(x, a) and S2(x, a) formulas by the
33 ///        2nd new if statement which is obviously faster.
34 ///
35 ///        * Old: if (mu[n] != 0 && prime < lpf[n])
36 ///        * New: if (prime < factor[n])
37 ///
38 /// Copyright (C) 2021 Kim Walisch, <kim.walisch@gmail.com>
39 ///
40 /// This file is distributed under the BSD License. See the COPYING
41 /// file in the top level directory.
42 ///
43 
44 #ifndef FACTORTABLE_HPP
45 #define FACTORTABLE_HPP
46 
47 #include <primecount.hpp>
48 #include <primecount-internal.hpp>
49 #include <BaseFactorTable.hpp>
50 #include <primesieve.hpp>
51 #include <imath.hpp>
52 #include <int128_t.hpp>
53 #include <pod_vector.hpp>
54 
55 #include <algorithm>
56 #include <cassert>
57 #include <limits>
58 #include <stdint.h>
59 
60 namespace {
61 
62 using namespace primecount;
63 
64 template <typename T>
65 class FactorTable : public BaseFactorTable
66 {
67 public:
68   /// Factor numbers <= y
FactorTable(int64_t y,int threads)69   FactorTable(int64_t y, int threads)
70   {
71     if (y > max())
72       throw primecount_error("y must be <= FactorTable::max()");
73 
74     y = std::max<int64_t>(1, y);
75     T T_MAX = std::numeric_limits<T>::max();
76     factor_.resize(to_index(y) + 1);
77 
78     // mu(1) = 1.
79     // 1 has zero prime factors, hence 1 has an even
80     // number of prime factors. We use the least
81     // significant bit to indicate whether the number
82     // has an even or odd number of prime factors.
83     factor_[0] = T_MAX ^ 1;
84 
85     int64_t sqrty = isqrt(y);
86     int64_t thread_threshold = (int64_t) 1e7;
87     threads = ideal_num_threads(threads, y, thread_threshold);
88     int64_t thread_distance = ceil_div(y, threads);
89     thread_distance += coprime_indexes_.size() - thread_distance % coprime_indexes_.size();
90 
91     #pragma omp parallel for num_threads(threads)
92     for (int t = 0; t < threads; t++)
93     {
94       // Thread processes interval [low, high]
95       int64_t low = thread_distance * t;
96       int64_t high = low + thread_distance;
97       low = std::max(first_coprime(), low + 1);
98       high = std::min(high, y);
99 
100       if (low <= high)
101       {
102         // Default initialize memory to all bits set
103         int64_t low_idx = to_index(low);
104         int64_t size = (to_index(high) + 1) - low_idx;
105         std::fill_n(&factor_[low_idx], size, T_MAX);
106 
107         int64_t start = first_coprime() - 1;
108         int64_t stop = high / first_coprime();
109         primesieve::iterator it(start, stop);
110 
111         while (true)
112         {
113           int64_t i = 1;
114           int64_t prime = it.next_prime();
115           int64_t multiple = next_multiple(prime, low, &i);
116           int64_t min_m = prime * first_coprime();
117 
118           if (min_m > high)
119             break;
120 
121           for (; multiple <= high; multiple = prime * to_number(i++))
122           {
123             int64_t mi = to_index(multiple);
124             // prime is smallest factor of multiple
125             if (factor_[mi] == T_MAX)
126               factor_[mi] = (T) prime;
127             // the least significant bit indicates
128             // whether multiple has an even (0) or odd (1)
129             // number of prime factors
130             else if (factor_[mi] != 0)
131               factor_[mi] ^= 1;
132           }
133 
134           if (prime <= sqrty)
135           {
136             int64_t j = 0;
137             int64_t square = prime * prime;
138             multiple = next_multiple(square, low, &j);
139 
140             // moebius(n) = 0
141             for (; multiple <= high; multiple = square * to_number(j++))
142               factor_[to_index(multiple)] = 0;
143           }
144         }
145       }
146     }
147   }
148 
149   /// mu_lpf(n) is a combination of the mu(n) (Möbius function)
150   /// and lpf(n) (least prime factor) functions.
151   /// mu_lpf(n) returns (with n = to_number(index)):
152   ///
153   /// 1) INT_MAX - 1  if n = 1
154   /// 2) INT_MAX      if n is a prime
155   /// 3) 0            if moebius(n) = 0
156   /// 4) lpf - 1      if moebius(n) = 1
157   /// 5) lpf          if moebius(n) = -1
158   ///
mu_lpf(int64_t index) const159   int64_t mu_lpf(int64_t index) const
160   {
161     return factor_[index];
162   }
163 
164   /// Get the Möbius function value of the number
165   /// n = to_number(index).
166   ///
167   /// https://en.wikipedia.org/wiki/Möbius_function
168   /// mu(n) = 1 if n is a square-free integer with an even number of prime factors.
169   /// mu(n) = −1 if n is a square-free integer with an odd number of prime factors.
170   /// mu(n) = 0 if n has a squared prime factor.
171   ///
mu(int64_t index) const172   int64_t mu(int64_t index) const
173   {
174     // mu(n) = 0 is disabled by default for performance
175     // reasons, we only enable it for testing.
176     #if defined(ENABLE_MU_0_TESTING)
177       if (factor_[index] == 0)
178         return 0;
179     #else
180       assert(factor_[index] != 0);
181     #endif
182 
183     if (factor_[index] & 1)
184       return -1;
185     else
186       return 1;
187   }
188 
max()189   static maxint_t max()
190   {
191     maxint_t T_MAX = std::numeric_limits<T>::max();
192     return ipow(T_MAX - 1, 2) - 1;
193   }
194 
195 private:
196   pod_vector<T> factor_;
197 };
198 
199 } // namespace
200 
201 #endif
202