1 /**
2  * MIT License
3  *
4  * Copyright (c) 2018 Tessil
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H
25 #define TSL_HOPSCOTCH_GROWTH_POLICY_H
26 
27 
28 #include <algorithm>
29 #include <array>
30 #include <climits>
31 #include <cmath>
32 #include <cstddef>
33 #include <iterator>
34 #include <limits>
35 #include <ratio>
36 #include <stdexcept>
37 
38 
39 namespace tsl {
40 namespace hh {
41 
42 /**
43  * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows
44  * the table to use a mask operation instead of a modulo operation to map a hash to a bucket.
45  *
46  * GrowthFactor must be a power of two >= 2.
47  */
48 template<std::size_t GrowthFactor>
49 class power_of_two_growth_policy {
50 public:
51     /**
52      * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter.
53      * This number is a minimum, the policy may update this value with a higher value if needed (but not lower).
54      *
55      * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and
56      * bucket_for_hash must always return 0 in this case.
57      */
power_of_two_growth_policy(std::size_t & min_bucket_count_in_out)58     explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
59         if(min_bucket_count_in_out > max_bucket_count()) {
60             throw std::length_error("The hash table exceeds its maxmimum size.");
61         }
62 
63         if(min_bucket_count_in_out > 0) {
64             min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
65             m_mask = min_bucket_count_in_out - 1;
66         }
67         else {
68             m_mask = 0;
69         }
70     }
71 
72     /**
73      * Return the bucket [0, bucket_count()) to which the hash belongs.
74      * If bucket_count() is 0, it must always return 0.
75      */
bucket_for_hash(std::size_t hash)76     std::size_t bucket_for_hash(std::size_t hash) const noexcept {
77         return hash & m_mask;
78     }
79 
80     /**
81      * Return the bucket count to use when the bucket array grows on rehash.
82      */
next_bucket_count()83     std::size_t next_bucket_count() const {
84         if((m_mask + 1) > max_bucket_count() / GrowthFactor) {
85             throw std::length_error("The hash table exceeds its maxmimum size.");
86         }
87 
88         return (m_mask + 1) * GrowthFactor;
89     }
90 
91     /**
92      * Return the maximum number of buckets supported by the policy.
93      */
max_bucket_count()94     std::size_t max_bucket_count() const {
95         // Largest power of two.
96         return (std::numeric_limits<std::size_t>::max() / 2) + 1;
97     }
98 
99     /**
100      * Reset the growth policy as if it was created with a bucket count of 0.
101      * After a clear, the policy must always return 0 when bucket_for_hash is called.
102      */
clear()103     void clear() noexcept {
104         m_mask = 0;
105     }
106 
107 private:
round_up_to_power_of_two(std::size_t value)108     static std::size_t round_up_to_power_of_two(std::size_t value) {
109         if(is_power_of_two(value)) {
110             return value;
111         }
112 
113         if(value == 0) {
114             return 1;
115         }
116 
117         --value;
118         for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
119             value |= value >> i;
120         }
121 
122         return value + 1;
123     }
124 
is_power_of_two(std::size_t value)125     static constexpr bool is_power_of_two(std::size_t value) {
126         return value != 0 && (value & (value - 1)) == 0;
127     }
128 
129 private:
130     static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2.");
131 
132     std::size_t m_mask;
133 };
134 
135 
136 /**
137  * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
138  * to a bucket. Slower but it can be useful if you want a slower growth.
139  */
140 template<class GrowthFactor = std::ratio<3, 2>>
141 class mod_growth_policy {
142 public:
mod_growth_policy(std::size_t & min_bucket_count_in_out)143     explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
144         if(min_bucket_count_in_out > max_bucket_count()) {
145             throw std::length_error("The hash table exceeds its maxmimum size.");
146         }
147 
148         if(min_bucket_count_in_out > 0) {
149             m_mod = min_bucket_count_in_out;
150         }
151         else {
152             m_mod = 1;
153         }
154     }
155 
bucket_for_hash(std::size_t hash)156     std::size_t bucket_for_hash(std::size_t hash) const noexcept {
157         return hash % m_mod;
158     }
159 
next_bucket_count()160     std::size_t next_bucket_count() const {
161         if(m_mod == max_bucket_count()) {
162             throw std::length_error("The hash table exceeds its maxmimum size.");
163         }
164 
165         const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
166         if(!std::isnormal(next_bucket_count)) {
167             throw std::length_error("The hash table exceeds its maxmimum size.");
168         }
169 
170         if(next_bucket_count > double(max_bucket_count())) {
171             return max_bucket_count();
172         }
173         else {
174             return std::size_t(next_bucket_count);
175         }
176     }
177 
max_bucket_count()178     std::size_t max_bucket_count() const {
179         return MAX_BUCKET_COUNT;
180     }
181 
clear()182     void clear() noexcept {
183         m_mod = 1;
184     }
185 
186 private:
187     static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den;
188     static const std::size_t MAX_BUCKET_COUNT =
189             std::size_t(double(
190                     std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR
191             ));
192 
193     static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
194 
195     std::size_t m_mod;
196 };
197 
198 
199 
200 namespace detail {
201 
202 static constexpr const std::array<std::size_t, 40> PRIMES = {{
203     1ul, 5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul,
204     1543ul, 2053ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
205     1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul,
206     402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul
207 }};
208 
209 template<unsigned int IPrime>
mod(std::size_t hash)210 static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; }
211 
212 // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the
213 // compiler can optimize the modulo code better with a constant known at the compilation.
214 static constexpr const std::array<std::size_t(*)(std::size_t), 40> MOD_PRIME = {{
215     &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>,
216     &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>,
217     &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>,
218     &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>, &mod<39>
219 }};
220 
221 }
222 
223 /**
224  * Grow the hash table by using prime numbers as bucket count. Slower than tsl::hh::power_of_two_growth_policy in
225  * general but will probably distribute the values around better in the buckets with a poor hash function.
226  *
227  * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers.
228  *
229  * With a switch the code would look like:
230  * \code
231  * switch(iprime) { // iprime is the current prime of the hash table
232  *     case 0: hash % 5ul;
233  *             break;
234  *     case 1: hash % 17ul;
235  *             break;
236  *     case 2: hash % 29ul;
237  *             break;
238  *     ...
239  * }
240  * \endcode
241  *
242  * Due to the constant variable in the modulo the compiler is able to optimize the operation
243  * by a series of multiplications, substractions and shifts.
244  *
245  * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environement.
246  */
247 class prime_growth_policy {
248 public:
prime_growth_policy(std::size_t & min_bucket_count_in_out)249     explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
250         auto it_prime = std::lower_bound(detail::PRIMES.begin(),
251                                          detail::PRIMES.end(), min_bucket_count_in_out);
252         if(it_prime == detail::PRIMES.end()) {
253             throw std::length_error("The hash table exceeds its maxmimum size.");
254         }
255 
256         m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
257         if(min_bucket_count_in_out > 0) {
258             min_bucket_count_in_out = *it_prime;
259         }
260         else {
261             min_bucket_count_in_out = 0;
262         }
263     }
264 
bucket_for_hash(std::size_t hash)265     std::size_t bucket_for_hash(std::size_t hash) const noexcept {
266         return detail::MOD_PRIME[m_iprime](hash);
267     }
268 
next_bucket_count()269     std::size_t next_bucket_count() const {
270         if(m_iprime + 1 >= detail::PRIMES.size()) {
271             throw std::length_error("The hash table exceeds its maxmimum size.");
272         }
273 
274         return detail::PRIMES[m_iprime + 1];
275     }
276 
max_bucket_count()277     std::size_t max_bucket_count() const {
278         return detail::PRIMES.back();
279     }
280 
clear()281     void clear() noexcept {
282         m_iprime = 0;
283     }
284 
285 private:
286     unsigned int m_iprime;
287 
288     static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
289                   "The type of m_iprime is not big enough.");
290 };
291 
292 }
293 }
294 
295 #endif
296