1 /**
2 * MIT License
3 *
4 * Copyright (c) 2018 Tessil
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H
25 #define TSL_HOPSCOTCH_GROWTH_POLICY_H
26
27
28 #include <algorithm>
29 #include <array>
30 #include <climits>
31 #include <cmath>
32 #include <cstddef>
33 #include <iterator>
34 #include <limits>
35 #include <ratio>
36 #include <stdexcept>
37
38
39 namespace tsl {
40 namespace hh {
41
42 /**
43 * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows
44 * the table to use a mask operation instead of a modulo operation to map a hash to a bucket.
45 *
46 * GrowthFactor must be a power of two >= 2.
47 */
48 template<std::size_t GrowthFactor>
49 class power_of_two_growth_policy {
50 public:
51 /**
52 * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter.
53 * This number is a minimum, the policy may update this value with a higher value if needed (but not lower).
54 *
55 * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and
56 * bucket_for_hash must always return 0 in this case.
57 */
power_of_two_growth_policy(std::size_t & min_bucket_count_in_out)58 explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
59 if(min_bucket_count_in_out > max_bucket_count()) {
60 throw std::length_error("The hash table exceeds its maxmimum size.");
61 }
62
63 if(min_bucket_count_in_out > 0) {
64 min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
65 m_mask = min_bucket_count_in_out - 1;
66 }
67 else {
68 m_mask = 0;
69 }
70 }
71
72 /**
73 * Return the bucket [0, bucket_count()) to which the hash belongs.
74 * If bucket_count() is 0, it must always return 0.
75 */
bucket_for_hash(std::size_t hash)76 std::size_t bucket_for_hash(std::size_t hash) const noexcept {
77 return hash & m_mask;
78 }
79
80 /**
81 * Return the bucket count to use when the bucket array grows on rehash.
82 */
next_bucket_count()83 std::size_t next_bucket_count() const {
84 if((m_mask + 1) > max_bucket_count() / GrowthFactor) {
85 throw std::length_error("The hash table exceeds its maxmimum size.");
86 }
87
88 return (m_mask + 1) * GrowthFactor;
89 }
90
91 /**
92 * Return the maximum number of buckets supported by the policy.
93 */
max_bucket_count()94 std::size_t max_bucket_count() const {
95 // Largest power of two.
96 return (std::numeric_limits<std::size_t>::max() / 2) + 1;
97 }
98
99 /**
100 * Reset the growth policy as if it was created with a bucket count of 0.
101 * After a clear, the policy must always return 0 when bucket_for_hash is called.
102 */
clear()103 void clear() noexcept {
104 m_mask = 0;
105 }
106
107 private:
round_up_to_power_of_two(std::size_t value)108 static std::size_t round_up_to_power_of_two(std::size_t value) {
109 if(is_power_of_two(value)) {
110 return value;
111 }
112
113 if(value == 0) {
114 return 1;
115 }
116
117 --value;
118 for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
119 value |= value >> i;
120 }
121
122 return value + 1;
123 }
124
is_power_of_two(std::size_t value)125 static constexpr bool is_power_of_two(std::size_t value) {
126 return value != 0 && (value & (value - 1)) == 0;
127 }
128
129 private:
130 static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2.");
131
132 std::size_t m_mask;
133 };
134
135
136 /**
137 * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
138 * to a bucket. Slower but it can be useful if you want a slower growth.
139 */
140 template<class GrowthFactor = std::ratio<3, 2>>
141 class mod_growth_policy {
142 public:
mod_growth_policy(std::size_t & min_bucket_count_in_out)143 explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
144 if(min_bucket_count_in_out > max_bucket_count()) {
145 throw std::length_error("The hash table exceeds its maxmimum size.");
146 }
147
148 if(min_bucket_count_in_out > 0) {
149 m_mod = min_bucket_count_in_out;
150 }
151 else {
152 m_mod = 1;
153 }
154 }
155
bucket_for_hash(std::size_t hash)156 std::size_t bucket_for_hash(std::size_t hash) const noexcept {
157 return hash % m_mod;
158 }
159
next_bucket_count()160 std::size_t next_bucket_count() const {
161 if(m_mod == max_bucket_count()) {
162 throw std::length_error("The hash table exceeds its maxmimum size.");
163 }
164
165 const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
166 if(!std::isnormal(next_bucket_count)) {
167 throw std::length_error("The hash table exceeds its maxmimum size.");
168 }
169
170 if(next_bucket_count > double(max_bucket_count())) {
171 return max_bucket_count();
172 }
173 else {
174 return std::size_t(next_bucket_count);
175 }
176 }
177
max_bucket_count()178 std::size_t max_bucket_count() const {
179 return MAX_BUCKET_COUNT;
180 }
181
clear()182 void clear() noexcept {
183 m_mod = 1;
184 }
185
186 private:
187 static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den;
188 static const std::size_t MAX_BUCKET_COUNT =
189 std::size_t(double(
190 std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR
191 ));
192
193 static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
194
195 std::size_t m_mod;
196 };
197
198
199
200 namespace detail {
201
202 static constexpr const std::array<std::size_t, 40> PRIMES = {{
203 1ul, 5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul,
204 1543ul, 2053ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
205 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul,
206 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul
207 }};
208
209 template<unsigned int IPrime>
mod(std::size_t hash)210 static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; }
211
212 // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the
213 // compiler can optimize the modulo code better with a constant known at the compilation.
214 static constexpr const std::array<std::size_t(*)(std::size_t), 40> MOD_PRIME = {{
215 &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>,
216 &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>,
217 &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>,
218 &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>, &mod<39>
219 }};
220
221 }
222
223 /**
224 * Grow the hash table by using prime numbers as bucket count. Slower than tsl::hh::power_of_two_growth_policy in
225 * general but will probably distribute the values around better in the buckets with a poor hash function.
226 *
227 * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers.
228 *
229 * With a switch the code would look like:
230 * \code
231 * switch(iprime) { // iprime is the current prime of the hash table
232 * case 0: hash % 5ul;
233 * break;
234 * case 1: hash % 17ul;
235 * break;
236 * case 2: hash % 29ul;
237 * break;
238 * ...
239 * }
240 * \endcode
241 *
242 * Due to the constant variable in the modulo the compiler is able to optimize the operation
243 * by a series of multiplications, substractions and shifts.
244 *
245 * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environement.
246 */
247 class prime_growth_policy {
248 public:
prime_growth_policy(std::size_t & min_bucket_count_in_out)249 explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
250 auto it_prime = std::lower_bound(detail::PRIMES.begin(),
251 detail::PRIMES.end(), min_bucket_count_in_out);
252 if(it_prime == detail::PRIMES.end()) {
253 throw std::length_error("The hash table exceeds its maxmimum size.");
254 }
255
256 m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
257 if(min_bucket_count_in_out > 0) {
258 min_bucket_count_in_out = *it_prime;
259 }
260 else {
261 min_bucket_count_in_out = 0;
262 }
263 }
264
bucket_for_hash(std::size_t hash)265 std::size_t bucket_for_hash(std::size_t hash) const noexcept {
266 return detail::MOD_PRIME[m_iprime](hash);
267 }
268
next_bucket_count()269 std::size_t next_bucket_count() const {
270 if(m_iprime + 1 >= detail::PRIMES.size()) {
271 throw std::length_error("The hash table exceeds its maxmimum size.");
272 }
273
274 return detail::PRIMES[m_iprime + 1];
275 }
276
max_bucket_count()277 std::size_t max_bucket_count() const {
278 return detail::PRIMES.back();
279 }
280
clear()281 void clear() noexcept {
282 m_iprime = 0;
283 }
284
285 private:
286 unsigned int m_iprime;
287
288 static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
289 "The type of m_iprime is not big enough.");
290 };
291
292 }
293 }
294
295 #endif
296