1 /**
2 * MIT License
3 *
4 * Copyright (c) 2018 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H
25 #define TSL_HOPSCOTCH_GROWTH_POLICY_H
26
27 #include <algorithm>
28 #include <array>
29 #include <climits>
30 #include <cmath>
31 #include <cstddef>
32 #include <cstdint>
33 #include <iterator>
34 #include <limits>
35 #include <ratio>
36 #include <stdexcept>
37
38 /**
39 * Only activate tsl_hh_assert if TSL_DEBUG is defined.
40 * This way we avoid the performance hit when NDEBUG is not defined with assert
41 * as tsl_hh_assert is used a lot (people usually compile with "-O3" and not
42 * "-O3 -DNDEBUG").
43 */
44 #ifdef TSL_DEBUG
45 #define tsl_hh_assert(expr) assert(expr)
46 #else
47 #define tsl_hh_assert(expr) (static_cast<void>(0))
48 #endif
49
50 /**
51 * If exceptions are enabled, throw the exception passed in parameter, otherwise
52 * call std::terminate.
53 */
54 #if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
55 (defined(_MSC_VER) && defined(_CPPUNWIND))) && \
56 !defined(TSL_NO_EXCEPTIONS)
57 #define TSL_HH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
58 #else
59 #define TSL_HH_NO_EXCEPTIONS
60 #ifdef NDEBUG
61 #define TSL_HH_THROW_OR_TERMINATE(ex, msg) std::terminate()
62 #else
63 #include <iostream>
64 #define TSL_HH_THROW_OR_TERMINATE(ex, msg) \
65 do { \
66 std::cerr << msg << std::endl; \
67 std::terminate(); \
68 } while (0)
69 #endif
70 #endif
71
72 namespace tsl {
73 namespace hh {
74
75 /**
76 * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a
77 * power of two. It allows the table to use a mask operation instead of a modulo
78 * operation to map a hash to a bucket.
79 *
80 * GrowthFactor must be a power of two >= 2.
81 */
82 template <std::size_t GrowthFactor>
83 class power_of_two_growth_policy {
84 public:
85 /**
86 * Called on the hash table creation and on rehash. The number of buckets for
87 * the table is passed in parameter. This number is a minimum, the policy may
88 * update this value with a higher value if needed (but not lower).
89 *
90 * If 0 is given, min_bucket_count_in_out must still be 0 after the policy
91 * creation and bucket_for_hash must always return 0 in this case.
92 */
power_of_two_growth_policy(std::size_t & min_bucket_count_in_out)93 explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
94 if (min_bucket_count_in_out > max_bucket_count()) {
95 TSL_HH_THROW_OR_TERMINATE(std::length_error,
96 "The hash table exceeds its maximum size.");
97 }
98
99 if (min_bucket_count_in_out > 0) {
100 min_bucket_count_in_out =
101 round_up_to_power_of_two(min_bucket_count_in_out);
102 m_mask = min_bucket_count_in_out - 1;
103 } else {
104 m_mask = 0;
105 }
106 }
107
108 /**
109 * Return the bucket [0, bucket_count()) to which the hash belongs.
110 * If bucket_count() is 0, it must always return 0.
111 */
bucket_for_hash(std::size_t hash)112 std::size_t bucket_for_hash(std::size_t hash) const noexcept {
113 return hash & m_mask;
114 }
115
116 /**
117 * Return the bucket count to use when the bucket array grows on rehash.
118 */
next_bucket_count()119 std::size_t next_bucket_count() const {
120 if ((m_mask + 1) > max_bucket_count() / GrowthFactor) {
121 TSL_HH_THROW_OR_TERMINATE(std::length_error,
122 "The hash table exceeds its maximum size.");
123 }
124
125 return (m_mask + 1) * GrowthFactor;
126 }
127
128 /**
129 * Return the maximum number of buckets supported by the policy.
130 */
max_bucket_count()131 std::size_t max_bucket_count() const {
132 // Largest power of two.
133 return (std::numeric_limits<std::size_t>::max() / 2) + 1;
134 }
135
136 /**
137 * Reset the growth policy as if it was created with a bucket count of 0.
138 * After a clear, the policy must always return 0 when bucket_for_hash is
139 * called.
140 */
clear()141 void clear() noexcept { m_mask = 0; }
142
143 private:
round_up_to_power_of_two(std::size_t value)144 static std::size_t round_up_to_power_of_two(std::size_t value) {
145 if (is_power_of_two(value)) {
146 return value;
147 }
148
149 if (value == 0) {
150 return 1;
151 }
152
153 --value;
154 for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
155 value |= value >> i;
156 }
157
158 return value + 1;
159 }
160
is_power_of_two(std::size_t value)161 static constexpr bool is_power_of_two(std::size_t value) {
162 return value != 0 && (value & (value - 1)) == 0;
163 }
164
165 private:
166 static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2,
167 "GrowthFactor must be a power of two >= 2.");
168
169 std::size_t m_mask;
170 };
171
172 /**
173 * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo
174 * to map a hash to a bucket. Slower but it can be useful if you want a slower
175 * growth.
176 */
177 template <class GrowthFactor = std::ratio<3, 2>>
178 class mod_growth_policy {
179 public:
mod_growth_policy(std::size_t & min_bucket_count_in_out)180 explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
181 if (min_bucket_count_in_out > max_bucket_count()) {
182 TSL_HH_THROW_OR_TERMINATE(std::length_error,
183 "The hash table exceeds its maximum size.");
184 }
185
186 if (min_bucket_count_in_out > 0) {
187 m_mod = min_bucket_count_in_out;
188 } else {
189 m_mod = 1;
190 }
191 }
192
bucket_for_hash(std::size_t hash)193 std::size_t bucket_for_hash(std::size_t hash) const noexcept {
194 return hash % m_mod;
195 }
196
next_bucket_count()197 std::size_t next_bucket_count() const {
198 if (m_mod == max_bucket_count()) {
199 TSL_HH_THROW_OR_TERMINATE(std::length_error,
200 "The hash table exceeds its maximum size.");
201 }
202
203 const double next_bucket_count =
204 std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
205 if (!std::isnormal(next_bucket_count)) {
206 TSL_HH_THROW_OR_TERMINATE(std::length_error,
207 "The hash table exceeds its maximum size.");
208 }
209
210 if (next_bucket_count > double(max_bucket_count())) {
211 return max_bucket_count();
212 } else {
213 return std::size_t(next_bucket_count);
214 }
215 }
216
max_bucket_count()217 std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; }
218
clear()219 void clear() noexcept { m_mod = 1; }
220
221 private:
222 static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR =
223 1.0 * GrowthFactor::num / GrowthFactor::den;
224 static const std::size_t MAX_BUCKET_COUNT =
225 std::size_t(double(std::numeric_limits<std::size_t>::max() /
226 REHASH_SIZE_MULTIPLICATION_FACTOR));
227
228 static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1,
229 "Growth factor should be >= 1.1.");
230
231 std::size_t m_mod;
232 };
233
234 namespace detail {
235
236 #if SIZE_MAX >= ULLONG_MAX
237 #define TSL_HH_NB_PRIMES 51
238 #elif SIZE_MAX >= ULONG_MAX
239 #define TSL_HH_NB_PRIMES 40
240 #else
241 #define TSL_HH_NB_PRIMES 23
242 #endif
243
244 static constexpr const std::array<std::size_t, TSL_HH_NB_PRIMES> PRIMES = {{
245 1u,
246 5u,
247 17u,
248 29u,
249 37u,
250 53u,
251 67u,
252 79u,
253 97u,
254 131u,
255 193u,
256 257u,
257 389u,
258 521u,
259 769u,
260 1031u,
261 1543u,
262 2053u,
263 3079u,
264 6151u,
265 12289u,
266 24593u,
267 49157u,
268 #if SIZE_MAX >= ULONG_MAX
269 98317ul,
270 196613ul,
271 393241ul,
272 786433ul,
273 1572869ul,
274 3145739ul,
275 6291469ul,
276 12582917ul,
277 25165843ul,
278 50331653ul,
279 100663319ul,
280 201326611ul,
281 402653189ul,
282 805306457ul,
283 1610612741ul,
284 3221225473ul,
285 4294967291ul,
286 #endif
287 #if SIZE_MAX >= ULLONG_MAX
288 6442450939ull,
289 12884901893ull,
290 25769803751ull,
291 51539607551ull,
292 103079215111ull,
293 206158430209ull,
294 412316860441ull,
295 824633720831ull,
296 1649267441651ull,
297 3298534883309ull,
298 6597069766657ull,
299 #endif
300 }};
301
302 template <unsigned int IPrime>
mod(std::size_t hash)303 static constexpr std::size_t mod(std::size_t hash) {
304 return hash % PRIMES[IPrime];
305 }
306
307 // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for
308 // faster modulo as the compiler can optimize the modulo code better with a
309 // constant known at the compilation.
310 static constexpr const std::array<std::size_t (*)(std::size_t),
311 TSL_HH_NB_PRIMES>
312 MOD_PRIME = {{
313 &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>,
314 &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>, &mod<11>,
315 &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>,
316 &mod<18>, &mod<19>, &mod<20>, &mod<21>, &mod<22>,
317 #if SIZE_MAX >= ULONG_MAX
318 &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>,
319 &mod<29>, &mod<30>, &mod<31>, &mod<32>, &mod<33>, &mod<34>,
320 &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>,
321 #endif
322 #if SIZE_MAX >= ULLONG_MAX
323 &mod<40>, &mod<41>, &mod<42>, &mod<43>, &mod<44>, &mod<45>,
324 &mod<46>, &mod<47>, &mod<48>, &mod<49>, &mod<50>,
325 #endif
326 }};
327
328 } // namespace detail
329
330 /**
331 * Grow the hash table by using prime numbers as bucket count. Slower than
332 * tsl::hh::power_of_two_growth_policy in general but will probably distribute
333 * the values around better in the buckets with a poor hash function.
334 *
335 * To allow the compiler to optimize the modulo operation, a lookup table is
336 * used with constant primes numbers.
337 *
338 * With a switch the code would look like:
339 * \code
340 * switch(iprime) { // iprime is the current prime of the hash table
341 * case 0: hash % 5ul;
342 * break;
343 * case 1: hash % 17ul;
344 * break;
345 * case 2: hash % 29ul;
346 * break;
347 * ...
348 * }
349 * \endcode
350 *
351 * Due to the constant variable in the modulo the compiler is able to optimize
352 * the operation by a series of multiplications, substractions and shifts.
353 *
354 * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34)
355 * * 5' in a 64 bits environment.
356 */
357 class prime_growth_policy {
358 public:
prime_growth_policy(std::size_t & min_bucket_count_in_out)359 explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
360 auto it_prime = std::lower_bound(
361 detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out);
362 if (it_prime == detail::PRIMES.end()) {
363 TSL_HH_THROW_OR_TERMINATE(std::length_error,
364 "The hash table exceeds its maximum size.");
365 }
366
367 m_iprime = static_cast<unsigned int>(
368 std::distance(detail::PRIMES.begin(), it_prime));
369 if (min_bucket_count_in_out > 0) {
370 min_bucket_count_in_out = *it_prime;
371 } else {
372 min_bucket_count_in_out = 0;
373 }
374 }
375
bucket_for_hash(std::size_t hash)376 std::size_t bucket_for_hash(std::size_t hash) const noexcept {
377 return detail::MOD_PRIME[m_iprime](hash);
378 }
379
next_bucket_count()380 std::size_t next_bucket_count() const {
381 if (m_iprime + 1 >= detail::PRIMES.size()) {
382 TSL_HH_THROW_OR_TERMINATE(std::length_error,
383 "The hash table exceeds its maximum size.");
384 }
385
386 return detail::PRIMES[m_iprime + 1];
387 }
388
max_bucket_count()389 std::size_t max_bucket_count() const { return detail::PRIMES.back(); }
390
clear()391 void clear() noexcept { m_iprime = 0; }
392
393 private:
394 unsigned int m_iprime;
395
396 static_assert(std::numeric_limits<decltype(m_iprime)>::max() >=
397 detail::PRIMES.size(),
398 "The type of m_iprime is not big enough.");
399 };
400
401 } // namespace hh
402 } // namespace tsl
403
404 #endif
405