1 /**
2  * MIT License
3  *
4  * Copyright (c) 2018 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H
25 #define TSL_HOPSCOTCH_GROWTH_POLICY_H
26 
27 #include <algorithm>
28 #include <array>
29 #include <climits>
30 #include <cmath>
31 #include <cstddef>
32 #include <cstdint>
33 #include <iterator>
34 #include <limits>
35 #include <ratio>
36 #include <stdexcept>
37 
38 /**
39  * Only activate tsl_hh_assert if TSL_DEBUG is defined.
40  * This way we avoid the performance hit when NDEBUG is not defined with assert
41  * as tsl_hh_assert is used a lot (people usually compile with "-O3" and not
42  * "-O3 -DNDEBUG").
43  */
44 #ifdef TSL_DEBUG
45 #define tsl_hh_assert(expr) assert(expr)
46 #else
47 #define tsl_hh_assert(expr) (static_cast<void>(0))
48 #endif
49 
50 /**
51  * If exceptions are enabled, throw the exception passed in parameter, otherwise
52  * call std::terminate.
53  */
54 #if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
55      (defined(_MSC_VER) && defined(_CPPUNWIND))) &&        \
56     !defined(TSL_NO_EXCEPTIONS)
57 #define TSL_HH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
58 #else
59 #define TSL_HH_NO_EXCEPTIONS
60 #ifdef NDEBUG
61 #define TSL_HH_THROW_OR_TERMINATE(ex, msg) std::terminate()
62 #else
63 #include <iostream>
64 #define TSL_HH_THROW_OR_TERMINATE(ex, msg) \
65   do {                                     \
66     std::cerr << msg << std::endl;         \
67     std::terminate();                      \
68   } while (0)
69 #endif
70 #endif
71 
72 namespace tsl {
73 namespace hh {
74 
75 /**
76  * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a
77  * power of two. It allows the table to use a mask operation instead of a modulo
78  * operation to map a hash to a bucket.
79  *
80  * GrowthFactor must be a power of two >= 2.
81  */
82 template <std::size_t GrowthFactor>
83 class power_of_two_growth_policy {
84  public:
85   /**
86    * Called on the hash table creation and on rehash. The number of buckets for
87    * the table is passed in parameter. This number is a minimum, the policy may
88    * update this value with a higher value if needed (but not lower).
89    *
90    * If 0 is given, min_bucket_count_in_out must still be 0 after the policy
91    * creation and bucket_for_hash must always return 0 in this case.
92    */
power_of_two_growth_policy(std::size_t & min_bucket_count_in_out)93   explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
94     if (min_bucket_count_in_out > max_bucket_count()) {
95       TSL_HH_THROW_OR_TERMINATE(std::length_error,
96                                 "The hash table exceeds its maximum size.");
97     }
98 
99     if (min_bucket_count_in_out > 0) {
100       min_bucket_count_in_out =
101           round_up_to_power_of_two(min_bucket_count_in_out);
102       m_mask = min_bucket_count_in_out - 1;
103     } else {
104       m_mask = 0;
105     }
106   }
107 
108   /**
109    * Return the bucket [0, bucket_count()) to which the hash belongs.
110    * If bucket_count() is 0, it must always return 0.
111    */
bucket_for_hash(std::size_t hash)112   std::size_t bucket_for_hash(std::size_t hash) const noexcept {
113     return hash & m_mask;
114   }
115 
116   /**
117    * Return the bucket count to use when the bucket array grows on rehash.
118    */
next_bucket_count()119   std::size_t next_bucket_count() const {
120     if ((m_mask + 1) > max_bucket_count() / GrowthFactor) {
121       TSL_HH_THROW_OR_TERMINATE(std::length_error,
122                                 "The hash table exceeds its maximum size.");
123     }
124 
125     return (m_mask + 1) * GrowthFactor;
126   }
127 
128   /**
129    * Return the maximum number of buckets supported by the policy.
130    */
max_bucket_count()131   std::size_t max_bucket_count() const {
132     // Largest power of two.
133     return (std::numeric_limits<std::size_t>::max() / 2) + 1;
134   }
135 
136   /**
137    * Reset the growth policy as if it was created with a bucket count of 0.
138    * After a clear, the policy must always return 0 when bucket_for_hash is
139    * called.
140    */
clear()141   void clear() noexcept { m_mask = 0; }
142 
143  private:
round_up_to_power_of_two(std::size_t value)144   static std::size_t round_up_to_power_of_two(std::size_t value) {
145     if (is_power_of_two(value)) {
146       return value;
147     }
148 
149     if (value == 0) {
150       return 1;
151     }
152 
153     --value;
154     for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
155       value |= value >> i;
156     }
157 
158     return value + 1;
159   }
160 
is_power_of_two(std::size_t value)161   static constexpr bool is_power_of_two(std::size_t value) {
162     return value != 0 && (value & (value - 1)) == 0;
163   }
164 
165  private:
166   static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2,
167                 "GrowthFactor must be a power of two >= 2.");
168 
169   std::size_t m_mask;
170 };
171 
172 /**
173  * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo
174  * to map a hash to a bucket. Slower but it can be useful if you want a slower
175  * growth.
176  */
177 template <class GrowthFactor = std::ratio<3, 2>>
178 class mod_growth_policy {
179  public:
mod_growth_policy(std::size_t & min_bucket_count_in_out)180   explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
181     if (min_bucket_count_in_out > max_bucket_count()) {
182       TSL_HH_THROW_OR_TERMINATE(std::length_error,
183                                 "The hash table exceeds its maximum size.");
184     }
185 
186     if (min_bucket_count_in_out > 0) {
187       m_mod = min_bucket_count_in_out;
188     } else {
189       m_mod = 1;
190     }
191   }
192 
bucket_for_hash(std::size_t hash)193   std::size_t bucket_for_hash(std::size_t hash) const noexcept {
194     return hash % m_mod;
195   }
196 
next_bucket_count()197   std::size_t next_bucket_count() const {
198     if (m_mod == max_bucket_count()) {
199       TSL_HH_THROW_OR_TERMINATE(std::length_error,
200                                 "The hash table exceeds its maximum size.");
201     }
202 
203     const double next_bucket_count =
204         std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
205     if (!std::isnormal(next_bucket_count)) {
206       TSL_HH_THROW_OR_TERMINATE(std::length_error,
207                                 "The hash table exceeds its maximum size.");
208     }
209 
210     if (next_bucket_count > double(max_bucket_count())) {
211       return max_bucket_count();
212     } else {
213       return std::size_t(next_bucket_count);
214     }
215   }
216 
max_bucket_count()217   std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; }
218 
clear()219   void clear() noexcept { m_mod = 1; }
220 
221  private:
222   static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR =
223       1.0 * GrowthFactor::num / GrowthFactor::den;
224   static const std::size_t MAX_BUCKET_COUNT =
225       std::size_t(double(std::numeric_limits<std::size_t>::max() /
226                          REHASH_SIZE_MULTIPLICATION_FACTOR));
227 
228   static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1,
229                 "Growth factor should be >= 1.1.");
230 
231   std::size_t m_mod;
232 };
233 
234 namespace detail {
235 
236 #if SIZE_MAX >= ULLONG_MAX
237 #define TSL_HH_NB_PRIMES 51
238 #elif SIZE_MAX >= ULONG_MAX
239 #define TSL_HH_NB_PRIMES 40
240 #else
241 #define TSL_HH_NB_PRIMES 23
242 #endif
243 
244 static constexpr const std::array<std::size_t, TSL_HH_NB_PRIMES> PRIMES = {{
245     1u,
246     5u,
247     17u,
248     29u,
249     37u,
250     53u,
251     67u,
252     79u,
253     97u,
254     131u,
255     193u,
256     257u,
257     389u,
258     521u,
259     769u,
260     1031u,
261     1543u,
262     2053u,
263     3079u,
264     6151u,
265     12289u,
266     24593u,
267     49157u,
268 #if SIZE_MAX >= ULONG_MAX
269     98317ul,
270     196613ul,
271     393241ul,
272     786433ul,
273     1572869ul,
274     3145739ul,
275     6291469ul,
276     12582917ul,
277     25165843ul,
278     50331653ul,
279     100663319ul,
280     201326611ul,
281     402653189ul,
282     805306457ul,
283     1610612741ul,
284     3221225473ul,
285     4294967291ul,
286 #endif
287 #if SIZE_MAX >= ULLONG_MAX
288     6442450939ull,
289     12884901893ull,
290     25769803751ull,
291     51539607551ull,
292     103079215111ull,
293     206158430209ull,
294     412316860441ull,
295     824633720831ull,
296     1649267441651ull,
297     3298534883309ull,
298     6597069766657ull,
299 #endif
300 }};
301 
302 template <unsigned int IPrime>
mod(std::size_t hash)303 static constexpr std::size_t mod(std::size_t hash) {
304   return hash % PRIMES[IPrime];
305 }
306 
307 // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for
308 // faster modulo as the compiler can optimize the modulo code better with a
309 // constant known at the compilation.
310 static constexpr const std::array<std::size_t (*)(std::size_t),
311                                   TSL_HH_NB_PRIMES>
312     MOD_PRIME = {{
313         &mod<0>,  &mod<1>,  &mod<2>,  &mod<3>,  &mod<4>,  &mod<5>,
314         &mod<6>,  &mod<7>,  &mod<8>,  &mod<9>,  &mod<10>, &mod<11>,
315         &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>,
316         &mod<18>, &mod<19>, &mod<20>, &mod<21>, &mod<22>,
317 #if SIZE_MAX >= ULONG_MAX
318         &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>,
319         &mod<29>, &mod<30>, &mod<31>, &mod<32>, &mod<33>, &mod<34>,
320         &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>,
321 #endif
322 #if SIZE_MAX >= ULLONG_MAX
323         &mod<40>, &mod<41>, &mod<42>, &mod<43>, &mod<44>, &mod<45>,
324         &mod<46>, &mod<47>, &mod<48>, &mod<49>, &mod<50>,
325 #endif
326     }};
327 
328 }  // namespace detail
329 
330 /**
331  * Grow the hash table by using prime numbers as bucket count. Slower than
332  * tsl::hh::power_of_two_growth_policy in general but will probably distribute
333  * the values around better in the buckets with a poor hash function.
334  *
335  * To allow the compiler to optimize the modulo operation, a lookup table is
336  * used with constant primes numbers.
337  *
338  * With a switch the code would look like:
339  * \code
340  * switch(iprime) { // iprime is the current prime of the hash table
341  *     case 0: hash % 5ul;
342  *             break;
343  *     case 1: hash % 17ul;
344  *             break;
345  *     case 2: hash % 29ul;
346  *             break;
347  *     ...
348  * }
349  * \endcode
350  *
351  * Due to the constant variable in the modulo the compiler is able to optimize
352  * the operation by a series of multiplications, substractions and shifts.
353  *
354  * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34)
355  * * 5' in a 64 bits environment.
356  */
357 class prime_growth_policy {
358  public:
prime_growth_policy(std::size_t & min_bucket_count_in_out)359   explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
360     auto it_prime = std::lower_bound(
361         detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out);
362     if (it_prime == detail::PRIMES.end()) {
363       TSL_HH_THROW_OR_TERMINATE(std::length_error,
364                                 "The hash table exceeds its maximum size.");
365     }
366 
367     m_iprime = static_cast<unsigned int>(
368         std::distance(detail::PRIMES.begin(), it_prime));
369     if (min_bucket_count_in_out > 0) {
370       min_bucket_count_in_out = *it_prime;
371     } else {
372       min_bucket_count_in_out = 0;
373     }
374   }
375 
bucket_for_hash(std::size_t hash)376   std::size_t bucket_for_hash(std::size_t hash) const noexcept {
377     return detail::MOD_PRIME[m_iprime](hash);
378   }
379 
next_bucket_count()380   std::size_t next_bucket_count() const {
381     if (m_iprime + 1 >= detail::PRIMES.size()) {
382       TSL_HH_THROW_OR_TERMINATE(std::length_error,
383                                 "The hash table exceeds its maximum size.");
384     }
385 
386     return detail::PRIMES[m_iprime + 1];
387   }
388 
max_bucket_count()389   std::size_t max_bucket_count() const { return detail::PRIMES.back(); }
390 
clear()391   void clear() noexcept { m_iprime = 0; }
392 
393  private:
394   unsigned int m_iprime;
395 
396   static_assert(std::numeric_limits<decltype(m_iprime)>::max() >=
397                     detail::PRIMES.size(),
398                 "The type of m_iprime is not big enough.");
399 };
400 
401 }  // namespace hh
402 }  // namespace tsl
403 
404 #endif
405