1 /**
2  * MIT License
3  *
4  * Copyright (c) 2017 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef TSL_ROBIN_GROWTH_POLICY_H
25 #define TSL_ROBIN_GROWTH_POLICY_H
26 
27 
28 #include <algorithm>
29 #include <array>
30 #include <climits>
31 #include <cmath>
32 #include <cstddef>
33 #include <cstdint>
34 #include <iterator>
35 #include <limits>
36 #include <ratio>
37 #include <stdexcept>
38 
39 
/**
 * Debug-only assertion used by the tsl::rh code.
 *
 * When TSL_DEBUG is defined, the expression is forwarded to the standard assert macro
 * (<cassert> is included here so the macro is self-sufficient). Otherwise it expands to
 * a no-op and `expr` is not evaluated at all, so it must not carry required side effects.
 */
#ifdef TSL_DEBUG
#    include <cassert>
#    define tsl_rh_assert(expr) assert(expr)
#else
#    define tsl_rh_assert(expr) (static_cast<void>(0))
#endif
45 
46 
/**
 * TSL_RH_THROW_OR_TERMINATE(ex, msg)
 *
 * If exceptions are enabled, throw an exception of type `ex` constructed from `msg`.
 * Otherwise call std::terminate; when NDEBUG is not defined, `msg` is written to std::cerr first.
 *
 * Exceptions are considered enabled when the compiler advertises them (__cpp_exceptions,
 * __EXCEPTIONS, or _CPPUNWIND with MSVC) and TSL_NO_EXCEPTIONS is not defined.
 * When they are disabled, TSL_RH_NO_EXCEPTIONS is defined as well.
 */
#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (defined (_MSC_VER) && defined (_CPPUNWIND))) && !defined(TSL_NO_EXCEPTIONS)
#    define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
#else
#    define TSL_RH_NO_EXCEPTIONS
     // std::terminate is declared in <exception>; do not rely on a transitive include.
#    include <exception>
#    ifdef NDEBUG
#        define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate()
#    else
#        include <iostream>
#        define TSL_RH_THROW_OR_TERMINATE(ex, msg) do { std::cerr << msg << std::endl; std::terminate(); } while(0)
#    endif
#endif
61 
62 
/**
 * TSL_RH_LIKELY(exp): evaluates to the truth value of `exp`. On GCC and Clang the expression
 * is additionally passed through __builtin_expect to tell the compiler it is expected to be true.
 */
#if !defined(__GNUC__) && !defined(__clang__)
#    define TSL_RH_LIKELY(exp) (exp)
#else
#    define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true))
#endif
68 
69 
70 namespace tsl {
71 namespace rh {
72 
73 /**
74  * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows
75  * the table to use a mask operation instead of a modulo operation to map a hash to a bucket.
76  *
77  * GrowthFactor must be a power of two >= 2.
78  */
79 template<std::size_t GrowthFactor>
80 class power_of_two_growth_policy {
81 public:
82     /**
83      * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter.
84      * This number is a minimum, the policy may update this value with a higher value if needed (but not lower).
85      *
86      * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and
87      * bucket_for_hash must always return 0 in this case.
88      */
power_of_two_growth_policy(std::size_t & min_bucket_count_in_out)89     explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
90         if(min_bucket_count_in_out > max_bucket_count()) {
91             TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
92         }
93 
94         if(min_bucket_count_in_out > 0) {
95             min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
96             m_mask = min_bucket_count_in_out - 1;
97         }
98         else {
99             m_mask = 0;
100         }
101     }
102 
103     /**
104      * Return the bucket [0, bucket_count()) to which the hash belongs.
105      * If bucket_count() is 0, it must always return 0.
106      */
bucket_for_hash(std::size_t hash)107     std::size_t bucket_for_hash(std::size_t hash) const noexcept {
108         return hash & m_mask;
109     }
110 
111     /**
112      * Return the number of buckets that should be used on next growth.
113      */
next_bucket_count()114     std::size_t next_bucket_count() const {
115         if((m_mask + 1) > max_bucket_count() / GrowthFactor) {
116             TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
117         }
118 
119         return (m_mask + 1) * GrowthFactor;
120     }
121 
122     /**
123      * Return the maximum number of buckets supported by the policy.
124      */
max_bucket_count()125     std::size_t max_bucket_count() const {
126         // Largest power of two.
127         return (std::numeric_limits<std::size_t>::max() / 2) + 1;
128     }
129 
130     /**
131      * Reset the growth policy as if it was created with a bucket count of 0.
132      * After a clear, the policy must always return 0 when bucket_for_hash is called.
133      */
clear()134     void clear() noexcept {
135         m_mask = 0;
136     }
137 
138 private:
round_up_to_power_of_two(std::size_t value)139     static std::size_t round_up_to_power_of_two(std::size_t value) {
140         if(is_power_of_two(value)) {
141             return value;
142         }
143 
144         if(value == 0) {
145             return 1;
146         }
147 
148         --value;
149         for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
150             value |= value >> i;
151         }
152 
153         return value + 1;
154     }
155 
is_power_of_two(std::size_t value)156     static constexpr bool is_power_of_two(std::size_t value) {
157         return value != 0 && (value & (value - 1)) == 0;
158     }
159 
160 protected:
161     static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2.");
162 
163     std::size_t m_mask;
164 };
165 
166 
167 /**
168  * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
169  * to a bucket. Slower but it can be useful if you want a slower growth.
170  */
171 template<class GrowthFactor = std::ratio<3, 2>>
172 class mod_growth_policy {
173 public:
mod_growth_policy(std::size_t & min_bucket_count_in_out)174     explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
175         if(min_bucket_count_in_out > max_bucket_count()) {
176             TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
177         }
178 
179         if(min_bucket_count_in_out > 0) {
180             m_mod = min_bucket_count_in_out;
181         }
182         else {
183             m_mod = 1;
184         }
185     }
186 
bucket_for_hash(std::size_t hash)187     std::size_t bucket_for_hash(std::size_t hash) const noexcept {
188         return hash % m_mod;
189     }
190 
next_bucket_count()191     std::size_t next_bucket_count() const {
192         if(m_mod == max_bucket_count()) {
193             TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
194         }
195 
196         const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
197         if(!std::isnormal(next_bucket_count)) {
198             TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
199         }
200 
201         if(next_bucket_count > double(max_bucket_count())) {
202             return max_bucket_count();
203         }
204         else {
205             return std::size_t(next_bucket_count);
206         }
207     }
208 
max_bucket_count()209     std::size_t max_bucket_count() const {
210         return MAX_BUCKET_COUNT;
211     }
212 
clear()213     void clear() noexcept {
214         m_mod = 1;
215     }
216 
217 private:
218     static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den;
219     static const std::size_t MAX_BUCKET_COUNT =
220             std::size_t(double(
221                     std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR
222             ));
223 
224     static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
225 
226     std::size_t m_mod;
227 };
228 
229 
230 
namespace detail {

// Number of entries in PRIMES / MOD_PRIME. It depends on how wide std::size_t is
// compared to unsigned long and unsigned long long.
#if SIZE_MAX >= ULLONG_MAX
#define TSL_RH_NB_PRIMES 51
#elif SIZE_MAX >= ULONG_MAX
#define TSL_RH_NB_PRIMES 40
#else
#define TSL_RH_NB_PRIMES 23
#endif

// Sorted list of the bucket counts usable by the prime growth policy.
static constexpr const std::array<std::size_t, TSL_RH_NB_PRIMES> PRIMES = {{
    // Entries 0-22: always present.
    1u, 5u, 17u, 29u, 37u, 53u, 67u, 79u,
    97u, 131u, 193u, 257u, 389u, 521u, 769u, 1031u,
    1543u, 2053u, 3079u, 6151u, 12289u, 24593u, 49157u,
#if SIZE_MAX >= ULONG_MAX
    // Entries 23-39: only when std::size_t is at least as wide as unsigned long.
    98317ul, 196613ul, 393241ul, 786433ul,
    1572869ul, 3145739ul, 6291469ul, 12582917ul,
    25165843ul, 50331653ul, 100663319ul, 201326611ul,
    402653189ul, 805306457ul, 1610612741ul, 3221225473ul,
    4294967291ul,
#endif
#if SIZE_MAX >= ULLONG_MAX
    // Entries 40-50: only when std::size_t is at least as wide as unsigned long long.
    6442450939ull, 12884901893ull, 25769803751ull, 51539607551ull,
    103079215111ull, 206158430209ull, 412316860441ull, 824633720831ull,
    1649267441651ull, 3298534883309ull, 6597069766657ull,
#endif
}};

// Remainder of hash divided by PRIMES[PrimeIndex]; the index is a template parameter
// so that each instantiation divides by a fixed entry of the table.
template<unsigned int PrimeIndex>
static constexpr std::size_t mod(std::size_t hash) {
    return hash % PRIMES[PrimeIndex];
}

// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. Going through this table lets the
// compiler optimize each modulo better, as the divisor is a constant known at compilation.
static constexpr const std::array<std::size_t(*)(std::size_t), TSL_RH_NB_PRIMES> MOD_PRIME = {{
    &mod<0>,  &mod<1>,  &mod<2>,  &mod<3>,  &mod<4>,  &mod<5>,  &mod<6>,  &mod<7>,
    &mod<8>,  &mod<9>,  &mod<10>, &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>,
    &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, &mod<21>, &mod<22>,
#if SIZE_MAX >= ULONG_MAX
    &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>,
    &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37>, &mod<38>,
    &mod<39>,
#endif
#if SIZE_MAX >= ULLONG_MAX
    &mod<40>, &mod<41>, &mod<42>, &mod<43>, &mod<44>, &mod<45>, &mod<46>, &mod<47>,
    &mod<48>, &mod<49>, &mod<50>,
#endif
}};

}
275 
276 /**
277  * Grow the hash table by using prime numbers as bucket count. Slower than tsl::rh::power_of_two_growth_policy in
278  * general but will probably distribute the values around better in the buckets with a poor hash function.
279  *
280  * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers.
281  *
282  * With a switch the code would look like:
283  * \code
284  * switch(iprime) { // iprime is the current prime of the hash table
285  *     case 0: hash % 5ul;
286  *             break;
287  *     case 1: hash % 17ul;
288  *             break;
289  *     case 2: hash % 29ul;
290  *             break;
291  *     ...
292  * }
293  * \endcode
294  *
295  * Due to the constant variable in the modulo the compiler is able to optimize the operation
296  * by a series of multiplications, substractions and shifts.
297  *
298  * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environment.
299  */
300 class prime_growth_policy {
301 public:
prime_growth_policy(std::size_t & min_bucket_count_in_out)302     explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
303         auto it_prime = std::lower_bound(detail::PRIMES.begin(),
304                                          detail::PRIMES.end(), min_bucket_count_in_out);
305         if(it_prime == detail::PRIMES.end()) {
306             TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
307         }
308 
309         m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
310         if(min_bucket_count_in_out > 0) {
311             min_bucket_count_in_out = *it_prime;
312         }
313         else {
314             min_bucket_count_in_out = 0;
315         }
316     }
317 
bucket_for_hash(std::size_t hash)318     std::size_t bucket_for_hash(std::size_t hash) const noexcept {
319         return detail::MOD_PRIME[m_iprime](hash);
320     }
321 
next_bucket_count()322     std::size_t next_bucket_count() const {
323         if(m_iprime + 1 >= detail::PRIMES.size()) {
324             TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
325         }
326 
327         return detail::PRIMES[m_iprime + 1];
328     }
329 
max_bucket_count()330     std::size_t max_bucket_count() const {
331         return detail::PRIMES.back();
332     }
333 
clear()334     void clear() noexcept {
335         m_iprime = 0;
336     }
337 
338 private:
339     unsigned int m_iprime;
340 
341     static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
342                   "The type of m_iprime is not big enough.");
343 };
344 
345 }
346 }
347 
348 #endif
349