1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  */
16 
17 #pragma once
18 
19 /** \file
20  * \ingroup bli
21  *
22  * A specialization of `blender::DefaultHash<T>` provides a hash function for values of type T.
23  * This hash function is used by default in hash table implementations in blenlib.
24  *
25  * The actual hash function is in the `operator()` method of `DefaultHash<T>`. The following code
26  * computes the hash of some value using DefaultHash.
27  *
28  *   T value = ...;
29  *   DefaultHash<T> hash_function;
30  *   uint32_t hash = hash_function(value);
31  *
32  * Hash table implementations like blender::Set support heterogeneous key lookups. That means that
33  * one can do a lookup with a key of type A in a hash table that stores keys of type B. This is
34  * commonly done when B is std::string, because the conversion from e.g. a #StringRef to
35  * std::string can be costly and is unnecessary. To make this work, values of type A and B that
36  * compare equal have to have the same hash value. This is achieved by defining potentially
37  * multiple `operator()` in a specialization of #DefaultHash. All those methods have to compute the
38  * same hash for values that compare equal.
39  *
40  * The computed hash is an unsigned 64 bit integer. Ideally, the hash function would generate
41  * uniformly random hash values for a set of keys. However, in many cases trivial hash functions
42  * are faster and produce a good enough distribution. In general it is better when more information
43  * is in the lower bits of the hash. By choosing a good probing strategy, the effects of a bad hash
44  * function are less noticeable though. In this context a good probing strategy is one that takes
45  * all bits of the hash into account eventually. One has to check on a case by case basis to see if
46  * a better but more expensive or trivial hash function works better.
47  *
48  * There are three main ways to provide a hash table implementation with a custom hash function.
49  *
50  * - When you want to provide a default hash function for your own custom type: Add a `hash`
51  *   member function to it. The function should return `uint64_t` and take no arguments. This
52  *   method will be called by the default implementation of #DefaultHash. It will automatically be
53  *   used by hash table implementations.
54  *
55  * - When you want to provide a default hash function for a type that you cannot modify: Add a new
56  *   specialization to the #DefaultHash struct. This can be done by writing code like below in
57  *   either global or BLI namespace.
58  *
59  *     template<> struct blender::DefaultHash<TheType> {
60  *       uint64_t operator()(const TheType &value) const {
61  *         return ...;
62  *       }
63  *     };
64  *
65  * - When you want to provide a different hash function for a type that already has a default hash
66  *   function: Implement a struct like the one below and pass it as template parameter to the hash
67  *   table explicitly.
68  *
69  *     struct MyCustomHash {
70  *       uint64_t operator()(const TheType &value) const {
71  *         return ...;
72  *       }
73  *     };
74  */
75 
76 #include <functional>
77 #include <memory>
78 #include <string>
79 #include <utility>
80 
81 #include "BLI_math_base.h"
82 #include "BLI_string_ref.hh"
83 #include "BLI_utildefines.h"
84 
85 namespace blender {
86 
87 /**
88  * If there is no other specialization of #DefaultHash for a given type, try to call `hash()` on
89  * the value. If there is no such method, this will result in a compiler error. Usually that means
90  * that you have to implement a hash function using one of three strategies listed above.
91  */
92 template<typename T> struct DefaultHash {
operator ()blender::DefaultHash93   uint64_t operator()(const T &value) const
94   {
95     return value.hash();
96   }
97 };
98 
99 /**
100  * Use the same hash function for const and non const variants of a type.
101  */
102 template<typename T> struct DefaultHash<const T> {
operator ()blender::DefaultHash103   uint64_t operator()(const T &value) const
104   {
105     return DefaultHash<T>{}(value);
106   }
107 };
108 
109 #define TRIVIAL_DEFAULT_INT_HASH(TYPE) \
110   template<> struct DefaultHash<TYPE> { \
111     uint64_t operator()(TYPE value) const \
112     { \
113       return static_cast<uint64_t>(value); \
114     } \
115   }
116 
117 /**
118  * We cannot make any assumptions about the distribution of keys, so use a trivial hash function by
119  * default. The default probing strategy is designed to take all bits of the hash into account
120  * to avoid worst case behavior when the lower bits are all zero. Special hash functions can be
121  * implemented when more knowledge about a specific key distribution is available.
122  */
123 TRIVIAL_DEFAULT_INT_HASH(int8_t);
124 TRIVIAL_DEFAULT_INT_HASH(uint8_t);
125 TRIVIAL_DEFAULT_INT_HASH(int16_t);
126 TRIVIAL_DEFAULT_INT_HASH(uint16_t);
127 TRIVIAL_DEFAULT_INT_HASH(int32_t);
128 TRIVIAL_DEFAULT_INT_HASH(uint32_t);
129 TRIVIAL_DEFAULT_INT_HASH(int64_t);
130 TRIVIAL_DEFAULT_INT_HASH(uint64_t);
131 
132 /**
133  * One should try to avoid using floats as keys in hash tables, but sometimes it is convenient.
134  */
135 template<> struct DefaultHash<float> {
operator ()blender::DefaultHash136   uint64_t operator()(float value) const
137   {
138     return *reinterpret_cast<uint32_t *>(&value);
139   }
140 };
141 
142 template<> struct DefaultHash<bool> {
operator ()blender::DefaultHash143   uint64_t operator()(bool value) const
144   {
145     return static_cast<uint64_t>((value != false) * 1298191);
146   }
147 };
148 
hash_string(StringRef str)149 inline uint64_t hash_string(StringRef str)
150 {
151   uint64_t hash = 5381;
152   for (char c : str) {
153     hash = hash * 33 + c;
154   }
155   return hash;
156 }
157 
158 template<> struct DefaultHash<std::string> {
159   /**
160    * Take a #StringRef as parameter to support heterogeneous lookups in hash table implementations
161    * when std::string is used as key.
162    */
operator ()blender::DefaultHash163   uint64_t operator()(StringRef value) const
164   {
165     return hash_string(value);
166   }
167 };
168 
169 template<> struct DefaultHash<StringRef> {
operator ()blender::DefaultHash170   uint64_t operator()(StringRef value) const
171   {
172     return hash_string(value);
173   }
174 };
175 
176 template<> struct DefaultHash<StringRefNull> {
operator ()blender::DefaultHash177   uint64_t operator()(StringRef value) const
178   {
179     return hash_string(value);
180   }
181 };
182 
183 template<> struct DefaultHash<std::string_view> {
operator ()blender::DefaultHash184   uint64_t operator()(StringRef value) const
185   {
186     return hash_string(value);
187   }
188 };
189 
190 /**
191  * While we cannot guarantee that the lower 4 bits of a pointer are zero, it is often the case.
192  */
193 template<typename T> struct DefaultHash<T *> {
operator ()blender::DefaultHash194   uint64_t operator()(const T *value) const
195   {
196     uintptr_t ptr = reinterpret_cast<uintptr_t>(value);
197     uint64_t hash = static_cast<uint64_t>(ptr >> 4);
198     return hash;
199   }
200 };
201 
202 template<typename T> struct DefaultHash<std::unique_ptr<T>> {
operator ()blender::DefaultHash203   uint64_t operator()(const std::unique_ptr<T> &value) const
204   {
205     return DefaultHash<T *>{}(value.get());
206   }
207 };
208 
209 template<typename T1, typename T2> struct DefaultHash<std::pair<T1, T2>> {
operator ()blender::DefaultHash210   uint64_t operator()(const std::pair<T1, T2> &value) const
211   {
212     uint64_t hash1 = DefaultHash<T1>{}(value.first);
213     uint64_t hash2 = DefaultHash<T2>{}(value.second);
214     return hash1 ^ (hash2 * 33);
215   }
216 };
217 
218 }  // namespace blender
219