1 #pragma once
2
3 #include <cstdint> /* uint32_t */
4 #include <cstring>
5 #include <vector>
6
7 #include "ext/xxhash.h"
8 #include "Common/CommonFuncs.h"
9 #include "Common/Log.h"
10
11 // TODO: Try hardware CRC. Unfortunately not available on older Intels or ARM32.
12 // Seems to be ubiquitous on ARM64 though.
13 template<class K>
HashKey(const K & k)14 inline uint32_t HashKey(const K &k) {
15 return XXH3_64bits(&k, sizeof(k)) & 0xFFFFFFFF;
16 }
17 template<class K>
KeyEquals(const K & a,const K & b)18 inline bool KeyEquals(const K &a, const K &b) {
19 return !memcmp(&a, &b, sizeof(K));
20 }
21
22 enum class BucketState : uint8_t {
23 FREE,
24 TAKEN,
25 REMOVED, // for linear probing to work (and removal during deletion) we need tombstones
26 };
27
28 // Uses linear probing for cache-friendliness. Not segregating values from keys because
29 // we always use very small values, so it's probably better to have them in the same
30 // cache-line as the corresponding key.
31 // Enforces that value are pointers to make sure that combined storage makes sense.
32 template <class Key, class Value, Value NullValue>
33 class DenseHashMap {
34 public:
DenseHashMap(int initialCapacity)35 DenseHashMap(int initialCapacity) : capacity_(initialCapacity) {
36 map.resize(initialCapacity);
37 state.resize(initialCapacity);
38 }
39
40 // Returns nullptr if no entry was found.
Get(const Key & key)41 Value Get(const Key &key) {
42 uint32_t mask = capacity_ - 1;
43 uint32_t pos = HashKey(key) & mask;
44 // No? Let's go into search mode. Linear probing.
45 uint32_t p = pos;
46 while (true) {
47 if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key))
48 return map[p].value;
49 else if (state[p] == BucketState::FREE)
50 return NullValue;
51 p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
52 if (p == pos) {
53 _assert_msg_(false, "DenseHashMap: Hit full on Get()");
54 }
55 }
56 return NullValue;
57 }
58
59 // Returns false if we already had the key! Which is a bit different.
Insert(const Key & key,Value value)60 bool Insert(const Key &key, Value value) {
61 // Check load factor, resize if necessary. We never shrink.
62 if (count_ > capacity_ / 2) {
63 Grow(2);
64 }
65 uint32_t mask = capacity_ - 1;
66 uint32_t pos = HashKey(key) & mask;
67 uint32_t p = pos;
68 while (true) {
69 if (state[p] == BucketState::TAKEN) {
70 if (KeyEquals(key, map[p].key)) {
71 // Bad! We already got this one. Let's avoid this case.
72 _assert_msg_(false, "DenseHashMap: Duplicate key inserted");
73 return false;
74 }
75 // continue looking....
76 } else {
77 // Got a place, either removed or FREE.
78 break;
79 }
80 p = (p + 1) & mask;
81 if (p == pos) {
82 // FULL! Error. Should not happen thanks to Grow().
83 _assert_msg_(false, "DenseHashMap: Hit full on Insert()");
84 }
85 }
86 if (state[p] == BucketState::REMOVED) {
87 removedCount_--;
88 }
89 state[p] = BucketState::TAKEN;
90 map[p].key = key;
91 map[p].value = value;
92 count_++;
93 return true;
94 }
95
Remove(const Key & key)96 bool Remove(const Key &key) {
97 uint32_t mask = capacity_ - 1;
98 uint32_t pos = HashKey(key) & mask;
99 uint32_t p = pos;
100 while (state[p] != BucketState::FREE) {
101 if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key)) {
102 // Got it! Mark it as removed.
103 state[p] = BucketState::REMOVED;
104 removedCount_++;
105 count_--;
106 return true;
107 }
108 p = (p + 1) & mask;
109 if (p == pos) {
110 // FULL! Error. Should not happen.
111 _assert_msg_(false, "DenseHashMap: Hit full on Remove()");
112 }
113 }
114 return false;
115 }
116
size()117 size_t size() const {
118 return count_;
119 }
120
121 template<class T>
Iterate(T func)122 inline void Iterate(T func) const {
123 for (size_t i = 0; i < map.size(); i++) {
124 if (state[i] == BucketState::TAKEN) {
125 func(map[i].key, map[i].value);
126 }
127 }
128 }
129
Clear()130 void Clear() {
131 memset(state.data(), (int)BucketState::FREE, state.size());
132 count_ = 0;
133 removedCount_ = 0;
134 }
135
Rebuild()136 void Rebuild() {
137 Grow(1);
138 }
139
Maintain()140 void Maintain() {
141 // Heuristic
142 if (removedCount_ >= capacity_ / 4) {
143 Rebuild();
144 }
145 }
146
147 private:
Grow(int factor)148 void Grow(int factor) {
149 // We simply move out the existing data, then we re-insert the old.
150 // This is extremely non-atomic and will need synchronization.
151 std::vector<Pair> old = std::move(map);
152 std::vector<BucketState> oldState = std::move(state);
153 // Can't assume move will clear, it just may clear.
154 map.clear();
155 state.clear();
156
157 int oldCount = count_;
158 capacity_ *= factor;
159 map.resize(capacity_);
160 state.resize(capacity_);
161 count_ = 0; // Insert will update it.
162 removedCount_ = 0;
163 for (size_t i = 0; i < old.size(); i++) {
164 if (oldState[i] == BucketState::TAKEN) {
165 Insert(old[i].key, old[i].value);
166 }
167 }
168 _assert_msg_(oldCount == count_, "DenseHashMap: count should not change in Grow()");
169 }
170 struct Pair {
171 Key key;
172 Value value;
173 };
174 std::vector<Pair> map;
175 std::vector<BucketState> state;
176 int capacity_;
177 int count_ = 0;
178 int removedCount_ = 0;
179 };
180
181 // Like the above, uses linear probing for cache-friendliness.
182 // Does not perform hashing at all so expects well-distributed keys.
183 template <class Value, Value NullValue>
184 class PrehashMap {
185 public:
PrehashMap(int initialCapacity)186 PrehashMap(int initialCapacity) : capacity_(initialCapacity) {
187 map.resize(initialCapacity);
188 state.resize(initialCapacity);
189 }
190
191 // Returns nullptr if no entry was found.
Get(uint32_t hash)192 Value Get(uint32_t hash) {
193 uint32_t mask = capacity_ - 1;
194 uint32_t pos = hash & mask;
195 // No? Let's go into search mode. Linear probing.
196 uint32_t p = pos;
197 while (true) {
198 if (state[p] == BucketState::TAKEN && hash == map[p].hash)
199 return map[p].value;
200 else if (state[p] == BucketState::FREE)
201 return NullValue;
202 p = (p + 1) & mask; // If the state is REMOVED, we just keep on walking.
203 if (p == pos) {
204 _assert_msg_(false, "PrehashMap: Hit full on Get()");
205 }
206 }
207 return NullValue;
208 }
209
210 // Returns false if we already had the key! Which is a bit different.
Insert(uint32_t hash,Value value)211 bool Insert(uint32_t hash, Value value) {
212 // Check load factor, resize if necessary. We never shrink.
213 if (count_ > capacity_ / 2) {
214 Grow(2);
215 }
216 uint32_t mask = capacity_ - 1;
217 uint32_t pos = hash & mask;
218 uint32_t p = pos;
219 while (state[p] != BucketState::FREE) {
220 if (state[p] == BucketState::TAKEN) {
221 if (hash == map[p].hash)
222 return false; // Bad!
223 } else {
224 // Got a place, either removed or FREE.
225 break;
226 }
227 p = (p + 1) & mask;
228 if (p == pos) {
229 // FULL! Error. Should not happen thanks to Grow().
230 _assert_msg_(false, "PrehashMap: Hit full on Insert()");
231 }
232 }
233 if (state[p] == BucketState::REMOVED) {
234 removedCount_--;
235 }
236 state[p] = BucketState::TAKEN;
237 map[p].hash = hash;
238 map[p].value = value;
239 count_++;
240 return true;
241 }
242
Remove(uint32_t hash)243 bool Remove(uint32_t hash) {
244 uint32_t mask = capacity_ - 1;
245 uint32_t pos = hash & mask;
246 uint32_t p = pos;
247 while (state[p] != BucketState::FREE) {
248 if (state[p] == BucketState::TAKEN && hash == map[p].hash) {
249 // Got it!
250 state[p] = BucketState::REMOVED;
251 removedCount_++;
252 count_--;
253 return true;
254 }
255 p = (p + 1) & mask;
256 if (p == pos) {
257 _assert_msg_(false, "PrehashMap: Hit full on Remove()");
258 }
259 }
260 return false;
261 }
262
size()263 size_t size() {
264 return count_;
265 }
266
267 template<class T>
Iterate(T func)268 void Iterate(T func) const {
269 for (size_t i = 0; i < map.size(); i++) {
270 if (state[i] == BucketState::TAKEN) {
271 func(map[i].hash, map[i].value);
272 }
273 }
274 }
275
Clear()276 void Clear() {
277 memset(state.data(), (int)BucketState::FREE, state.size());
278 count_ = 0;
279 removedCount_ = 0;
280 }
281
282 // Gets rid of REMOVED tombstones, making lookups somewhat more efficient.
Rebuild()283 void Rebuild() {
284 Grow(1);
285 }
286
Maintain()287 void Maintain() {
288 // Heuristic
289 if (removedCount_ >= capacity_ / 4) {
290 Rebuild();
291 }
292 }
293
294 private:
Grow(int factor)295 void Grow(int factor) {
296 // We simply move out the existing data, then we re-insert the old.
297 // This is extremely non-atomic and will need synchronization.
298 std::vector<Pair> old = std::move(map);
299 std::vector<BucketState> oldState = std::move(state);
300 // Can't assume move will clear, it just may clear.
301 map.clear();
302 state.clear();
303
304 int oldCount = count_;
305 int oldCapacity = capacity_;
306 capacity_ *= factor;
307 map.resize(capacity_);
308 state.resize(capacity_);
309 count_ = 0; // Insert will update it.
310 removedCount_ = 0;
311 for (size_t i = 0; i < old.size(); i++) {
312 if (oldState[i] == BucketState::TAKEN) {
313 Insert(old[i].hash, old[i].value);
314 }
315 }
316 INFO_LOG(G3D, "Grew hashmap capacity from %d to %d", oldCapacity, capacity_);
317 _assert_msg_(oldCount == count_, "PrehashMap: count should not change in Grow()");
318 }
319 struct Pair {
320 uint32_t hash;
321 Value value;
322 };
323 std::vector<Pair> map;
324 std::vector<BucketState> state;
325 int capacity_;
326 int count_ = 0;
327 int removedCount_ = 0;
328 };
329