1 #pragma once
2 
3 #include <cstdint> /* uint32_t */
4 #include <cstring>
5 #include <vector>
6 
7 #include "ext/xxhash.h"
8 #include "Common/CommonFuncs.h"
9 #include "Common/Log.h"
10 
11 // TODO: Try hardware CRC. Unfortunately not available on older Intels or ARM32.
12 // Seems to be ubiquitous on ARM64 though.
13 template<class K>
HashKey(const K & k)14 inline uint32_t HashKey(const K &k) {
15 	return XXH3_64bits(&k, sizeof(k)) & 0xFFFFFFFF;
16 }
17 template<class K>
KeyEquals(const K & a,const K & b)18 inline bool KeyEquals(const K &a, const K &b) {
19 	return !memcmp(&a, &b, sizeof(K));
20 }
21 
22 enum class BucketState : uint8_t {
23 	FREE,
24 	TAKEN,
25 	REMOVED,  // for linear probing to work (and removal during deletion) we need tombstones
26 };
27 
28 // Uses linear probing for cache-friendliness. Not segregating values from keys because
29 // we always use very small values, so it's probably better to have them in the same
30 // cache-line as the corresponding key.
31 // Enforces that value are pointers to make sure that combined storage makes sense.
32 template <class Key, class Value, Value NullValue>
33 class DenseHashMap {
34 public:
DenseHashMap(int initialCapacity)35 	DenseHashMap(int initialCapacity) : capacity_(initialCapacity) {
36 		map.resize(initialCapacity);
37 		state.resize(initialCapacity);
38 	}
39 
40 	// Returns nullptr if no entry was found.
Get(const Key & key)41 	Value Get(const Key &key) {
42 		uint32_t mask = capacity_ - 1;
43 		uint32_t pos = HashKey(key) & mask;
44 		// No? Let's go into search mode. Linear probing.
45 		uint32_t p = pos;
46 		while (true) {
47 			if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key))
48 				return map[p].value;
49 			else if (state[p] == BucketState::FREE)
50 				return NullValue;
51 			p = (p + 1) & mask;  // If the state is REMOVED, we just keep on walking.
52 			if (p == pos) {
53 				_assert_msg_(false, "DenseHashMap: Hit full on Get()");
54 			}
55 		}
56 		return NullValue;
57 	}
58 
59 	// Returns false if we already had the key! Which is a bit different.
Insert(const Key & key,Value value)60 	bool Insert(const Key &key, Value value) {
61 		// Check load factor, resize if necessary. We never shrink.
62 		if (count_ > capacity_ / 2) {
63 			Grow(2);
64 		}
65 		uint32_t mask = capacity_ - 1;
66 		uint32_t pos = HashKey(key) & mask;
67 		uint32_t p = pos;
68 		while (true) {
69 			if (state[p] == BucketState::TAKEN) {
70 				if (KeyEquals(key, map[p].key)) {
71 					// Bad! We already got this one. Let's avoid this case.
72 					_assert_msg_(false, "DenseHashMap: Duplicate key inserted");
73 					return false;
74 				}
75 				// continue looking....
76 			} else {
77 				// Got a place, either removed or FREE.
78 				break;
79 			}
80 			p = (p + 1) & mask;
81 			if (p == pos) {
82 				// FULL! Error. Should not happen thanks to Grow().
83 				_assert_msg_(false, "DenseHashMap: Hit full on Insert()");
84 			}
85 		}
86 		if (state[p] == BucketState::REMOVED) {
87 			removedCount_--;
88 		}
89 		state[p] = BucketState::TAKEN;
90 		map[p].key = key;
91 		map[p].value = value;
92 		count_++;
93 		return true;
94 	}
95 
Remove(const Key & key)96 	bool Remove(const Key &key) {
97 		uint32_t mask = capacity_ - 1;
98 		uint32_t pos = HashKey(key) & mask;
99 		uint32_t p = pos;
100 		while (state[p] != BucketState::FREE) {
101 			if (state[p] == BucketState::TAKEN && KeyEquals(key, map[p].key)) {
102 				// Got it! Mark it as removed.
103 				state[p] = BucketState::REMOVED;
104 				removedCount_++;
105 				count_--;
106 				return true;
107 			}
108 			p = (p + 1) & mask;
109 			if (p == pos) {
110 				// FULL! Error. Should not happen.
111 				_assert_msg_(false, "DenseHashMap: Hit full on Remove()");
112 			}
113 		}
114 		return false;
115 	}
116 
size()117 	size_t size() const {
118 		return count_;
119 	}
120 
121 	template<class T>
Iterate(T func)122 	inline void Iterate(T func) const {
123 		for (size_t i = 0; i < map.size(); i++) {
124 			if (state[i] == BucketState::TAKEN) {
125 				func(map[i].key, map[i].value);
126 			}
127 		}
128 	}
129 
Clear()130 	void Clear() {
131 		memset(state.data(), (int)BucketState::FREE, state.size());
132 		count_ = 0;
133 		removedCount_ = 0;
134 	}
135 
Rebuild()136 	void Rebuild() {
137 		Grow(1);
138 	}
139 
Maintain()140 	void Maintain() {
141 		// Heuristic
142 		if (removedCount_ >= capacity_ / 4) {
143 			Rebuild();
144 		}
145 	}
146 
147 private:
Grow(int factor)148 	void Grow(int factor) {
149 		// We simply move out the existing data, then we re-insert the old.
150 		// This is extremely non-atomic and will need synchronization.
151 		std::vector<Pair> old = std::move(map);
152 		std::vector<BucketState> oldState = std::move(state);
153 		// Can't assume move will clear, it just may clear.
154 		map.clear();
155 		state.clear();
156 
157 		int oldCount = count_;
158 		capacity_ *= factor;
159 		map.resize(capacity_);
160 		state.resize(capacity_);
161 		count_ = 0;  // Insert will update it.
162 		removedCount_ = 0;
163 		for (size_t i = 0; i < old.size(); i++) {
164 			if (oldState[i] == BucketState::TAKEN) {
165 				Insert(old[i].key, old[i].value);
166 			}
167 		}
168 		_assert_msg_(oldCount == count_, "DenseHashMap: count should not change in Grow()");
169 	}
170 	struct Pair {
171 		Key key;
172 		Value value;
173 	};
174 	std::vector<Pair> map;
175 	std::vector<BucketState> state;
176 	int capacity_;
177 	int count_ = 0;
178 	int removedCount_ = 0;
179 };
180 
181 // Like the above, uses linear probing for cache-friendliness.
182 // Does not perform hashing at all so expects well-distributed keys.
183 template <class Value, Value NullValue>
184 class PrehashMap {
185 public:
PrehashMap(int initialCapacity)186 	PrehashMap(int initialCapacity) : capacity_(initialCapacity) {
187 		map.resize(initialCapacity);
188 		state.resize(initialCapacity);
189 	}
190 
191 	// Returns nullptr if no entry was found.
Get(uint32_t hash)192 	Value Get(uint32_t hash) {
193 		uint32_t mask = capacity_ - 1;
194 		uint32_t pos = hash & mask;
195 		// No? Let's go into search mode. Linear probing.
196 		uint32_t p = pos;
197 		while (true) {
198 			if (state[p] == BucketState::TAKEN && hash == map[p].hash)
199 				return map[p].value;
200 			else if (state[p] == BucketState::FREE)
201 				return NullValue;
202 			p = (p + 1) & mask;  // If the state is REMOVED, we just keep on walking.
203 			if (p == pos) {
204 				_assert_msg_(false, "PrehashMap: Hit full on Get()");
205 			}
206 		}
207 		return NullValue;
208 	}
209 
210 	// Returns false if we already had the key! Which is a bit different.
Insert(uint32_t hash,Value value)211 	bool Insert(uint32_t hash, Value value) {
212 		// Check load factor, resize if necessary. We never shrink.
213 		if (count_ > capacity_ / 2) {
214 			Grow(2);
215 		}
216 		uint32_t mask = capacity_ - 1;
217 		uint32_t pos = hash & mask;
218 		uint32_t p = pos;
219 		while (state[p] != BucketState::FREE) {
220 			if (state[p] == BucketState::TAKEN) {
221 				if (hash == map[p].hash)
222 					return false;  // Bad!
223 			} else {
224 				// Got a place, either removed or FREE.
225 				break;
226 			}
227 			p = (p + 1) & mask;
228 			if (p == pos) {
229 				// FULL! Error. Should not happen thanks to Grow().
230 				_assert_msg_(false, "PrehashMap: Hit full on Insert()");
231 			}
232 		}
233 		if (state[p] == BucketState::REMOVED) {
234 			removedCount_--;
235 		}
236 		state[p] = BucketState::TAKEN;
237 		map[p].hash = hash;
238 		map[p].value = value;
239 		count_++;
240 		return true;
241 	}
242 
Remove(uint32_t hash)243 	bool Remove(uint32_t hash) {
244 		uint32_t mask = capacity_ - 1;
245 		uint32_t pos = hash & mask;
246 		uint32_t p = pos;
247 		while (state[p] != BucketState::FREE) {
248 			if (state[p] == BucketState::TAKEN && hash == map[p].hash) {
249 				// Got it!
250 				state[p] = BucketState::REMOVED;
251 				removedCount_++;
252 				count_--;
253 				return true;
254 			}
255 			p = (p + 1) & mask;
256 			if (p == pos) {
257 				_assert_msg_(false, "PrehashMap: Hit full on Remove()");
258 			}
259 		}
260 		return false;
261 	}
262 
size()263 	size_t size() {
264 		return count_;
265 	}
266 
267 	template<class T>
Iterate(T func)268 	void Iterate(T func) const {
269 		for (size_t i = 0; i < map.size(); i++) {
270 			if (state[i] == BucketState::TAKEN) {
271 				func(map[i].hash, map[i].value);
272 			}
273 		}
274 	}
275 
Clear()276 	void Clear() {
277 		memset(state.data(), (int)BucketState::FREE, state.size());
278 		count_ = 0;
279 		removedCount_ = 0;
280 	}
281 
282 	// Gets rid of REMOVED tombstones, making lookups somewhat more efficient.
Rebuild()283 	void Rebuild() {
284 		Grow(1);
285 	}
286 
Maintain()287 	void Maintain() {
288 		// Heuristic
289 		if (removedCount_ >= capacity_ / 4) {
290 			Rebuild();
291 		}
292 	}
293 
294 private:
Grow(int factor)295 	void Grow(int factor) {
296 		// We simply move out the existing data, then we re-insert the old.
297 		// This is extremely non-atomic and will need synchronization.
298 		std::vector<Pair> old = std::move(map);
299 		std::vector<BucketState> oldState = std::move(state);
300 		// Can't assume move will clear, it just may clear.
301 		map.clear();
302 		state.clear();
303 
304 		int oldCount = count_;
305 		int oldCapacity = capacity_;
306 		capacity_ *= factor;
307 		map.resize(capacity_);
308 		state.resize(capacity_);
309 		count_ = 0;  // Insert will update it.
310 		removedCount_ = 0;
311 		for (size_t i = 0; i < old.size(); i++) {
312 			if (oldState[i] == BucketState::TAKEN) {
313 				Insert(old[i].hash, old[i].value);
314 			}
315 		}
316 		INFO_LOG(G3D, "Grew hashmap capacity from %d to %d", oldCapacity, capacity_);
317 		_assert_msg_(oldCount == count_, "PrehashMap: count should not change in Grow()");
318 	}
319 	struct Pair {
320 		uint32_t hash;
321 		Value value;
322 	};
323 	std::vector<Pair> map;
324 	std::vector<BucketState> state;
325 	int capacity_;
326 	int count_ = 0;
327 	int removedCount_ = 0;
328 };
329