1 #ifndef AWS_COMMON_HASH_TABLE_H
2 #define AWS_COMMON_HASH_TABLE_H
3 
4 /**
5  * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
6  * SPDX-License-Identifier: Apache-2.0.
7  */
8 
9 #include <aws/common/common.h>
10 
11 #include <stddef.h>
12 
13 #define AWS_COMMON_HASH_TABLE_ITER_CONTINUE (1 << 0)
14 #define AWS_COMMON_HASH_TABLE_ITER_DELETE (1 << 1)
15 #define AWS_COMMON_HASH_TABLE_ITER_ERROR (1 << 2)
16 
17 /**
18  * Hash table data structure. This module provides an automatically resizing
19  * hash table implementation for general purpose use. The hash table stores a
20  * mapping between void * keys and values; it is expected that in most cases,
21  * these will point to a structure elsewhere in the heap, instead of inlining a
22  * key or value into the hash table element itself.
23  *
24  * Currently, this hash table implements a variant of robin hood hashing, but
25  * we do not guarantee that this won't change in the future.
26  *
27  * Associated with each hash function are four callbacks:
28  *
29  *   hash_fn - A hash function from the keys to a uint64_t. It is critical that
30  *      the hash function for a key does not change while the key is in the hash
31  *      table; violating this results in undefined behavior. Collisions are
32  *      tolerated, though naturally with reduced performance.
33  *
34  *   equals_fn - An equality comparison function. This function must be
35  *      reflexive and consistent with hash_fn.
36  *
37  *   destroy_key_fn, destroy_value_fn - Optional callbacks invoked when the
38  *      table is cleared or cleaned up and at the caller's option when an element
39  *      is removed from the table. Either or both may be set to NULL, which
40  *      has the same effect as a no-op destroy function.
41  *
42  * This datastructure can be safely moved between threads, subject to the
43  * requirements of the underlying allocator. It is also safe to invoke
44  * non-mutating operations on the hash table from multiple threads. A suitable
45  * memory barrier must be used when transitioning from single-threaded mutating
46  * usage to multithreaded usage.
47  */
48 struct hash_table_state; /* Opaque pointer */
49 struct aws_hash_table {
50     struct hash_table_state *p_impl;
51 };
52 
53 /**
54  * Represents an element in the hash table. Various operations on the hash
55  * table may provide pointers to elements stored within the hash table;
56  * generally, calling code may alter value, but must not alter key (or any
57  * information used to compute key's hash code).
58  *
59  * Pointers to elements within the hash are invalidated whenever an operation
60  * which may change the number of elements in the hash is invoked (i.e. put,
61  * delete, clear, and clean_up), regardless of whether the number of elements
62  * actually changes.
63  */
64 struct aws_hash_element {
65     const void *key;
66     void *value;
67 };
68 
69 enum aws_hash_iter_status {
70     AWS_HASH_ITER_STATUS_DONE,
71     AWS_HASH_ITER_STATUS_DELETE_CALLED,
72     AWS_HASH_ITER_STATUS_READY_FOR_USE,
73 };
74 
75 struct aws_hash_iter {
76     const struct aws_hash_table *map;
77     struct aws_hash_element element;
78     size_t slot;
79     size_t limit;
80     enum aws_hash_iter_status status;
81     /*
82      * Reserving extra fields for binary compatibility with future expansion of
83      * iterator in case hash table implementation changes.
84      */
85     int unused_0;
86     void *unused_1;
87     void *unused_2;
88 };
89 
90 /**
91  * Prototype for a key hashing function pointer.
92  */
93 typedef uint64_t(aws_hash_fn)(const void *key);
94 
95 /**
96  * Prototype for a hash table equality check function pointer.
97  *
98  * This type is usually used for a function that compares two hash table
99  * keys, but note that the same type is used for a function that compares
100  * two hash table values in aws_hash_table_eq.
101  *
102  * Equality functions used in a hash table must be reflexive (i.e., a == b if
103  * and only if b == a), and must be consistent with the hash function in use.
104  */
105 typedef bool(aws_hash_callback_eq_fn)(const void *a, const void *b);
106 
107 /**
108  * Prototype for a hash table key or value destructor function pointer.
109  *
110  * This function is used to destroy elements in the hash table when the
111  * table is cleared or cleaned up.
112  *
113  * Note that functions which remove individual elements from the hash
114  * table provide options of whether or not to invoke the destructors
115  * on the key and value of a removed element.
116  */
117 typedef void(aws_hash_callback_destroy_fn)(void *key_or_value);
118 
119 AWS_EXTERN_C_BEGIN
120 
121 /**
122  * Initializes a hash map with initial capacity for 'size' elements
123  * without resizing. Uses hash_fn to compute the hash of each element.
124  * equals_fn to compute equality of two keys.  Whenever an element is
125  * removed without being returned, destroy_key_fn is run on the pointer
126  * to the key and destroy_value_fn is run on the pointer to the value.
127  * Either or both may be NULL if a callback is not desired in this case.
128  */
129 AWS_COMMON_API
130 int aws_hash_table_init(
131     struct aws_hash_table *map,
132     struct aws_allocator *alloc,
133     size_t size,
134     aws_hash_fn *hash_fn,
135     aws_hash_callback_eq_fn *equals_fn,
136     aws_hash_callback_destroy_fn *destroy_key_fn,
137     aws_hash_callback_destroy_fn *destroy_value_fn);
138 
139 /**
140  * Deletes every element from map and frees all associated memory.
141  * destroy_fn will be called for each element.  aws_hash_table_init
142  * must be called before reusing the hash table.
143  *
144  * This method is idempotent.
145  */
146 AWS_COMMON_API
147 void aws_hash_table_clean_up(struct aws_hash_table *map);
148 
149 /**
150  * Safely swaps two hash tables. Note that we swap the entirety of the hash
151  * table, including which allocator is associated.
152  *
153  * Neither hash table is required to be initialized; if one or both is
154  * uninitialized, then the uninitialized state is also swapped.
155  */
156 AWS_COMMON_API
157 void aws_hash_table_swap(struct aws_hash_table *AWS_RESTRICT a, struct aws_hash_table *AWS_RESTRICT b);
158 
159 /**
160  * Moves the hash table in 'from' to 'to'. After this move, 'from' will
161  * be identical to the state of the original 'to' hash table, and 'to'
162  * will be in the same state as if it had been passed to aws_hash_table_clean_up
163  * (that is, it will have no memory allocated, and it will be safe to
164  * either discard it or call aws_hash_table_clean_up again).
165  *
166  * Note that 'to' will not be cleaned up. You should make sure that 'to'
167  * is either uninitialized or cleaned up before moving a hashtable into
168  * it.
169  */
170 AWS_COMMON_API
171 void aws_hash_table_move(struct aws_hash_table *AWS_RESTRICT to, struct aws_hash_table *AWS_RESTRICT from);
172 
173 /**
174  * Returns the current number of entries in the table.
175  */
176 AWS_COMMON_API
177 size_t aws_hash_table_get_entry_count(const struct aws_hash_table *map);
178 
179 /**
180  * Returns an iterator to be used for iterating through a hash table.
181  * Iterator will already point to the first element of the table it finds,
182  * which can be accessed as iter.element.
183  *
184  * This function cannot fail, but if there are no elements in the table,
185  * the returned iterator will return true for aws_hash_iter_done(&iter).
186  */
187 AWS_COMMON_API
188 struct aws_hash_iter aws_hash_iter_begin(const struct aws_hash_table *map);
189 
190 /**
191  * Returns true if iterator is done iterating through table, false otherwise.
192  * If this is true, the iterator will not include an element of the table.
193  */
194 AWS_COMMON_API
195 bool aws_hash_iter_done(const struct aws_hash_iter *iter);
196 
197 /**
198  * Updates iterator so that it points to next element of hash table.
199  *
200  * This and the two previous functions are designed to be used together with
201  * the following idiom:
202  *
203  * for (struct aws_hash_iter iter = aws_hash_iter_begin(&map);
204  *      !aws_hash_iter_done(&iter); aws_hash_iter_next(&iter)) {
205  *     const key_type key = *(const key_type *)iter.element.key;
206  *     value_type value = *(value_type *)iter.element.value;
207  *     // etc.
208  * }
209  *
210  * Note that calling this on an iter which is "done" is idempotent:
211  * i.e. it will return another iter which is "done".
212  */
213 AWS_COMMON_API
214 void aws_hash_iter_next(struct aws_hash_iter *iter);
215 
216 /**
217  * Deletes the element currently pointed-to by the hash iterator.
218  * After calling this method, the element member of the iterator
219  * should not be accessed until the next call to aws_hash_iter_next.
220  *
221  * @param destroy_contents If true, the destructors for the key and value
222  *  will be called.
223  */
224 AWS_COMMON_API
225 void aws_hash_iter_delete(struct aws_hash_iter *iter, bool destroy_contents);
226 
227 /**
228  * Attempts to locate an element at key.  If the element is found, a
229  * pointer to the value is placed in *p_elem; if it is not found,
230  * *pElem is set to NULL. Either way, AWS_OP_SUCCESS is returned.
231  *
232  * This method does not change the state of the hash table. Therefore, it
233  * is safe to call _find from multiple threads on the same hash table,
234  * provided no mutating operations happen in parallel.
235  *
236  * Calling code may update the value in the hash table by modifying **pElem
237  * after a successful find. However, this pointer is not guaranteed to
238  * remain usable after a subsequent call to _put, _delete, _clear, or
239  * _clean_up.
240  */
241 
242 AWS_COMMON_API
243 int aws_hash_table_find(const struct aws_hash_table *map, const void *key, struct aws_hash_element **p_elem);
244 
245 /**
246  * Attempts to locate an element at key. If no such element was found,
247  * creates a new element, with value initialized to NULL. In either case, a
248  * pointer to the element is placed in *p_elem.
249  *
250  * If was_created is non-NULL, *was_created is set to 0 if an existing
251  * element was found, or 1 is a new element was created.
252  *
253  * Returns AWS_OP_SUCCESS if an item was found or created.
254  * Raises AWS_ERROR_OOM if hash table expansion was required and memory
255  * allocation failed.
256  */
257 AWS_COMMON_API
258 int aws_hash_table_create(
259     struct aws_hash_table *map,
260     const void *key,
261     struct aws_hash_element **p_elem,
262     int *was_created);
263 
264 /**
265  * Inserts a new element at key, with the given value. If another element
266  * exists at that key, the old element will be overwritten; both old key and
267  * value objects will be destroyed.
268  *
269  * If was_created is non-NULL, *was_created is set to 0 if an existing
270  * element was found, or 1 is a new element was created.
271  *
272  * Returns AWS_OP_SUCCESS if an item was found or created.
273  * Raises AWS_ERROR_OOM if hash table expansion was required and memory
274  */
275 AWS_COMMON_API
276 int aws_hash_table_put(struct aws_hash_table *map, const void *key, void *value, int *was_created);
277 
278 /**
279  * Removes element at key. Always returns AWS_OP_SUCCESS.
280  *
281  * If pValue is non-NULL, the existing value (if any) is moved into
282  * (*value) before removing from the table, and destroy_fn is _not_
283  * invoked. If pValue is NULL, then (if the element existed) destroy_fn
284  * will be invoked on the element being removed.
285  *
286  * If was_present is non-NULL, it is set to 0 if the element was
287  * not present, or 1 if it was present (and is now removed).
288  */
289 AWS_COMMON_API
290 int aws_hash_table_remove(
291     struct aws_hash_table *map,
292     const void *key,
293     struct aws_hash_element *p_value,
294     int *was_present);
295 
296 /**
297  * Removes element already known (typically by find()).
298  *
299  * p_value should point to a valid element returned by create() or find().
300  *
301  * NOTE: DO NOT call this method from inside of a aws_hash_table_foreach callback, return
302  * AWS_COMMON_HASH_TABLE_ITER_DELETE instead.
303  */
304 AWS_COMMON_API
305 int aws_hash_table_remove_element(struct aws_hash_table *map, struct aws_hash_element *p_value);
306 
307 /**
308  * Iterates through every element in the map and invokes the callback on
309  * that item. Iteration is performed in an arbitrary, implementation-defined
310  * order, and is not guaranteed to be consistent across invocations.
311  *
312  * The callback may change the value associated with the key by overwriting
313  * the value pointed-to by value. In this case, the on_element_removed
314  * callback will not be invoked, unless the callback invokes
315  * AWS_COMMON_HASH_TABLE_ITER_DELETE (in which case the on_element_removed
316  * is given the updated value).
317  *
318  * The callback must return a bitmask of zero or more of the following values
319  * ORed together:
320  *
321  * # AWS_COMMON_HASH_TABLE_ITER_CONTINUE - Continues iteration to the next
322  *     element (if not set, iteration stops)
323  * # AWS_COMMON_HASH_TABLE_ITER_DELETE   - Deletes the current value and
324  *     continues iteration.  destroy_fn will NOT be invoked.
325  * # AWS_COMMON_HASH_TABLE_ITER_ERROR   - Stop iteration with error.
326  *     No action will be taken for the current value and the value before this.
327  *     No rolling back. The deleted value before will NOT be back.
328  *     aws_hash_table_foreach returns AWS_OP_ERR after stropping the iteration.
329  *
330  * Invoking any method which may change the contents of the hashtable
331  * during iteration results in undefined behavior. However, you may safely
332  * invoke non-mutating operations during an iteration.
333  *
334  * This operation is mutating only if AWS_COMMON_HASH_TABLE_ITER_DELETE
335  * is returned at some point during iteration. Otherwise, it is non-mutating
336  * and is safe to invoke in parallel with other non-mutating operations.
337  */
338 
339 AWS_COMMON_API
340 int aws_hash_table_foreach(
341     struct aws_hash_table *map,
342     int (*callback)(void *context, struct aws_hash_element *p_element),
343     void *context);
344 
345 /**
346  * Compares two hash tables for equality. Both hash tables must have equivalent
347  * key comparators; values will be compared using the comparator passed into this
348  * function. The key hash function does not need to be equivalent between the
349  * two hash tables.
350  */
351 AWS_COMMON_API
352 bool aws_hash_table_eq(
353     const struct aws_hash_table *a,
354     const struct aws_hash_table *b,
355     aws_hash_callback_eq_fn *value_eq);
356 
357 /**
358  * Removes every element from the hash map. destroy_fn will be called for
359  * each element.
360  */
361 AWS_COMMON_API
362 void aws_hash_table_clear(struct aws_hash_table *map);
363 
364 /**
365  * Convenience hash function for NULL-terminated C-strings
366  */
367 AWS_COMMON_API
368 uint64_t aws_hash_c_string(const void *item);
369 
370 /**
371  * Convenience hash function for struct aws_strings.
372  * Hash is same as used on the string bytes by aws_hash_c_string.
373  */
374 AWS_COMMON_API
375 uint64_t aws_hash_string(const void *item);
376 
377 /**
378  * Convenience hash function for struct aws_byte_cursor.
379  * Hash is same as used on the string bytes by aws_hash_c_string.
380  */
381 AWS_COMMON_API
382 uint64_t aws_hash_byte_cursor_ptr(const void *item);
383 
384 /**
385  * Convenience hash function which hashes the pointer value directly,
386  * without dereferencing.  This can be used in cases where pointer identity
387  * is desired, or where a uintptr_t is encoded into a const void *.
388  */
389 AWS_COMMON_API
390 uint64_t aws_hash_ptr(const void *item);
391 
392 AWS_COMMON_API
393 uint64_t aws_hash_combine(uint64_t item1, uint64_t item2);
394 
395 /**
396  * Convenience eq callback for NULL-terminated C-strings
397  */
398 AWS_COMMON_API
399 bool aws_hash_callback_c_str_eq(const void *a, const void *b);
400 
401 /**
402  * Convenience eq callback for AWS strings
403  */
404 AWS_COMMON_API
405 bool aws_hash_callback_string_eq(const void *a, const void *b);
406 
407 /**
408  * Convenience destroy callback for AWS strings
409  */
410 AWS_COMMON_API
411 void aws_hash_callback_string_destroy(void *a);
412 
413 /**
414  * Equality function which compares pointer equality.
415  */
416 AWS_COMMON_API
417 bool aws_ptr_eq(const void *a, const void *b);
418 
419 /**
420  * Best-effort check of hash_table_state data-structure invariants
421  */
422 AWS_COMMON_API
423 bool aws_hash_table_is_valid(const struct aws_hash_table *map);
424 
425 /**
426  * Given a pointer to a hash_iter, checks that it is well-formed, with all data-structure invariants.
427  */
428 AWS_COMMON_API
429 bool aws_hash_iter_is_valid(const struct aws_hash_iter *iter);
430 
431 AWS_EXTERN_C_END
432 
433 #endif /* AWS_COMMON_HASH_TABLE_H */
434