1 /**
2  * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3  * SPDX-License-Identifier: Apache-2.0.
4  */
5 
6 /* For more information on how the RH hash works and in particular how we do
7  * deletions, see:
8  * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
9  */
10 
11 #include <aws/common/hash_table.h>
12 #include <aws/common/math.h>
13 #include <aws/common/private/hash_table_impl.h>
14 #include <aws/common/string.h>
15 
16 #include <limits.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 
20 /* Include lookup3.c so we can (potentially) inline it and make use of the mix()
21  * macro. */
22 #include <aws/common/private/lookup3.inl>
23 
s_suppress_unused_lookup3_func_warnings(void)24 static void s_suppress_unused_lookup3_func_warnings(void) {
25     /* We avoid making changes to lookup3 if we can avoid it, but since it has functions
26      * we're not using, reference them somewhere to suppress the unused function warning.
27      */
28     (void)hashword;
29     (void)hashword2;
30     (void)hashlittle;
31     (void)hashbig;
32 }
33 
34 /**
35  * Calculate the hash for the given key.
36  * Ensures a reasonable semantics for null keys.
37  * Ensures that no object ever hashes to 0, which is the sentinal value for an empty hash element.
38  */
s_hash_for(struct hash_table_state * state,const void * key)39 static uint64_t s_hash_for(struct hash_table_state *state, const void *key) {
40     AWS_PRECONDITION(hash_table_state_is_valid(state));
41     s_suppress_unused_lookup3_func_warnings();
42 
43     if (key == NULL) {
44         /* The best answer */
45         return 42;
46     }
47 
48     uint64_t hash_code = state->hash_fn(key);
49     if (!hash_code) {
50         hash_code = 1;
51     }
52     AWS_RETURN_WITH_POSTCONDITION(hash_code, hash_code != 0);
53 }
54 
55 /**
56  * Check equality of two objects, with a reasonable semantics for null.
57  */
s_safe_eq_check(aws_hash_callback_eq_fn * equals_fn,const void * a,const void * b)58 static bool s_safe_eq_check(aws_hash_callback_eq_fn *equals_fn, const void *a, const void *b) {
59     /* Short circuit if the pointers are the same */
60     if (a == b) {
61         return true;
62     }
63     /* If one but not both are null, the objects are not equal */
64     if (a == NULL || b == NULL) {
65         return false;
66     }
67     /* If both are non-null, call the underlying equals fn */
68     return equals_fn(a, b);
69 }
70 
71 /**
72  * Check equality of two hash keys, with a reasonable semantics for null keys.
73  */
s_hash_keys_eq(struct hash_table_state * state,const void * a,const void * b)74 static bool s_hash_keys_eq(struct hash_table_state *state, const void *a, const void *b) {
75     AWS_PRECONDITION(hash_table_state_is_valid(state));
76     bool rval = s_safe_eq_check(state->equals_fn, a, b);
77     AWS_RETURN_WITH_POSTCONDITION(rval, hash_table_state_is_valid(state));
78 }
79 
s_index_for(struct hash_table_state * map,struct hash_table_entry * entry)80 static size_t s_index_for(struct hash_table_state *map, struct hash_table_entry *entry) {
81     AWS_PRECONDITION(hash_table_state_is_valid(map));
82     size_t index = entry - map->slots;
83     AWS_RETURN_WITH_POSTCONDITION(index, index < map->size && hash_table_state_is_valid(map));
84 }
85 
#if 0
/* Useful debugging code for anyone working on this in the future.
 * Compiled out by default; flip the #if to enable. */

/* Probe distance of the occupied slot at `index`: how far it sits from its
 * ideal (home) slot, modulo table size. */
static uint64_t s_distance(struct hash_table_state *state, size_t index) {
    return (index - state->slots[index].hash_code) & state->mask;
}

/* Prints every slot of the table: EMPTY, or key/value/hash/displacement. */
void hash_dump(struct aws_hash_table *tbl) {
    struct hash_table_state *state = tbl->p_impl;

    printf("Dumping hash table contents:\n");

    /* state->size is size_t; the loop index was a signed int, which mixes
     * signed/unsigned in the comparison and truncates on huge tables. */
    for (size_t i = 0; i < state->size; i++) {
        printf("%7zu: ", i);
        struct hash_table_entry *e = &state->slots[i];
        if (!e->hash_code) {
            printf("EMPTY\n");
        } else {
            /* hash_code is uint64_t; %lld (signed) was a format mismatch.
             * Cast to unsigned long long and print with %llu. */
            printf("k: %p v: %p hash_code: %llu displacement: %llu\n",
                e->element.key, e->element.value,
                (unsigned long long)e->hash_code,
                (unsigned long long)((i - e->hash_code) & state->mask));
        }
    }
}
#endif
110 
#if 0
/* Not currently exposed as an API. Should we have something like this? Useful for benchmarks */
AWS_COMMON_API
void aws_hash_table_print_stats(struct aws_hash_table *table) {
    struct hash_table_state *state = table->p_impl;
    uint64_t total_disp = 0;
    uint64_t max_disp = 0;

    printf("\n=== Hash table statistics ===\n");
    printf(
        "Table size: %zu/%zu (max load %zu, remaining %zu)\n",
        state->entry_count,
        state->size,
        state->max_load,
        state->max_load - state->entry_count);
    printf(
        "Load factor: %02.2lf%% (max %02.2lf%%)\n",
        100.0 * ((double)state->entry_count / (double)state->size),
        state->max_load_factor);

    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            /* The helper is named s_distance (the bare `distance` was
             * undefined) and returns uint64_t; don't truncate into an int. */
            uint64_t displacement = s_distance(state, i);
            total_disp += displacement;
            if (displacement > max_disp) {
                max_disp = displacement;
            }
        }
    }

    /* calloc takes (count, element-size) — the arguments were reversed —
     * and its result must be checked before use. */
    size_t *disp_counts = calloc(max_disp + 1, sizeof(*disp_counts));
    if (!disp_counts) {
        return;
    }

    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            disp_counts[s_distance(state, i)]++;
        }
    }

    /* Walk the histogram until half of the total displacement is covered. */
    uint64_t median = 0;
    uint64_t passed = 0;
    for (uint64_t i = 0; i <= max_disp && passed < total_disp / 2; i++) {
        median = i;
        passed += disp_counts[i];
    }

    printf(
        "Displacement statistics: Avg %02.2lf max %llu median %llu\n",
        (double)total_disp / (double)state->entry_count,
        (unsigned long long)max_disp,
        (unsigned long long)median);
    for (uint64_t i = 0; i <= max_disp; i++) {
        /* %2lld (signed) was mismatched for the uint64_t loop index. */
        printf("Displacement %2llu: %zu entries\n", (unsigned long long)i, disp_counts[i]);
    }
    free(disp_counts);
    printf("\n");
}
#endif
158 
aws_hash_table_get_entry_count(const struct aws_hash_table * map)159 size_t aws_hash_table_get_entry_count(const struct aws_hash_table *map) {
160     struct hash_table_state *state = map->p_impl;
161     return state->entry_count;
162 }
163 
164 /* Given a header template, allocates space for a hash table of the appropriate
165  * size, and copies the state header into this allocated memory, which is
166  * returned.
167  */
s_alloc_state(const struct hash_table_state * template)168 static struct hash_table_state *s_alloc_state(const struct hash_table_state *template) {
169     size_t required_bytes;
170     if (hash_table_state_required_bytes(template->size, &required_bytes)) {
171         return NULL;
172     }
173 
174     /* An empty slot has hashcode 0. So this marks all slots as empty */
175     struct hash_table_state *state = aws_mem_calloc(template->alloc, 1, required_bytes);
176 
177     if (state == NULL) {
178         return state;
179     }
180 
181     *state = *template;
182     return state;
183 }
184 
185 /* Computes the correct size and max_load based on a requested size. */
s_update_template_size(struct hash_table_state * template,size_t expected_elements)186 static int s_update_template_size(struct hash_table_state *template, size_t expected_elements) {
187     size_t min_size = expected_elements;
188 
189     if (min_size < 2) {
190         min_size = 2;
191     }
192 
193     /* size is always a power of 2 */
194     size_t size;
195     if (aws_round_up_to_power_of_two(min_size, &size)) {
196         return AWS_OP_ERR;
197     }
198 
199     /* Update the template once we've calculated everything successfully */
200     template->size = size;
201     template->max_load = (size_t)(template->max_load_factor * (double)template->size);
202     /* Ensure that there is always at least one empty slot in the hash table */
203     if (template->max_load >= size) {
204         template->max_load = size - 1;
205     }
206 
207     /* Since size is a power of 2: (index & (size - 1)) == (index % size) */
208     template->mask = size - 1;
209 
210     return AWS_OP_SUCCESS;
211 }
212 
/* Initializes `map` with the given callbacks and an initial capacity of at
 * least `size` elements. destroy_key_fn/destroy_value_fn may be NULL.
 * Returns AWS_OP_ERR on size overflow or allocation failure. */
int aws_hash_table_init(
    struct aws_hash_table *map,
    struct aws_allocator *alloc,
    size_t size,
    aws_hash_fn *hash_fn,
    aws_hash_callback_eq_fn *equals_fn,
    aws_hash_callback_destroy_fn *destroy_key_fn,
    aws_hash_callback_destroy_fn *destroy_value_fn) {
    AWS_PRECONDITION(map != NULL);
    AWS_PRECONDITION(alloc != NULL);
    AWS_PRECONDITION(hash_fn != NULL);
    AWS_PRECONDITION(equals_fn != NULL);

    /* Build a header template on the stack, size it, then allocate the real
     * state (header + slot array) in one block and copy the template in. */
    struct hash_table_state template;
    template.alloc = alloc;
    template.hash_fn = hash_fn;
    template.equals_fn = equals_fn;
    template.destroy_key_fn = destroy_key_fn;
    template.destroy_value_fn = destroy_value_fn;
    template.entry_count = 0;
    template.max_load_factor = 0.95; /* TODO - make configurable? */

    if (s_update_template_size(&template, size)) {
        return AWS_OP_ERR;
    }

    map->p_impl = s_alloc_state(&template);
    if (map->p_impl == NULL) {
        return AWS_OP_ERR;
    }

    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}
246 
aws_hash_table_clean_up(struct aws_hash_table * map)247 void aws_hash_table_clean_up(struct aws_hash_table *map) {
248     AWS_PRECONDITION(map != NULL);
249     AWS_PRECONDITION(
250         map->p_impl == NULL || aws_hash_table_is_valid(map),
251         "Input aws_hash_table [map] must be valid or hash_table_state pointer [map->p_impl] must be NULL, in case "
252         "aws_hash_table_clean_up was called twice.");
253     struct hash_table_state *state = map->p_impl;
254 
255     /* Ensure that we're idempotent */
256     if (!state) {
257         return;
258     }
259 
260     aws_hash_table_clear(map);
261     aws_mem_release(map->p_impl->alloc, map->p_impl);
262 
263     map->p_impl = NULL;
264     AWS_POSTCONDITION(map->p_impl == NULL);
265 }
266 
/* Exchanges the contents of two distinct tables. */
void aws_hash_table_swap(struct aws_hash_table *AWS_RESTRICT a, struct aws_hash_table *AWS_RESTRICT b) {
    AWS_PRECONDITION(a != b);
    struct aws_hash_table scratch = *a;
    *a = *b;
    *b = scratch;
}
273 
/* Transfers ownership of `from`'s contents into `to`, zeroing `from`. */
void aws_hash_table_move(struct aws_hash_table *AWS_RESTRICT to, struct aws_hash_table *AWS_RESTRICT from) {
    AWS_PRECONDITION(to != NULL);
    AWS_PRECONDITION(from != NULL);
    AWS_PRECONDITION(to != from);
    AWS_PRECONDITION(aws_hash_table_is_valid(from));

    *to = *from;
    /* Leave the source empty so a later clean_up on it is a no-op. */
    AWS_ZERO_STRUCT(*from);

    AWS_POSTCONDITION(aws_hash_table_is_valid(to));
}
284 
285 /* Tries to find where the requested key is or where it should go if put.
286  * Returns AWS_ERROR_SUCCESS if the item existed (leaving it in *entry),
287  * or AWS_ERROR_HASHTBL_ITEM_NOT_FOUND if it did not (putting its destination
288  * in *entry). Note that this does not take care of displacing whatever was in
289  * that entry before.
290  *
291  * probe_idx is set to the probe index of the entry found.
292  */
293 
294 static int s_find_entry1(
295     struct hash_table_state *state,
296     uint64_t hash_code,
297     const void *key,
298     struct hash_table_entry **p_entry,
299     size_t *p_probe_idx);
300 
301 /* Inlined fast path: Check the first slot, only. */
302 /* TODO: Force inlining? */
s_find_entry(struct hash_table_state * state,uint64_t hash_code,const void * key,struct hash_table_entry ** p_entry,size_t * p_probe_idx)303 static int inline s_find_entry(
304     struct hash_table_state *state,
305     uint64_t hash_code,
306     const void *key,
307     struct hash_table_entry **p_entry,
308     size_t *p_probe_idx) {
309     struct hash_table_entry *entry = &state->slots[hash_code & state->mask];
310 
311     if (entry->hash_code == 0) {
312         if (p_probe_idx) {
313             *p_probe_idx = 0;
314         }
315         *p_entry = entry;
316         return AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
317     }
318 
319     if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) {
320         if (p_probe_idx) {
321             *p_probe_idx = 0;
322         }
323         *p_entry = entry;
324         return AWS_OP_SUCCESS;
325     }
326 
327     return s_find_entry1(state, hash_code, key, p_entry, p_probe_idx);
328 }
329 
/* Probe-loop slow path of the lookup, used after s_find_entry has ruled out
 * the home slot. Returns AWS_ERROR_SUCCESS with the matching entry in
 * *p_entry, or AWS_ERROR_HASHTBL_ITEM_NOT_FOUND with the would-be insertion
 * slot in *p_entry. If p_probe_idx is non-NULL it receives the probe
 * distance of the returned slot. */
static int s_find_entry1(
    struct hash_table_state *state,
    uint64_t hash_code,
    const void *key,
    struct hash_table_entry **p_entry,
    size_t *p_probe_idx) {
    /* Start at distance 1: the home slot (distance 0) was already checked
     * by the inlined fast path in s_find_entry. */
    size_t probe_idx = 1;
    /* If we find a deleted entry, we record that index and return it as our probe index (i.e. we'll keep searching to
     * see if it already exists, but if not we'll overwrite the deleted entry).
     */

    int rv;
    struct hash_table_entry *entry;
    /* This loop is guaranteed to terminate because entry_probe is bounded above by state->mask (i.e. state->size - 1).
     * Since probe_idx increments every loop iteration, it will become larger than entry_probe after at most state->size
     * transitions and the loop will exit (if it hasn't already)
     */
    while (1) {
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        /* Unsigned wrap-around is intentional; the mask keeps us in range. */
        uint64_t index = (hash_code + probe_idx) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
        entry = &state->slots[index];
        if (!entry->hash_code) {
            /* Empty slot: the key cannot be further along this probe chain. */
            rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
            break;
        }

        if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) {
            rv = AWS_ERROR_SUCCESS;
            break;
        }

#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        /* Probe distance of the entry currently occupying this slot. */
        uint64_t entry_probe = (index - entry->hash_code) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif

        if (entry_probe < probe_idx) {
            /* We now know that our target entry cannot exist; if it did exist,
             * it would be at the current location as it has a higher probe
             * length than the entry we are examining and thus would have
             * preempted that item
             */
            rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
            break;
        }

        probe_idx++;
    }

    *p_entry = entry;
    if (p_probe_idx) {
        *p_probe_idx = probe_idx;
    }

    return rv;
}
396 
aws_hash_table_find(const struct aws_hash_table * map,const void * key,struct aws_hash_element ** p_elem)397 int aws_hash_table_find(const struct aws_hash_table *map, const void *key, struct aws_hash_element **p_elem) {
398     AWS_PRECONDITION(aws_hash_table_is_valid(map));
399     AWS_PRECONDITION(AWS_OBJECT_PTR_IS_WRITABLE(p_elem), "Input aws_hash_element pointer [p_elem] must be writable.");
400 
401     struct hash_table_state *state = map->p_impl;
402     uint64_t hash_code = s_hash_for(state, key);
403     struct hash_table_entry *entry;
404 
405     int rv = s_find_entry(state, hash_code, key, &entry, NULL);
406 
407     if (rv == AWS_ERROR_SUCCESS) {
408         *p_elem = &entry->element;
409     } else {
410         *p_elem = NULL;
411     }
412     AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
413 }
414 
/**
 * Attempts to find a home for the given entry.
 * If the entry was empty (i.e. hash-code of 0), then the function does nothing and returns NULL
 * Otherwise, it emplaces the item, and returns a pointer to the newly emplaced entry.
 * This function is only called after the hash-table has been expanded to fit the new element,
 * so it should never fail.
 *
 * Robin Hood insertion: whenever the incoming entry has probed further than
 * the entry occupying a slot, the two are swapped and the evicted entry
 * continues looking for a home further along the chain.
 */
static struct hash_table_entry *s_emplace_item(
    struct hash_table_state *state,
    struct hash_table_entry entry,
    size_t probe_idx) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));

    if (entry.hash_code == 0) {
        /* Nothing to place; 0 is the empty-slot sentinel. */
        AWS_RETURN_WITH_POSTCONDITION(NULL, hash_table_state_is_valid(state));
    }

    struct hash_table_entry *rval = NULL;

    /* Since a valid hash_table has at least one empty element, this loop will always terminate in at most linear time
     */
    while (entry.hash_code != 0) {
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        /* Candidate slot for the entry in hand, at distance probe_idx. */
        size_t index = (size_t)(entry.hash_code + probe_idx) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
        struct hash_table_entry *victim = &state->slots[index];

#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        /* Probe distance of whatever currently occupies the candidate slot. */
        size_t victim_probe_idx = (size_t)(index - victim->hash_code) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif

        /* Take the slot if it is empty, or steal it if our entry has probed
         * further than the current occupant. */
        if (!victim->hash_code || victim_probe_idx < probe_idx) {
            /* The first thing we emplace is the entry itself. A pointer to its location becomes the rval */
            if (!rval) {
                rval = victim;
            }

            /* Swap with the occupant; the evicted entry (now in `entry`)
             * resumes searching from its own probe distance. */
            struct hash_table_entry tmp = *victim;
            *victim = entry;
            entry = tmp;

            probe_idx = victim_probe_idx + 1;
        } else {
            probe_idx++;
        }
    }

    AWS_RETURN_WITH_POSTCONDITION(
        rval,
        hash_table_state_is_valid(state) && rval >= &state->slots[0] && rval < &state->slots[state->size],
        "Output hash_table_entry pointer [rval] must point in the slots of [state].");
}
477 
s_expand_table(struct aws_hash_table * map)478 static int s_expand_table(struct aws_hash_table *map) {
479     struct hash_table_state *old_state = map->p_impl;
480     struct hash_table_state template = *old_state;
481 
482     size_t new_size;
483     if (aws_mul_size_checked(template.size, 2, &new_size)) {
484         return AWS_OP_ERR;
485     }
486 
487     if (s_update_template_size(&template, new_size)) {
488         return AWS_OP_ERR;
489     }
490 
491     struct hash_table_state *new_state = s_alloc_state(&template);
492     if (!new_state) {
493         return AWS_OP_ERR;
494     }
495 
496     for (size_t i = 0; i < old_state->size; i++) {
497         struct hash_table_entry entry = old_state->slots[i];
498         if (entry.hash_code) {
499             /* We can directly emplace since we know we won't put the same item twice */
500             s_emplace_item(new_state, entry, 0);
501         }
502     }
503 
504     map->p_impl = new_state;
505     aws_mem_release(new_state->alloc, old_state);
506 
507     return AWS_OP_SUCCESS;
508 }
509 
/* Finds the entry for `key`, inserting a new one (with a NULL value) if it
 * is absent. On return, *p_elem (if non-NULL) points at the element and
 * *was_created (if non-NULL) is 1 when a new entry was inserted, 0 when the
 * key already existed. May grow the table, which replaces map->p_impl and
 * invalidates previously returned element pointers. */
int aws_hash_table_create(
    struct aws_hash_table *map,
    const void *key,
    struct aws_hash_element **p_elem,
    int *was_created) {

    struct hash_table_state *state = map->p_impl;
    uint64_t hash_code = s_hash_for(state, key);
    struct hash_table_entry *entry;
    size_t probe_idx;
    /* Point was_created at a scratch int so the code below can write through
     * it unconditionally. */
    int ignored;
    if (!was_created) {
        was_created = &ignored;
    }

    int rv = s_find_entry(state, hash_code, key, &entry, &probe_idx);

    if (rv == AWS_ERROR_SUCCESS) {
        /* Key already present: report it without modifying the table. */
        if (p_elem) {
            *p_elem = &entry->element;
        }
        *was_created = 0;
        return AWS_OP_SUCCESS;
    }

    /* Okay, we need to add an entry. Check the load factor first. */
    size_t incr_entry_count;
    if (aws_add_size_checked(state->entry_count, 1, &incr_entry_count)) {
        return AWS_OP_ERR;
    }
    if (incr_entry_count > state->max_load) {
        rv = s_expand_table(map);
        if (rv != AWS_OP_SUCCESS) {
            /* Any error was already raised in expand_table */
            return rv;
        }
        /* Expansion replaced the state object; re-read it. */
        state = map->p_impl;
        /* If we expanded the table, we need to discard the probe index returned from find_entry,
         * as it's likely that we can find a more desirable slot. If we don't, then later gets will
         * terminate before reaching our probe index.
         *
         * n.b. currently we ignore this probe_idx subsequently, but leaving this here so we don't
         * forget when we optimize later. */
        probe_idx = 0;
    }

    state->entry_count++;
    struct hash_table_entry new_entry;
    new_entry.element.key = key;
    new_entry.element.value = NULL;
    new_entry.hash_code = hash_code;

    entry = s_emplace_item(state, new_entry, probe_idx);

    if (p_elem) {
        *p_elem = &entry->element;
    }

    *was_created = 1;

    return AWS_OP_SUCCESS;
}
573 
574 AWS_COMMON_API
aws_hash_table_put(struct aws_hash_table * map,const void * key,void * value,int * was_created)575 int aws_hash_table_put(struct aws_hash_table *map, const void *key, void *value, int *was_created) {
576     struct aws_hash_element *p_elem;
577     int was_created_fallback;
578 
579     if (!was_created) {
580         was_created = &was_created_fallback;
581     }
582 
583     if (aws_hash_table_create(map, key, &p_elem, was_created)) {
584         return AWS_OP_ERR;
585     }
586 
587     /*
588      * aws_hash_table_create might resize the table, which results in map->p_impl changing.
589      * It is therefore important to wait to read p_impl until after we return.
590      */
591     struct hash_table_state *state = map->p_impl;
592 
593     if (!*was_created) {
594         if (p_elem->key != key && state->destroy_key_fn) {
595             state->destroy_key_fn((void *)p_elem->key);
596         }
597 
598         if (state->destroy_value_fn) {
599             state->destroy_value_fn((void *)p_elem->value);
600         }
601     }
602 
603     p_elem->key = key;
604     p_elem->value = value;
605 
606     return AWS_OP_SUCCESS;
607 }
608 
/* Clears an entry. Does _not_ invoke destructor callbacks.
 * Returns the last slot touched (note that if we wrap, we'll report an index
 * lower than the original entry's index)
 *
 * Implements backward-shift deletion (see the link at the top of this file):
 * entries following the removed one are shifted back a slot until we reach
 * an empty slot or an entry already sitting in its home slot.
 */
static size_t s_remove_entry(struct hash_table_state *state, struct hash_table_entry *entry) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));
    AWS_PRECONDITION(state->entry_count > 0);
    AWS_PRECONDITION(
        entry >= &state->slots[0] && entry < &state->slots[state->size],
        "Input hash_table_entry [entry] pointer must point in the available slots.");
    state->entry_count--;

    /* Shift subsequent entries back until we find an entry that belongs at its
     * current position. This is important to ensure that subsequent searches
     * don't terminate at the removed element.
     */
    size_t index = s_index_for(state, entry);
    /* There is always at least one empty slot in the hash table, so this loop always terminates */
    while (1) {
        size_t next_index = (index + 1) & state->mask;

        /* If we hit an empty slot, stop */
        if (!state->slots[next_index].hash_code) {
            break;
        }
        /* If the next slot is at the start of the probe sequence, stop.
         * We know that nothing with an earlier home slot is after this;
         * otherwise this index-zero entry would have been evicted from its
         * home.
         */
        if ((state->slots[next_index].hash_code & state->mask) == next_index) {
            break;
        }

        /* Okay, shift this one back */
        state->slots[index] = state->slots[next_index];
        index = next_index;
    }

    /* Clear the entry we shifted out of */
    AWS_ZERO_STRUCT(state->slots[index]);
    AWS_RETURN_WITH_POSTCONDITION(index, hash_table_state_is_valid(state) && index <= state->size);
}
652 
aws_hash_table_remove(struct aws_hash_table * map,const void * key,struct aws_hash_element * p_value,int * was_present)653 int aws_hash_table_remove(
654     struct aws_hash_table *map,
655     const void *key,
656     struct aws_hash_element *p_value,
657     int *was_present) {
658     AWS_PRECONDITION(aws_hash_table_is_valid(map));
659     AWS_PRECONDITION(
660         p_value == NULL || AWS_OBJECT_PTR_IS_WRITABLE(p_value), "Input pointer [p_value] must be NULL or writable.");
661     AWS_PRECONDITION(
662         was_present == NULL || AWS_OBJECT_PTR_IS_WRITABLE(was_present),
663         "Input pointer [was_present] must be NULL or writable.");
664 
665     struct hash_table_state *state = map->p_impl;
666     uint64_t hash_code = s_hash_for(state, key);
667     struct hash_table_entry *entry;
668     int ignored;
669 
670     if (!was_present) {
671         was_present = &ignored;
672     }
673 
674     int rv = s_find_entry(state, hash_code, key, &entry, NULL);
675 
676     if (rv != AWS_ERROR_SUCCESS) {
677         *was_present = 0;
678         AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
679     }
680 
681     *was_present = 1;
682 
683     if (p_value) {
684         *p_value = entry->element;
685     } else {
686         if (state->destroy_key_fn) {
687             state->destroy_key_fn((void *)entry->element.key);
688         }
689         if (state->destroy_value_fn) {
690             state->destroy_value_fn(entry->element.value);
691         }
692     }
693     s_remove_entry(state, entry);
694 
695     AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
696 }
697 
aws_hash_table_remove_element(struct aws_hash_table * map,struct aws_hash_element * p_value)698 int aws_hash_table_remove_element(struct aws_hash_table *map, struct aws_hash_element *p_value) {
699     AWS_PRECONDITION(aws_hash_table_is_valid(map));
700     AWS_PRECONDITION(p_value != NULL);
701 
702     struct hash_table_state *state = map->p_impl;
703     struct hash_table_entry *entry = AWS_CONTAINER_OF(p_value, struct hash_table_entry, element);
704 
705     s_remove_entry(state, entry);
706 
707     AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
708 }
709 
aws_hash_table_foreach(struct aws_hash_table * map,int (* callback)(void * context,struct aws_hash_element * pElement),void * context)710 int aws_hash_table_foreach(
711     struct aws_hash_table *map,
712     int (*callback)(void *context, struct aws_hash_element *pElement),
713     void *context) {
714 
715     for (struct aws_hash_iter iter = aws_hash_iter_begin(map); !aws_hash_iter_done(&iter); aws_hash_iter_next(&iter)) {
716         int rv = callback(context, &iter.element);
717         if (rv & AWS_COMMON_HASH_TABLE_ITER_ERROR) {
718             int error = aws_last_error();
719             if (error == AWS_ERROR_SUCCESS) {
720                 aws_raise_error(AWS_ERROR_UNKNOWN);
721             }
722             return AWS_OP_ERR;
723         }
724 
725         if (rv & AWS_COMMON_HASH_TABLE_ITER_DELETE) {
726             aws_hash_iter_delete(&iter, false);
727         }
728 
729         if (!(rv & AWS_COMMON_HASH_TABLE_ITER_CONTINUE)) {
730             break;
731         }
732     }
733 
734     return AWS_OP_SUCCESS;
735 }
736 
/* Returns true when both tables hold exactly the same key set and, for each
 * key, values that compare equal under `value_eq` (NULL-tolerant). */
bool aws_hash_table_eq(
    const struct aws_hash_table *a,
    const struct aws_hash_table *b,
    aws_hash_callback_eq_fn *value_eq) {
    AWS_PRECONDITION(aws_hash_table_is_valid(a));
    AWS_PRECONDITION(aws_hash_table_is_valid(b));
    AWS_PRECONDITION(value_eq != NULL);

    if (aws_hash_table_get_entry_count(a) != aws_hash_table_get_entry_count(b)) {
        AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
    }

    /*
     * The entry counts match, so it suffices to walk a's slots and look each
     * key up in b: any key missing from b or any mismatched value disproves
     * equality, and b cannot hold extra keys without a count mismatch.
     */
    for (size_t slot = 0; slot < a->p_impl->size; ++slot) {
        const struct hash_table_entry *const a_entry = &a->p_impl->slots[slot];
        if (a_entry->hash_code == 0) {
            /* Empty slot; nothing to compare. */
            continue;
        }

        struct aws_hash_element *b_element = NULL;
        aws_hash_table_find(b, a_entry->element.key, &b_element);

        if (b_element == NULL || !s_safe_eq_check(value_eq, a_entry->element.value, b_element->value)) {
            /* Key present in a only, or values differ. */
            AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
        }
    }
    AWS_RETURN_WITH_POSTCONDITION(true, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
}
775 
776 /**
777  * Given an iterator, and a start slot, find the next available filled slot if it exists
778  * Otherwise, return an iter that will return true for aws_hash_iter_done().
779  * Note that aws_hash_iter_is_valid() need not hold on entry to the function, since
780  * it can be called on a partially constructed iter from aws_hash_iter_begin().
781  *
782  * Note that calling this on an iterator which is "done" is idempotent: it will return another
783  * iterator which is "done".
784  */
s_get_next_element(struct aws_hash_iter * iter,size_t start_slot)785 static inline void s_get_next_element(struct aws_hash_iter *iter, size_t start_slot) {
786     AWS_PRECONDITION(iter != NULL);
787     AWS_PRECONDITION(aws_hash_table_is_valid(iter->map));
788     struct hash_table_state *state = iter->map->p_impl;
789     size_t limit = iter->limit;
790 
791     for (size_t i = start_slot; i < limit; i++) {
792         struct hash_table_entry *entry = &state->slots[i];
793 
794         if (entry->hash_code) {
795             iter->element = entry->element;
796             iter->slot = i;
797             iter->status = AWS_HASH_ITER_STATUS_READY_FOR_USE;
798             return;
799         }
800     }
801     iter->element.key = NULL;
802     iter->element.value = NULL;
803     iter->slot = iter->limit;
804     iter->status = AWS_HASH_ITER_STATUS_DONE;
805     AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
806 }
807 
aws_hash_iter_begin(const struct aws_hash_table * map)808 struct aws_hash_iter aws_hash_iter_begin(const struct aws_hash_table *map) {
809     AWS_PRECONDITION(aws_hash_table_is_valid(map));
810     struct hash_table_state *state = map->p_impl;
811     struct aws_hash_iter iter;
812     AWS_ZERO_STRUCT(iter);
813     iter.map = map;
814     iter.limit = state->size;
815     s_get_next_element(&iter, 0);
816     AWS_RETURN_WITH_POSTCONDITION(
817         iter,
818         aws_hash_iter_is_valid(&iter) &&
819             (iter.status == AWS_HASH_ITER_STATUS_DONE || iter.status == AWS_HASH_ITER_STATUS_READY_FOR_USE),
820         "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
821 }
822 
aws_hash_iter_done(const struct aws_hash_iter * iter)823 bool aws_hash_iter_done(const struct aws_hash_iter *iter) {
824     AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
825     AWS_PRECONDITION(
826         iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
827         "Input aws_hash_iter [iter] must either be done, or ready to use.");
828     /*
829      * SIZE_MAX is a valid (non-terminal) value for iter->slot in the event that
830      * we delete slot 0. See comments in aws_hash_iter_delete.
831      *
832      * As such we must use == rather than >= here.
833      */
834     bool rval = (iter->slot == iter->limit);
835     AWS_POSTCONDITION(
836         iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
837         "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
838     AWS_POSTCONDITION(
839         rval == (iter->status == AWS_HASH_ITER_STATUS_DONE),
840         "Output bool [rval] must be true if and only if the status of [iter] is DONE.");
841     AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
842     return rval;
843 }
844 
aws_hash_iter_next(struct aws_hash_iter * iter)845 void aws_hash_iter_next(struct aws_hash_iter *iter) {
846     AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
847 #ifdef CBMC
848 #    pragma CPROVER check push
849 #    pragma CPROVER check disable "unsigned-overflow"
850 #endif
851     s_get_next_element(iter, iter->slot + 1);
852 #ifdef CBMC
853 #    pragma CPROVER check pop
854 #endif
855     AWS_POSTCONDITION(
856         iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
857         "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
858     AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
859 }
860 
/**
 * Remove the entry currently pointed to by [iter] from the table, optionally
 * invoking the table's destroy callbacks on its key and value.
 *
 * Precondition: [iter] must be READY_FOR_USE (positioned on a live entry)
 * and the table must contain at least one entry.
 *
 * Postcondition: the iterator's status is DELETE_CALLED; the caller is
 * expected to follow up with aws_hash_iter_done()/aws_hash_iter_next().
 */
void aws_hash_iter_delete(struct aws_hash_iter *iter, bool destroy_contents) {
    AWS_PRECONDITION(
        iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, "Input aws_hash_iter [iter] must be ready for use.");
    AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
    AWS_PRECONDITION(
        iter->map->p_impl->entry_count > 0,
        "The hash_table_state pointed by input [iter] must contain at least one entry.");

    struct hash_table_state *state = iter->map->p_impl;
    if (destroy_contents) {
        /* Keys are stored const; cast away const only to hand the pointer to
         * the caller-supplied destructor. */
        if (state->destroy_key_fn) {
            state->destroy_key_fn((void *)iter->element.key);
        }
        if (state->destroy_value_fn) {
            state->destroy_value_fn(iter->element.value);
        }
    }

    /* Backward-shift deletion: s_remove_entry returns the index of the slot
     * that ended up empty after shifting displaced entries back. */
    size_t last_index = s_remove_entry(state, &state->slots[iter->slot]);

    /* If we shifted elements that are not part of the window we intend to iterate
     * over, it means we shifted an element that we already visited into the
     * iter->limit - 1 position. To avoid double iteration, we'll now reduce the
     * limit to compensate.
     *
     * Note that last_index cannot equal iter->slot, because slots[iter->slot]
     * is empty before we start walking the table.
     */
    if (last_index < iter->slot || last_index >= iter->limit) {
        iter->limit--;
    }

    /*
     * After removing this entry, the next entry might be in the same slot, or
     * in some later slot, or we might have no further entries.
     *
     * We also expect that the caller will call aws_hash_iter_done and aws_hash_iter_next
     * after this delete call. This gets a bit tricky if we just deleted the value
     * in slot 0, and a new value has shifted in.
     *
     * To deal with this, we'll just step back one slot, and let _next start iteration
     * at our current slot. Note that if we just deleted slot 0, this will result in
     * underflowing to SIZE_MAX; we have to take care in aws_hash_iter_done to avoid
     * treating this as an end-of-iteration condition.
     */
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
    iter->slot--;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
    iter->status = AWS_HASH_ITER_STATUS_DELETE_CALLED;
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DELETE_CALLED,
        "The status of output aws_hash_iter [iter] must be DELETE_CALLED.");
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
}
920 
aws_hash_table_clear(struct aws_hash_table * map)921 void aws_hash_table_clear(struct aws_hash_table *map) {
922     AWS_PRECONDITION(aws_hash_table_is_valid(map));
923     struct hash_table_state *state = map->p_impl;
924 
925     /* Check that we have at least one destructor before iterating over the table */
926     if (state->destroy_key_fn || state->destroy_value_fn) {
927         for (size_t i = 0; i < state->size; ++i) {
928             struct hash_table_entry *entry = &state->slots[i];
929             if (!entry->hash_code) {
930                 continue;
931             }
932             if (state->destroy_key_fn) {
933                 state->destroy_key_fn((void *)entry->element.key);
934             }
935             if (state->destroy_value_fn) {
936                 state->destroy_value_fn(entry->element.value);
937             }
938         }
939     }
940     /* Since hash code 0 represents an empty slot we can just zero out the
941      * entire table. */
942     memset(state->slots, 0, sizeof(*state->slots) * state->size);
943 
944     state->entry_count = 0;
945     AWS_POSTCONDITION(aws_hash_table_is_valid(map));
946 }
947 
/**
 * Hash a NUL-terminated C string with lookup3's hashlittle2,
 * combining the two 32-bit results into one 64-bit hash.
 */
uint64_t aws_hash_c_string(const void *item) {
    AWS_PRECONDITION(aws_c_string_is_valid(item));
    const char *str = item;

    /* Seed the two hash words with the first hex digits of pi. */
    uint32_t hi = 0x3243F6A8;
    uint32_t lo = 0x885A308D;
    hashlittle2(str, strlen(str), &lo, &hi);

    return ((uint64_t)hi << 32) | lo;
}
958 
aws_hash_string(const void * item)959 uint64_t aws_hash_string(const void *item) {
960     AWS_PRECONDITION(aws_string_is_valid(item));
961     const struct aws_string *str = item;
962 
963     /* first digits of pi in hex */
964     uint32_t b = 0x3243F6A8, c = 0x885A308D;
965     hashlittle2(aws_string_bytes(str), str->len, &c, &b);
966     AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_string_is_valid(str));
967 }
968 
aws_hash_byte_cursor_ptr(const void * item)969 uint64_t aws_hash_byte_cursor_ptr(const void *item) {
970     AWS_PRECONDITION(aws_byte_cursor_is_valid(item));
971     const struct aws_byte_cursor *cur = item;
972 
973     /* first digits of pi in hex */
974     uint32_t b = 0x3243F6A8, c = 0x885A308D;
975     hashlittle2(cur->ptr, cur->len, &c, &b);
976     AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_byte_cursor_is_valid(cur));
977 }
978 
/**
 * Hash the numeric value of the pointer itself (not the memory behind it),
 * so NULL is an acceptable input.
 */
uint64_t aws_hash_ptr(const void *item) {
    /* Seed the two hash words with the first hex digits of e:
     * 2.b7e 1516 28ae d2a6 */
    uint32_t hi = 0x2b7e1516;
    uint32_t lo = 0x28aed2a6;

    hashlittle2(&item, sizeof(item), &lo, &hi);

    return ((uint64_t)hi << 32) | lo;
}
989 
/**
 * Combine two 64-bit hashes into one: [item2] seeds the two hashlittle2
 * state words, which are then mixed with the bytes of [item1].
 */
uint64_t aws_hash_combine(uint64_t item1, uint64_t item2) {
    uint32_t low_word = (uint32_t)(item2 & 0xFFFFFFFF); /* LSB */
    uint32_t high_word = (uint32_t)(item2 >> 32);       /* MSB */

    hashlittle2(&item1, sizeof(item1), &high_word, &low_word);
    return ((uint64_t)low_word << 32) | high_word;
}
997 
/**
 * Equality callback for NUL-terminated C string keys (content comparison).
 */
bool aws_hash_callback_c_str_eq(const void *a, const void *b) {
    AWS_PRECONDITION(aws_c_string_is_valid(a));
    AWS_PRECONDITION(aws_c_string_is_valid(b));
    const bool are_equal = (strcmp(a, b) == 0);
    AWS_RETURN_WITH_POSTCONDITION(are_equal, aws_c_string_is_valid(a) && aws_c_string_is_valid(b));
}
1004 
/**
 * Equality callback for aws_string keys (content comparison).
 */
bool aws_hash_callback_string_eq(const void *a, const void *b) {
    AWS_PRECONDITION(aws_string_is_valid(a));
    AWS_PRECONDITION(aws_string_is_valid(b));
    const bool are_equal = aws_string_eq(a, b);
    AWS_RETURN_WITH_POSTCONDITION(are_equal, aws_string_is_valid(a) && aws_string_is_valid(b));
}
1011 
/**
 * Destroy-callback adapter: releases an aws_string stored as a hash-table
 * key or value. Suitable for use as destroy_key_fn / destroy_value_fn.
 */
void aws_hash_callback_string_destroy(void *a) {
    AWS_PRECONDITION(aws_string_is_valid(a));
    aws_string_destroy(a);
}
1016 
/**
 * Pointer-identity equality callback: compares the addresses themselves,
 * not the memory they reference. NULL arguments are permitted.
 */
bool aws_ptr_eq(const void *a, const void *b) {
    return (a == b);
}
1020 
1021 /**
1022  * Best-effort check of hash_table_state data-structure invariants
1023  * Some invariants, such as that the number of entries is actually the
1024  * same as the entry_count field, would require a loop to check
1025  */
aws_hash_table_is_valid(const struct aws_hash_table * map)1026 bool aws_hash_table_is_valid(const struct aws_hash_table *map) {
1027     return map && map->p_impl && hash_table_state_is_valid(map->p_impl);
1028 }
1029 
1030 /**
1031  * Best-effort check of hash_table_state data-structure invariants
1032  * Some invariants, such as that the number of entries is actually the
1033  * same as the entry_count field, would require a loop to check
1034  */
hash_table_state_is_valid(const struct hash_table_state * map)1035 bool hash_table_state_is_valid(const struct hash_table_state *map) {
1036     if (!map) {
1037         return false;
1038     }
1039     bool hash_fn_nonnull = (map->hash_fn != NULL);
1040     bool equals_fn_nonnull = (map->equals_fn != NULL);
1041     /*destroy_key_fn and destroy_value_fn are both allowed to be NULL*/
1042     bool alloc_nonnull = (map->alloc != NULL);
1043     bool size_at_least_two = (map->size >= 2);
1044     bool size_is_power_of_two = aws_is_power_of_two(map->size);
1045     bool entry_count = (map->entry_count <= map->max_load);
1046     bool max_load = (map->max_load < map->size);
1047     bool mask_is_correct = (map->mask == (map->size - 1));
1048     bool max_load_factor_bounded = map->max_load_factor == 0.95; //(map->max_load_factor < 1.0);
1049     bool slots_allocated = AWS_MEM_IS_WRITABLE(&map->slots[0], sizeof(map->slots[0]) * map->size);
1050 
1051     return hash_fn_nonnull && equals_fn_nonnull && alloc_nonnull && size_at_least_two && size_is_power_of_two &&
1052            entry_count && max_load && mask_is_correct && max_load_factor_bounded && slots_allocated;
1053 }
1054 
1055 /**
1056  * Given a pointer to a hash_iter, checks that it is well-formed, with all data-structure invariants.
1057  */
aws_hash_iter_is_valid(const struct aws_hash_iter * iter)1058 bool aws_hash_iter_is_valid(const struct aws_hash_iter *iter) {
1059     if (!iter) {
1060         return false;
1061     }
1062     if (!iter->map) {
1063         return false;
1064     }
1065     if (!aws_hash_table_is_valid(iter->map)) {
1066         return false;
1067     }
1068     if (iter->limit > iter->map->p_impl->size) {
1069         return false;
1070     }
1071 
1072     switch (iter->status) {
1073         case AWS_HASH_ITER_STATUS_DONE:
1074             /* Done iff slot == limit */
1075             return iter->slot == iter->limit;
1076         case AWS_HASH_ITER_STATUS_DELETE_CALLED:
1077             /* iter->slot can underflow to SIZE_MAX after a delete
1078              * see the comments for aws_hash_iter_delete() */
1079             return iter->slot <= iter->limit || iter->slot == SIZE_MAX;
1080         case AWS_HASH_ITER_STATUS_READY_FOR_USE:
1081             /* A slot must point to a valid location (i.e. hash_code != 0) */
1082             return iter->slot < iter->limit && iter->map->p_impl->slots[iter->slot].hash_code != 0;
1083     }
1084     /* Invalid status code */
1085     return false;
1086 }
1087 
1088 /**
1089  * Determine the total number of bytes needed for a hash-table with
1090  * "size" slots. If the result would overflow a size_t, return
1091  * AWS_OP_ERR; otherwise, return AWS_OP_SUCCESS with the result in
1092  * "required_bytes".
1093  */
hash_table_state_required_bytes(size_t size,size_t * required_bytes)1094 int hash_table_state_required_bytes(size_t size, size_t *required_bytes) {
1095 
1096     size_t elemsize;
1097     if (aws_mul_size_checked(size, sizeof(struct hash_table_entry), &elemsize)) {
1098         return AWS_OP_ERR;
1099     }
1100 
1101     if (aws_add_size_checked(elemsize, sizeof(struct hash_table_state), required_bytes)) {
1102         return AWS_OP_ERR;
1103     }
1104 
1105     return AWS_OP_SUCCESS;
1106 }
1107