/**
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0.
 */

/* For more information on how the RH hash works and in particular how we do
 * deletions, see:
 * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
 */

#include <aws/common/hash_table.h>
#include <aws/common/math.h>
#include <aws/common/private/hash_table_impl.h>
#include <aws/common/string.h>

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Include lookup3.c so we can (potentially) inline it and make use of the mix()
 * macro. */
#include <aws/common/private/lookup3.inl>

static void s_suppress_unused_lookup3_func_warnings(void) {
    /* We avoid making changes to lookup3 if we can avoid it, but since it has functions
     * we're not using, reference them somewhere to suppress the unused-function warning.
     */
    (void)hashword;
    (void)hashword2;
    (void)hashlittle;
    (void)hashbig;
}

/**
 * Calculate the hash for the given key.
 * Ensures reasonable semantics for null keys.
 * Ensures that no object ever hashes to 0, which is the sentinel value for an empty hash element.
 */
static uint64_t s_hash_for(struct hash_table_state *state, const void *key) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));
    s_suppress_unused_lookup3_func_warnings();

    if (key == NULL) {
        /* The best answer */
        return 42;
    }

    uint64_t hash_code = state->hash_fn(key);
    if (!hash_code) {
        hash_code = 1;
    }
    AWS_RETURN_WITH_POSTCONDITION(hash_code, hash_code != 0);
}
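
/* A minimal sanity-check sketch (illustrative only, following this file's
 * convention of disabled debug code) of the two guarantees above: a NULL key
 * hashes to the fixed value 42, and no key ever hashes to 0, since 0 is the
 * sentinel for an empty slot. Assumes <assert.h> and a [state] whose hash_fn
 * accepts C-string keys. */
#if 0
static void s_hash_for_example(struct hash_table_state *state) {
    assert(s_hash_for(state, NULL) == 42);      /* fixed hash for NULL keys */
    assert(s_hash_for(state, "some key") != 0); /* 0 is reserved for empty slots */
}
#endif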

/**
 * Check equality of two objects, with a reasonable semantics for null.
 */
static bool s_safe_eq_check(aws_hash_callback_eq_fn *equals_fn, const void *a, const void *b) {
    /* Short circuit if the pointers are the same */
    if (a == b) {
        return true;
    }
    /* If one but not both are null, the objects are not equal */
    if (a == NULL || b == NULL) {
        return false;
    }
    /* If both are non-null, call the underlying equals fn */
    return equals_fn(a, b);
}

/**
 * Check equality of two hash keys, with a reasonable semantics for null keys.
 */
static bool s_hash_keys_eq(struct hash_table_state *state, const void *a, const void *b) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));
    bool rval = s_safe_eq_check(state->equals_fn, a, b);
    AWS_RETURN_WITH_POSTCONDITION(rval, hash_table_state_is_valid(state));
}

static size_t s_index_for(struct hash_table_state *map, struct hash_table_entry *entry) {
    AWS_PRECONDITION(hash_table_state_is_valid(map));
    size_t index = entry - map->slots;
    AWS_RETURN_WITH_POSTCONDITION(index, index < map->size && hash_table_state_is_valid(map));
}

#if 0
/* Useful debugging code for anyone working on this in the future */
static uint64_t s_distance(struct hash_table_state *state, size_t index) {
    return (index - state->slots[index].hash_code) & state->mask;
}

void hash_dump(struct aws_hash_table *tbl) {
    struct hash_table_state *state = tbl->p_impl;

    printf("Dumping hash table contents:\n");

    for (size_t i = 0; i < state->size; i++) {
        printf("%7zu: ", i);
        struct hash_table_entry *e = &state->slots[i];
        if (!e->hash_code) {
            printf("EMPTY\n");
        } else {
            printf(
                "k: %p v: %p hash_code: %llu displacement: %llu\n",
                e->element.key,
                e->element.value,
                (unsigned long long)e->hash_code,
                (unsigned long long)((i - e->hash_code) & state->mask));
        }
    }
}
#endif

#if 0
/* Not currently exposed as an API. Should we have something like this? Useful for benchmarks */
AWS_COMMON_API
void aws_hash_table_print_stats(struct aws_hash_table *table) {
    struct hash_table_state *state = table->p_impl;
    uint64_t total_disp = 0;
    uint64_t max_disp = 0;

    printf("\n=== Hash table statistics ===\n");
    printf(
        "Table size: %zu/%zu (max load %zu, remaining %zu)\n",
        state->entry_count,
        state->size,
        state->max_load,
        state->max_load - state->entry_count);
    printf(
        "Load factor: %02.2lf%% (max %02.2lf%%)\n",
        100.0 * ((double)state->entry_count / (double)state->size),
        100.0 * state->max_load_factor);

    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            uint64_t displacement = s_distance(state, i);
            total_disp += displacement;
            if (displacement > max_disp) {
                max_disp = displacement;
            }
        }
    }

    size_t *disp_counts = calloc(max_disp + 1, sizeof(*disp_counts));

    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            disp_counts[s_distance(state, i)]++;
        }
    }

    uint64_t median = 0;
    uint64_t passed = 0;
    for (uint64_t i = 0; i <= max_disp && passed < total_disp / 2; i++) {
        median = i;
        passed += disp_counts[i];
    }

    printf(
        "Displacement statistics: Avg %02.2lf max %llu median %llu\n",
        (double)total_disp / (double)state->entry_count,
        (unsigned long long)max_disp,
        (unsigned long long)median);
    for (uint64_t i = 0; i <= max_disp; i++) {
        printf("Displacement %2llu: %zu entries\n", (unsigned long long)i, disp_counts[i]);
    }
    free(disp_counts);
    printf("\n");
}
#endif

size_t aws_hash_table_get_entry_count(const struct aws_hash_table *map) {
    struct hash_table_state *state = map->p_impl;
    return state->entry_count;
}

/* Given a header template, allocates space for a hash table of the appropriate
 * size, and copies the state header into this allocated memory, which is
 * returned.
 */
static struct hash_table_state *s_alloc_state(const struct hash_table_state *template) {
    size_t required_bytes;
    if (hash_table_state_required_bytes(template->size, &required_bytes)) {
        return NULL;
    }

    /* An empty slot has hashcode 0. So this marks all slots as empty */
    struct hash_table_state *state = aws_mem_calloc(template->alloc, 1, required_bytes);

    if (state == NULL) {
        return state;
    }

    *state = *template;
    return state;
}

/* Computes the correct size and max_load based on a requested size. */
static int s_update_template_size(struct hash_table_state *template, size_t expected_elements) {
    size_t min_size = expected_elements;

    if (min_size < 2) {
        min_size = 2;
    }

    /* size is always a power of 2 */
    size_t size;
    if (aws_round_up_to_power_of_two(min_size, &size)) {
        return AWS_OP_ERR;
    }

    /* Update the template once we've calculated everything successfully */
    template->size = size;
    template->max_load = (size_t)(template->max_load_factor * (double)template->size);
    /* Ensure that there is always at least one empty slot in the hash table */
    if (template->max_load >= size) {
        template->max_load = size - 1;
    }

    /* Since size is a power of 2: (index & (size - 1)) == (index % size) */
    template->mask = size - 1;

    return AWS_OP_SUCCESS;
}
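
/* Worked example (illustrative, not part of the library): requesting
 * expected_elements = 10 rounds min_size up to the next power of two, so
 * size = 16, max_load = (size_t)(0.95 * 16) = 15, and mask = 15 (0xF).
 * Indices are then reduced with (index & 15), which equals index % 16. */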

int aws_hash_table_init(
    struct aws_hash_table *map,
    struct aws_allocator *alloc,
    size_t size,
    aws_hash_fn *hash_fn,
    aws_hash_callback_eq_fn *equals_fn,
    aws_hash_callback_destroy_fn *destroy_key_fn,
    aws_hash_callback_destroy_fn *destroy_value_fn) {
    AWS_PRECONDITION(map != NULL);
    AWS_PRECONDITION(alloc != NULL);
    AWS_PRECONDITION(hash_fn != NULL);
    AWS_PRECONDITION(equals_fn != NULL);
    struct hash_table_state template;
    template.hash_fn = hash_fn;
    template.equals_fn = equals_fn;
    template.destroy_key_fn = destroy_key_fn;
    template.destroy_value_fn = destroy_value_fn;
    template.alloc = alloc;

    template.entry_count = 0;
    template.max_load_factor = 0.95; /* TODO - make configurable? */

    if (s_update_template_size(&template, size)) {
        return AWS_OP_ERR;
    }
    map->p_impl = s_alloc_state(&template);

    if (!map->p_impl) {
        return AWS_OP_ERR;
    }

    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}
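
/* A minimal usage sketch (illustrative only): a table keyed by C strings,
 * using the hash and equality helpers defined later in this file, with no
 * destructors. Error handling is elided for brevity. */
#if 0
static void s_usage_example(struct aws_allocator *alloc) {
    struct aws_hash_table table;
    aws_hash_table_init(&table, alloc, 16, aws_hash_c_string, aws_hash_callback_c_str_eq, NULL, NULL);

    int was_created = 0;
    aws_hash_table_put(&table, "key", (void *)"value", &was_created); /* was_created == 1 */

    struct aws_hash_element *elem = NULL;
    aws_hash_table_find(&table, "key", &elem); /* elem->value is "value" */

    aws_hash_table_clean_up(&table);
}
#endif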

void aws_hash_table_clean_up(struct aws_hash_table *map) {
    AWS_PRECONDITION(map != NULL);
    AWS_PRECONDITION(
        map->p_impl == NULL || aws_hash_table_is_valid(map),
        "Input aws_hash_table [map] must be valid or hash_table_state pointer [map->p_impl] must be NULL, in case "
        "aws_hash_table_clean_up was called twice.");
    struct hash_table_state *state = map->p_impl;

    /* Ensure that we're idempotent */
    if (!state) {
        return;
    }

    aws_hash_table_clear(map);
    aws_mem_release(map->p_impl->alloc, map->p_impl);

    map->p_impl = NULL;
    AWS_POSTCONDITION(map->p_impl == NULL);
}

void aws_hash_table_swap(struct aws_hash_table *AWS_RESTRICT a, struct aws_hash_table *AWS_RESTRICT b) {
    AWS_PRECONDITION(a != b);
    struct aws_hash_table tmp = *a;
    *a = *b;
    *b = tmp;
}

void aws_hash_table_move(struct aws_hash_table *AWS_RESTRICT to, struct aws_hash_table *AWS_RESTRICT from) {
    AWS_PRECONDITION(to != NULL);
    AWS_PRECONDITION(from != NULL);
    AWS_PRECONDITION(to != from);
    AWS_PRECONDITION(aws_hash_table_is_valid(from));

    *to = *from;
    AWS_ZERO_STRUCT(*from);
    AWS_POSTCONDITION(aws_hash_table_is_valid(to));
}

/* Tries to find where the requested key is or where it should go if put.
 * Returns AWS_ERROR_SUCCESS if the item existed (leaving it in *entry),
 * or AWS_ERROR_HASHTBL_ITEM_NOT_FOUND if it did not (putting its destination
 * in *entry). Note that this does not take care of displacing whatever was in
 * that entry before.
 *
 * probe_idx is set to the probe index of the entry found.
 */

static int s_find_entry1(
    struct hash_table_state *state,
    uint64_t hash_code,
    const void *key,
    struct hash_table_entry **p_entry,
    size_t *p_probe_idx);

/* Inlined fast path: Check the first slot, only. */
/* TODO: Force inlining? */
static inline int s_find_entry(
    struct hash_table_state *state,
    uint64_t hash_code,
    const void *key,
    struct hash_table_entry **p_entry,
    size_t *p_probe_idx) {
    struct hash_table_entry *entry = &state->slots[hash_code & state->mask];

    if (entry->hash_code == 0) {
        if (p_probe_idx) {
            *p_probe_idx = 0;
        }
        *p_entry = entry;
        return AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
    }

    if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) {
        if (p_probe_idx) {
            *p_probe_idx = 0;
        }
        *p_entry = entry;
        return AWS_OP_SUCCESS;
    }

    return s_find_entry1(state, hash_code, key, p_entry, p_probe_idx);
}

static int s_find_entry1(
    struct hash_table_state *state,
    uint64_t hash_code,
    const void *key,
    struct hash_table_entry **p_entry,
    size_t *p_probe_idx) {
    size_t probe_idx = 1;
    /* If we find a deleted entry, we record that index and return it as our probe index (i.e. we'll keep searching to
     * see if it already exists, but if not we'll overwrite the deleted entry).
     */

    int rv;
    struct hash_table_entry *entry;
    /* This loop is guaranteed to terminate because entry_probe is bounded above by state->mask (i.e. state->size - 1).
     * Since probe_idx increments every loop iteration, it will become larger than entry_probe after at most
     * state->size transitions, and the loop will exit (if it hasn't already).
     */
    while (1) {
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        uint64_t index = (hash_code + probe_idx) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
        entry = &state->slots[index];
        if (!entry->hash_code) {
            rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
            break;
        }

        if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) {
            rv = AWS_ERROR_SUCCESS;
            break;
        }

#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        uint64_t entry_probe = (index - entry->hash_code) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif

        if (entry_probe < probe_idx) {
            /* We now know that our target entry cannot exist; if it did exist,
             * it would be at the current location as it has a higher probe
             * length than the entry we are examining and thus would have
             * preempted that item
             */
            rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
            break;
        }

        probe_idx++;
    }

    *p_entry = entry;
    if (p_probe_idx) {
        *p_probe_idx = probe_idx;
    }

    return rv;
}
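
/* Illustrative early-termination trace (not part of the library): with
 * mask = 7, suppose the key's home slot is 2 and the fast path already missed.
 * At probe_idx = 1 we examine slot 3; if its occupant lives in its own home
 * slot (entry_probe = 0), then entry_probe < probe_idx and the key cannot be
 * present: had it been inserted, Robin Hood displacement would have evicted
 * that shorter-probe occupant. We stop without scanning further. */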

int aws_hash_table_find(const struct aws_hash_table *map, const void *key, struct aws_hash_element **p_elem) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    AWS_PRECONDITION(AWS_OBJECT_PTR_IS_WRITABLE(p_elem), "Input aws_hash_element pointer [p_elem] must be writable.");

    struct hash_table_state *state = map->p_impl;
    uint64_t hash_code = s_hash_for(state, key);
    struct hash_table_entry *entry;

    int rv = s_find_entry(state, hash_code, key, &entry, NULL);

    if (rv == AWS_ERROR_SUCCESS) {
        *p_elem = &entry->element;
    } else {
        *p_elem = NULL;
    }
    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}

/**
 * Attempts to find a home for the given entry.
 * If the entry was empty (i.e. hash-code of 0), then the function does nothing and returns NULL.
 * Otherwise, it emplaces the item, and returns a pointer to the newly emplaced entry.
 * This function is only called after the hash-table has been expanded to fit the new element,
 * so it should never fail.
 */
static struct hash_table_entry *s_emplace_item(
    struct hash_table_state *state,
    struct hash_table_entry entry,
    size_t probe_idx) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));

    if (entry.hash_code == 0) {
        AWS_RETURN_WITH_POSTCONDITION(NULL, hash_table_state_is_valid(state));
    }

    struct hash_table_entry *rval = NULL;

    /* Since a valid hash_table has at least one empty element, this loop will always terminate in at most linear time
     */
    while (entry.hash_code != 0) {
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        size_t index = (size_t)(entry.hash_code + probe_idx) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
        struct hash_table_entry *victim = &state->slots[index];

#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        size_t victim_probe_idx = (size_t)(index - victim->hash_code) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif

        if (!victim->hash_code || victim_probe_idx < probe_idx) {
            /* The first thing we emplace is the entry itself. A pointer to its location becomes the rval */
            if (!rval) {
                rval = victim;
            }

            struct hash_table_entry tmp = *victim;
            *victim = entry;
            entry = tmp;

            probe_idx = victim_probe_idx + 1;
        } else {
            probe_idx++;
        }
    }

    AWS_RETURN_WITH_POSTCONDITION(
        rval,
        hash_table_state_is_valid(state) && rval >= &state->slots[0] && rval < &state->slots[state->size],
        "Output hash_table_entry pointer [rval] must point in the slots of [state].");
}
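
/* Illustrative displacement trace (not part of the library): suppose the
 * incoming entry has probe_idx = 2 when it reaches a slot whose occupant has
 * victim_probe_idx = 1. Since 1 < 2, the occupant is the "richer" entry (it
 * sits closer to its home), so the incoming entry takes the slot and the
 * evicted occupant continues probing from victim_probe_idx + 1 = 2. This
 * bounds the variance of probe lengths, the core Robin Hood invariant. */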

static int s_expand_table(struct aws_hash_table *map) {
    struct hash_table_state *old_state = map->p_impl;
    struct hash_table_state template = *old_state;

    size_t new_size;
    if (aws_mul_size_checked(template.size, 2, &new_size)) {
        return AWS_OP_ERR;
    }

    if (s_update_template_size(&template, new_size)) {
        return AWS_OP_ERR;
    }

    struct hash_table_state *new_state = s_alloc_state(&template);
    if (!new_state) {
        return AWS_OP_ERR;
    }

    for (size_t i = 0; i < old_state->size; i++) {
        struct hash_table_entry entry = old_state->slots[i];
        if (entry.hash_code) {
            /* We can directly emplace since we know we won't put the same item twice */
            s_emplace_item(new_state, entry, 0);
        }
    }

    map->p_impl = new_state;
    aws_mem_release(new_state->alloc, old_state);

    return AWS_OP_SUCCESS;
}

int aws_hash_table_create(
    struct aws_hash_table *map,
    const void *key,
    struct aws_hash_element **p_elem,
    int *was_created) {

    struct hash_table_state *state = map->p_impl;
    uint64_t hash_code = s_hash_for(state, key);
    struct hash_table_entry *entry;
    size_t probe_idx;
    int ignored;
    if (!was_created) {
        was_created = &ignored;
    }

    int rv = s_find_entry(state, hash_code, key, &entry, &probe_idx);

    if (rv == AWS_ERROR_SUCCESS) {
        if (p_elem) {
            *p_elem = &entry->element;
        }
        *was_created = 0;
        return AWS_OP_SUCCESS;
    }

    /* Okay, we need to add an entry. Check the load factor first. */
    size_t incr_entry_count;
    if (aws_add_size_checked(state->entry_count, 1, &incr_entry_count)) {
        return AWS_OP_ERR;
    }
    if (incr_entry_count > state->max_load) {
        rv = s_expand_table(map);
        if (rv != AWS_OP_SUCCESS) {
            /* Any error was already raised in expand_table */
            return rv;
        }
        state = map->p_impl;
        /* If we expanded the table, we need to discard the probe index returned from find_entry,
         * as it's likely that we can find a more desirable slot. If we don't, then later gets will
         * terminate before reaching our probe index.
         *
         * n.b. currently we ignore this probe_idx subsequently, but leaving this here so we don't
         * forget when we optimize later. */
        probe_idx = 0;
    }

    state->entry_count++;
    struct hash_table_entry new_entry;
    new_entry.element.key = key;
    new_entry.element.value = NULL;
    new_entry.hash_code = hash_code;

    entry = s_emplace_item(state, new_entry, probe_idx);

    if (p_elem) {
        *p_elem = &entry->element;
    }

    *was_created = 1;

    return AWS_OP_SUCCESS;
}
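
/* A minimal sketch (illustrative only) of create's find-or-insert contract:
 * on a fresh insert the element's value starts out NULL and the caller fills
 * it in; on a hit the existing element is returned untouched. */
#if 0
static void s_create_example(struct aws_hash_table *table, const char *key) {
    struct aws_hash_element *elem = NULL;
    int was_created = 0;
    if (aws_hash_table_create(table, key, &elem, &was_created) == AWS_OP_SUCCESS) {
        if (was_created) {
            elem->value = (void *)"fresh value"; /* new slot: value starts NULL */
        }
        /* otherwise elem points at the pre-existing element */
    }
}
#endif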

AWS_COMMON_API
int aws_hash_table_put(struct aws_hash_table *map, const void *key, void *value, int *was_created) {
    struct aws_hash_element *p_elem;
    int was_created_fallback;

    if (!was_created) {
        was_created = &was_created_fallback;
    }

    if (aws_hash_table_create(map, key, &p_elem, was_created)) {
        return AWS_OP_ERR;
    }

    /*
     * aws_hash_table_create might resize the table, which results in map->p_impl changing.
     * It is therefore important to wait until after aws_hash_table_create returns to read p_impl.
     */
    struct hash_table_state *state = map->p_impl;

    if (!*was_created) {
        if (p_elem->key != key && state->destroy_key_fn) {
            state->destroy_key_fn((void *)p_elem->key);
        }

        if (state->destroy_value_fn) {
            state->destroy_value_fn((void *)p_elem->value);
        }
    }

    p_elem->key = key;
    p_elem->value = value;

    return AWS_OP_SUCCESS;
}

/* Clears an entry. Does _not_ invoke destructor callbacks.
 * Returns the last slot touched (note that if we wrap, we'll report an index
 * lower than the original entry's index)
 */
static size_t s_remove_entry(struct hash_table_state *state, struct hash_table_entry *entry) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));
    AWS_PRECONDITION(state->entry_count > 0);
    AWS_PRECONDITION(
        entry >= &state->slots[0] && entry < &state->slots[state->size],
        "Input hash_table_entry [entry] pointer must point in the available slots.");
    state->entry_count--;

    /* Shift subsequent entries back until we find an entry that belongs at its
     * current position. This is important to ensure that subsequent searches
     * don't terminate at the removed element.
     */
    size_t index = s_index_for(state, entry);
    /* There is always at least one empty slot in the hash table, so this loop always terminates */
    while (1) {
        size_t next_index = (index + 1) & state->mask;

        /* If we hit an empty slot, stop */
        if (!state->slots[next_index].hash_code) {
            break;
        }
        /* If the next slot is at the start of the probe sequence, stop.
         * We know that nothing with an earlier home slot is after this;
         * otherwise this index-zero entry would have been evicted from its
         * home.
         */
        if ((state->slots[next_index].hash_code & state->mask) == next_index) {
            break;
        }

        /* Okay, shift this one back */
        state->slots[index] = state->slots[next_index];
        index = next_index;
    }

    /* Clear the entry we shifted out of */
    AWS_ZERO_STRUCT(state->slots[index]);
    AWS_RETURN_WITH_POSTCONDITION(index, hash_table_state_is_valid(state) && index <= state->size);
}
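
/* Illustrative backward-shift trace (not part of the library): with mask = 7,
 * suppose slots 2, 3, 4 hold entries A, B, C, where A and B both have home
 * slot 2 and C has home slot 4. Removing A copies B from slot 3 into slot 2,
 * then stops at slot 4 because C sits in its home slot
 * ((hash_code & mask) == 4), and finally zeroes slot 3. No tombstones are
 * needed and every remaining probe sequence stays unbroken. */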

int aws_hash_table_remove(
    struct aws_hash_table *map,
    const void *key,
    struct aws_hash_element *p_value,
    int *was_present) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    AWS_PRECONDITION(
        p_value == NULL || AWS_OBJECT_PTR_IS_WRITABLE(p_value), "Input pointer [p_value] must be NULL or writable.");
    AWS_PRECONDITION(
        was_present == NULL || AWS_OBJECT_PTR_IS_WRITABLE(was_present),
        "Input pointer [was_present] must be NULL or writable.");

    struct hash_table_state *state = map->p_impl;
    uint64_t hash_code = s_hash_for(state, key);
    struct hash_table_entry *entry;
    int ignored;

    if (!was_present) {
        was_present = &ignored;
    }

    int rv = s_find_entry(state, hash_code, key, &entry, NULL);

    if (rv != AWS_ERROR_SUCCESS) {
        *was_present = 0;
        AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
    }

    *was_present = 1;

    if (p_value) {
        *p_value = entry->element;
    } else {
        if (state->destroy_key_fn) {
            state->destroy_key_fn((void *)entry->element.key);
        }
        if (state->destroy_value_fn) {
            state->destroy_value_fn(entry->element.value);
        }
    }
    s_remove_entry(state, entry);

    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}

int aws_hash_table_remove_element(struct aws_hash_table *map, struct aws_hash_element *p_value) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    AWS_PRECONDITION(p_value != NULL);

    struct hash_table_state *state = map->p_impl;
    struct hash_table_entry *entry = AWS_CONTAINER_OF(p_value, struct hash_table_entry, element);

    s_remove_entry(state, entry);

    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}

int aws_hash_table_foreach(
    struct aws_hash_table *map,
    int (*callback)(void *context, struct aws_hash_element *pElement),
    void *context) {

    for (struct aws_hash_iter iter = aws_hash_iter_begin(map); !aws_hash_iter_done(&iter); aws_hash_iter_next(&iter)) {
        int rv = callback(context, &iter.element);
        if (rv & AWS_COMMON_HASH_TABLE_ITER_ERROR) {
            int error = aws_last_error();
            if (error == AWS_ERROR_SUCCESS) {
                aws_raise_error(AWS_ERROR_UNKNOWN);
            }
            return AWS_OP_ERR;
        }

        if (rv & AWS_COMMON_HASH_TABLE_ITER_DELETE) {
            aws_hash_iter_delete(&iter, false);
        }

        if (!(rv & AWS_COMMON_HASH_TABLE_ITER_CONTINUE)) {
            break;
        }
    }

    return AWS_OP_SUCCESS;
}
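
/* Illustrative foreach callback (not part of the library): removes entries
 * whose value is NULL and keeps iterating over the rest. */
#if 0
static int s_prune_null_values(void *context, struct aws_hash_element *pElement) {
    (void)context;
    int rv = AWS_COMMON_HASH_TABLE_ITER_CONTINUE;
    if (pElement->value == NULL) {
        rv |= AWS_COMMON_HASH_TABLE_ITER_DELETE;
    }
    return rv;
}
/* usage: aws_hash_table_foreach(map, s_prune_null_values, NULL); */
#endif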

bool aws_hash_table_eq(
    const struct aws_hash_table *a,
    const struct aws_hash_table *b,
    aws_hash_callback_eq_fn *value_eq) {
    AWS_PRECONDITION(aws_hash_table_is_valid(a));
    AWS_PRECONDITION(aws_hash_table_is_valid(b));
    AWS_PRECONDITION(value_eq != NULL);

    if (aws_hash_table_get_entry_count(a) != aws_hash_table_get_entry_count(b)) {
        AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
    }

    /*
     * Now that we have established that the two tables have the same number of
     * entries, we can simply iterate one and compare against the same key in
     * the other.
     */
    for (size_t i = 0; i < a->p_impl->size; ++i) {
        const struct hash_table_entry *const a_entry = &a->p_impl->slots[i];
        if (a_entry->hash_code == 0) {
            continue;
        }

        struct aws_hash_element *b_element = NULL;

        aws_hash_table_find(b, a_entry->element.key, &b_element);

        if (!b_element) {
            /* Key is present in A only */
            AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
        }

        if (!s_safe_eq_check(value_eq, a_entry->element.value, b_element->value)) {
            AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
        }
    }
    AWS_RETURN_WITH_POSTCONDITION(true, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
}

/**
 * Given an iterator and a start slot, finds the next filled slot if one exists.
 * Otherwise, returns an iter that will return true for aws_hash_iter_done().
 * Note that aws_hash_iter_is_valid() need not hold on entry to the function, since
 * it can be called on a partially constructed iter from aws_hash_iter_begin().
 *
 * Note that calling this on an iterator which is "done" is idempotent: it will return another
 * iterator which is "done".
 */
static inline void s_get_next_element(struct aws_hash_iter *iter, size_t start_slot) {
    AWS_PRECONDITION(iter != NULL);
    AWS_PRECONDITION(aws_hash_table_is_valid(iter->map));
    struct hash_table_state *state = iter->map->p_impl;
    size_t limit = iter->limit;

    for (size_t i = start_slot; i < limit; i++) {
        struct hash_table_entry *entry = &state->slots[i];

        if (entry->hash_code) {
            iter->element = entry->element;
            iter->slot = i;
            iter->status = AWS_HASH_ITER_STATUS_READY_FOR_USE;
            return;
        }
    }
    iter->element.key = NULL;
    iter->element.value = NULL;
    iter->slot = iter->limit;
    iter->status = AWS_HASH_ITER_STATUS_DONE;
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
}

struct aws_hash_iter aws_hash_iter_begin(const struct aws_hash_table *map) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    struct hash_table_state *state = map->p_impl;
    struct aws_hash_iter iter;
    AWS_ZERO_STRUCT(iter);
    iter.map = map;
    iter.limit = state->size;
    s_get_next_element(&iter, 0);
    AWS_RETURN_WITH_POSTCONDITION(
        iter,
        aws_hash_iter_is_valid(&iter) &&
            (iter.status == AWS_HASH_ITER_STATUS_DONE || iter.status == AWS_HASH_ITER_STATUS_READY_FOR_USE),
        "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
}

bool aws_hash_iter_done(const struct aws_hash_iter *iter) {
    AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
    AWS_PRECONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
        "Input aws_hash_iter [iter] must either be done, or ready to use.");
    /*
     * SIZE_MAX is a valid (non-terminal) value for iter->slot in the event that
     * we delete slot 0. See comments in aws_hash_iter_delete.
     *
     * As such we must use == rather than >= here.
     */
    bool rval = (iter->slot == iter->limit);
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
        "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
    AWS_POSTCONDITION(
        rval == (iter->status == AWS_HASH_ITER_STATUS_DONE),
        "Output bool [rval] must be true if and only if the status of [iter] is DONE.");
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
    return rval;
}

void aws_hash_iter_next(struct aws_hash_iter *iter) {
    AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
    s_get_next_element(iter, iter->slot + 1);
#ifdef CBMC
#    pragma CPROVER check pop
#endif
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
        "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
}

void aws_hash_iter_delete(struct aws_hash_iter *iter, bool destroy_contents) {
    AWS_PRECONDITION(
        iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, "Input aws_hash_iter [iter] must be ready for use.");
    AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
    AWS_PRECONDITION(
        iter->map->p_impl->entry_count > 0,
        "The hash_table_state pointed by input [iter] must contain at least one entry.");

    struct hash_table_state *state = iter->map->p_impl;
    if (destroy_contents) {
        if (state->destroy_key_fn) {
            state->destroy_key_fn((void *)iter->element.key);
        }
        if (state->destroy_value_fn) {
            state->destroy_value_fn(iter->element.value);
        }
    }

    size_t last_index = s_remove_entry(state, &state->slots[iter->slot]);

    /* If we shifted elements that are not part of the window we intend to iterate
     * over, it means we shifted an element that we already visited into the
     * iter->limit - 1 position. To avoid double iteration, we'll now reduce the
     * limit to compensate.
     *
     * Note that last_index cannot equal iter->slot, because slots[iter->slot]
     * is empty before we start walking the table.
     */
    if (last_index < iter->slot || last_index >= iter->limit) {
        iter->limit--;
    }

    /*
     * After removing this entry, the next entry might be in the same slot, or
     * in some later slot, or we might have no further entries.
     *
     * We also expect that the caller will call aws_hash_iter_done and aws_hash_iter_next
     * after this delete call. This gets a bit tricky if we just deleted the value
     * in slot 0, and a new value has shifted in.
     *
     * To deal with this, we'll just step back one slot, and let _next start iteration
     * at our current slot. Note that if we just deleted slot 0, this will result in
     * underflowing to SIZE_MAX; we have to take care in aws_hash_iter_done to avoid
     * treating this as an end-of-iteration condition.
     */
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
    iter->slot--;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
    iter->status = AWS_HASH_ITER_STATUS_DELETE_CALLED;
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DELETE_CALLED,
        "The status of output aws_hash_iter [iter] must be DELETE_CALLED.");
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
}
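
/* Illustrative iterate-and-delete sketch (not part of the library), mirroring
 * the pattern aws_hash_table_foreach uses internally: deleting through the
 * iterator stays well defined even when the backward shift moves an unvisited
 * element into the just-vacated slot, because aws_hash_iter_delete steps the
 * slot back and aws_hash_iter_next resumes from it. */
#if 0
static void s_iter_delete_example(struct aws_hash_table *map) {
    for (struct aws_hash_iter iter = aws_hash_iter_begin(map); !aws_hash_iter_done(&iter);
         aws_hash_iter_next(&iter)) {
        if (iter.element.value == NULL) {
            aws_hash_iter_delete(&iter, false);
        }
    }
}
#endif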

void aws_hash_table_clear(struct aws_hash_table *map) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    struct hash_table_state *state = map->p_impl;

    /* Check that we have at least one destructor before iterating over the table */
    if (state->destroy_key_fn || state->destroy_value_fn) {
        for (size_t i = 0; i < state->size; ++i) {
            struct hash_table_entry *entry = &state->slots[i];
            if (!entry->hash_code) {
                continue;
            }
            if (state->destroy_key_fn) {
                state->destroy_key_fn((void *)entry->element.key);
            }
            if (state->destroy_value_fn) {
                state->destroy_value_fn(entry->element.value);
            }
        }
    }
    /* Since hash code 0 represents an empty slot we can just zero out the
     * entire table. */
    memset(state->slots, 0, sizeof(*state->slots) * state->size);

    state->entry_count = 0;
    AWS_POSTCONDITION(aws_hash_table_is_valid(map));
}

uint64_t aws_hash_c_string(const void *item) {
    AWS_PRECONDITION(aws_c_string_is_valid(item));
    const char *str = item;

    /* first digits of pi in hex */
    uint32_t b = 0x3243F6A8, c = 0x885A308D;
    hashlittle2(str, strlen(str), &c, &b);

    return ((uint64_t)b << 32) | c;
}

uint64_t aws_hash_string(const void *item) {
    AWS_PRECONDITION(aws_string_is_valid(item));
    const struct aws_string *str = item;

    /* first digits of pi in hex */
    uint32_t b = 0x3243F6A8, c = 0x885A308D;
    hashlittle2(aws_string_bytes(str), str->len, &c, &b);
    AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_string_is_valid(str));
}

uint64_t aws_hash_byte_cursor_ptr(const void *item) {
    AWS_PRECONDITION(aws_byte_cursor_is_valid(item));
    const struct aws_byte_cursor *cur = item;

    /* first digits of pi in hex */
    uint32_t b = 0x3243F6A8, c = 0x885A308D;
    hashlittle2(cur->ptr, cur->len, &c, &b);
    AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_byte_cursor_is_valid(cur));
}

uint64_t aws_hash_ptr(const void *item) {
    /* Since the numeric value of the pointer is considered, not the memory behind it, 0 is an acceptable value */
    /* first digits of e in hex
     * 2.b7e 1516 28ae d2a6 */
    uint32_t b = 0x2b7e1516, c = 0x28aed2a6;

    hashlittle2(&item, sizeof(item), &c, &b);

    return ((uint64_t)b << 32) | c;
}

uint64_t aws_hash_combine(uint64_t item1, uint64_t item2) {
    uint32_t b = item2 & 0xFFFFFFFF; /* LSB */
    uint32_t c = item2 >> 32;        /* MSB */

    hashlittle2(&item1, sizeof(item1), &c, &b);
    return ((uint64_t)b << 32) | c;
}
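
/* Illustrative composite-key hashing sketch (not part of the library): combine
 * the hashes of two fields into one 64-bit hash, e.g. for a (name, id) pair.
 * The function and field names here are hypothetical. */
#if 0
static uint64_t s_hash_name_and_id(const char *name, uint64_t id) {
    return aws_hash_combine(aws_hash_c_string(name), id);
}
#endif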

bool aws_hash_callback_c_str_eq(const void *a, const void *b) {
    AWS_PRECONDITION(aws_c_string_is_valid(a));
    AWS_PRECONDITION(aws_c_string_is_valid(b));
    bool rval = !strcmp(a, b);
    AWS_RETURN_WITH_POSTCONDITION(rval, aws_c_string_is_valid(a) && aws_c_string_is_valid(b));
}

bool aws_hash_callback_string_eq(const void *a, const void *b) {
    AWS_PRECONDITION(aws_string_is_valid(a));
    AWS_PRECONDITION(aws_string_is_valid(b));
    bool rval = aws_string_eq(a, b);
    AWS_RETURN_WITH_POSTCONDITION(rval, aws_string_is_valid(a) && aws_string_is_valid(b));
}

void aws_hash_callback_string_destroy(void *a) {
    AWS_PRECONDITION(aws_string_is_valid(a));
    aws_string_destroy(a);
}

bool aws_ptr_eq(const void *a, const void *b) {
    return a == b;
}

/**
 * Best-effort check of aws_hash_table invariants: verifies that the map and
 * its implementation pointer are non-null, then delegates to
 * hash_table_state_is_valid.
 */
bool aws_hash_table_is_valid(const struct aws_hash_table *map) {
    return map && map->p_impl && hash_table_state_is_valid(map->p_impl);
}

/**
 * Best-effort check of hash_table_state data-structure invariants.
 * Some invariants, such as that the number of entries is actually the
 * same as the entry_count field, would require a loop to check.
 */
bool hash_table_state_is_valid(const struct hash_table_state *map) {
    if (!map) {
        return false;
    }
    bool hash_fn_nonnull = (map->hash_fn != NULL);
    bool equals_fn_nonnull = (map->equals_fn != NULL);
    /* destroy_key_fn and destroy_value_fn are both allowed to be NULL */
    bool alloc_nonnull = (map->alloc != NULL);
    bool size_at_least_two = (map->size >= 2);
    bool size_is_power_of_two = aws_is_power_of_two(map->size);
    bool entry_count = (map->entry_count <= map->max_load);
    bool max_load = (map->max_load < map->size);
    bool mask_is_correct = (map->mask == (map->size - 1));
    bool max_load_factor_bounded = map->max_load_factor == 0.95; /* (map->max_load_factor < 1.0) */
    bool slots_allocated = AWS_MEM_IS_WRITABLE(&map->slots[0], sizeof(map->slots[0]) * map->size);

    return hash_fn_nonnull && equals_fn_nonnull && alloc_nonnull && size_at_least_two && size_is_power_of_two &&
           entry_count && max_load && mask_is_correct && max_load_factor_bounded && slots_allocated;
}

/**
 * Given a pointer to a hash_iter, checks that it is well-formed, with all data-structure invariants.
 */
bool aws_hash_iter_is_valid(const struct aws_hash_iter *iter) {
    if (!iter) {
        return false;
    }
    if (!iter->map) {
        return false;
    }
    if (!aws_hash_table_is_valid(iter->map)) {
        return false;
    }
    if (iter->limit > iter->map->p_impl->size) {
        return false;
    }

    switch (iter->status) {
        case AWS_HASH_ITER_STATUS_DONE:
            /* Done iff slot == limit */
            return iter->slot == iter->limit;
        case AWS_HASH_ITER_STATUS_DELETE_CALLED:
            /* iter->slot can underflow to SIZE_MAX after a delete;
             * see the comments for aws_hash_iter_delete() */
            return iter->slot <= iter->limit || iter->slot == SIZE_MAX;
        case AWS_HASH_ITER_STATUS_READY_FOR_USE:
            /* A slot must point to a valid location (i.e. hash_code != 0) */
            return iter->slot < iter->limit && iter->map->p_impl->slots[iter->slot].hash_code != 0;
    }
    /* Invalid status code */
    return false;
}

/**
 * Determine the total number of bytes needed for a hash-table with
 * "size" slots. If the result would overflow a size_t, return
 * AWS_OP_ERR; otherwise, return AWS_OP_SUCCESS with the result in
 * "required_bytes".
 */
int hash_table_state_required_bytes(size_t size, size_t *required_bytes) {

    size_t elemsize;
    if (aws_mul_size_checked(size, sizeof(struct hash_table_entry), &elemsize)) {
        return AWS_OP_ERR;
    }

    if (aws_add_size_checked(elemsize, sizeof(struct hash_table_state), required_bytes)) {
        return AWS_OP_ERR;
    }

    return AWS_OP_SUCCESS;
}
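
/* Illustrative arithmetic (not part of the library): on a platform where
 * sizeof(struct hash_table_entry) == 24 and sizeof(struct hash_table_state)
 * == 64 (hypothetical sizes), a table with 16 slots needs
 * 16 * 24 + 64 = 448 bytes; both the multiply and the add above are
 * overflow-checked before any allocation happens. */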