1 /*
2 ** Copyright (C) 2001-2020 by Carnegie Mellon University.
3 **
4 ** @OPENSOURCE_LICENSE_START@
5 ** See license information in ../../LICENSE.txt
6 ** @OPENSOURCE_LICENSE_END@
7 */
8
9 /* File: hashlib.c: implements core hash library. */
10
11 #include <silk/silk.h>
12
13 RCSIDENT("$SiLK: hashlib.c ef14e54179be 2020-04-14 21:57:45Z mthomas $");
14
15 #include <silk/hashlib.h>
16 #include <silk/utils.h>
17
18 #ifdef HASHLIB_TRACE_LEVEL
19 #define TRACEMSG_LEVEL HASHLIB_TRACE_LEVEL
20 #endif
21 #define TRACEMSG(lvl, x) TRACEMSG_TO_TRACEMSGLVL(lvl, x)
22 #include <silk/sktracemsg.h>
23
24 #if TRACEMSG_LEVEL > 0
25 #define TRC_FMT "hashlib.c:%d [%p] "
26 #define TRC_ARG(v_v) __LINE__, (void *)(v_v)
27 #endif
28
29
/* Configuration */

/*
 *    The maximum size (in terms of number of bytes) of an individual
 *    hash block.  One eighth of SIZE_MAX so that block sizes cannot
 *    approach the limits of size_t arithmetic.
 */
#define HASH_MAX_MEMORY_BLOCK ((uint64_t)(SIZE_MAX >> 3))

/*
 *    The maximum size (in terms of number of bytes) for the entire
 *    hash table.  May be modified by the environment variable named
 *    by HASHLIB_ENV_MAXMEM.
 *
 *    The default value is the maximum size of any block multiplied by
 *    three, where the three comes from assuming that the
 *    SECONDARY_BLOCK_FRACTION is set to -3, which is its default.
 */
#define HASH_MAX_MEMORY_TOTAL (HASH_MAX_MEMORY_BLOCK * 3)

/*
 *    Maximum number of blocks ever allocated, used for sizing the
 *    array of HashBlocks in the HashTable.
 *
 *    Once the primary block reaches HASH_MAX_MEMORY_BLOCK (the
 *    maximum block size), new blocks will be allocated until this
 *    maximum is reached.  This value cannot be greater than the
 *    HASHLIB_ITER_MAX_BLOCKS value defined in hashlib.h.
 */
#define HASH_MAX_BLOCKS 8

#if HASH_MAX_BLOCKS > HASHLIB_ITER_MAX_BLOCKS
#error "HASH_MAX_BLOCKS may not be greater than HASHLIB_ITER_MAX_BLOCKS"
#endif
63
64 /*
65 * When the number of HashBlocks gets to this count, a rehash is
66 * triggered unless the first block is already at the maximum block
67 * size.
68 *
69 * This value is not static since hashlib_metrics.c may set it.
70 */
71 uint32_t REHASH_BLOCK_COUNT = 4;
72
73 /*
74 * The SECONDARY_BLOCK_FRACTION is used to determine the size
75 * HashBlocks following the first.
76 *
77 * If non-negative, tables 1...HASH_MAX_BLOCKS-1 have size given by
78 *
79 * table_size >> SECONDARY_BLOCK_FRACTION
80 *
81 * May also have one of the following values:
82 *
83 * = -1 means to keep halving
84 *
85 * = -2 means to keep halving starting at a secondary block size
86 * 1/4 of block 0
87 *
88 * = -3 means block 1 is 1/2 block 0, and all other blocks are 1/4
89 * block 0.
90 *
91 * = -4 means block 1 is 1/4 block 0, and all other blocks are 1/8
92 * block 0.
93 *
94 * In all cases, the size of blocks REHASH_BLOCK_COUNT through
95 * HASH_MAX_BLOCKS is fixed.
96 *
97 * This value is not static since hashlib_metrics.c may set it.
98 */
99 int32_t SECONDARY_BLOCK_FRACTION = -3;
100
101 /*
102 * The minimum number of entries that may be stored in a block.
103 * This value must not be less than 256.
104 */
105 #ifndef MIN_BLOCK_ENTRIES
106 #define MIN_BLOCK_ENTRIES (UINT64_C(1) << 8)
107 #endif
108
109 #if MIN_BLOCK_ENTRIES < 256
110 #error "The MIN_BLOCK_ENTRIES must be greater than 256"
111 #endif
112
113 /*
114 * Environment variable to choose the maximum size of an individual
115 * hash table.
116 */
117 #define HASHLIB_ENV_MAXMEM "SILK_HASH_MAXMEM"
118
119 /* Distinguished values for block index in the iterator */
120 #define HASH_ITER_BEGIN -1
121 #define HASH_ITER_END -2
122
123
124 /*
125 * The data in a HashTable is stored in multiple HashBlock structures.
126 */
/*
 *    The data in a HashTable is stored in multiple HashBlock
 *    structures.  A block is a flat array of fixed-size entries,
 *    where each entry is the key bytes immediately followed by the
 *    value bytes (see HASH_GET_ENTRY_LEN()).
 */
struct HashBlock_st {
    /* Pointer to an array of variable-sized entries */
    uint8_t *data_ptr;
    /* The table that owns this block; lets macros accept either a
     * HashTable or a HashBlock */
    const HashTable *table;
    /* Total capacity of this block as a number of entries; always a
     * power of 2 */
    uint64_t max_entries;
    /* Number of occupied entries in the block */
    uint64_t num_entries;
    /* Number of entries at which block meets the load_factor; once
     * num_entries reaches this, the block is considered full */
    uint64_t block_full;
};
typedef struct HashBlock_st HashBlock;
140
141
142 /** the HashTable structure */
143 /* typedef struct HashTable_st HashTable; */
/** the HashTable structure */
/* typedef struct HashTable_st HashTable; */
struct HashTable_st {
    /** HTT_ALLOWDELETION or 0 */
    uint8_t options;
    /** Storage size of a key in bytes */
    uint8_t key_len;
    /** Size of a value in bytes */
    uint8_t value_len;
    /** Point at which to resize (fraction of 255) */
    uint8_t load_factor;
    /** Number of blocks currently in block_ptrs[] */
    uint8_t num_blocks;
    /** Non-zero if rehashing has failed in the past */
    uint8_t rehash_failed;
    /** Non-zero if hash entries are sorted */
    uint8_t is_sorted;
    /** Non-zero if we can memset new memory to a value; true when the
     * no-value pattern is a single repeated byte */
    uint8_t can_memset_val;
    /** Maximum number of entries the initial block may store;
     * computed by hashlib_compute_max_initial_entries() */
    uint64_t max_init_entry;
    /** Size of key; used as cmp_userdata by hashlib_sort_entries() */
    size_t keylen_cmp_userdata;
    /** Pointer to representation of an empty value (value_len bytes) */
    uint8_t *no_value_ptr;
    /** Pointer to representation of a deleted value */
    uint8_t *del_value_ptr;
    /** Comparison function to use for a sorted table */
    hashlib_sort_key_cmp_fn cmp_fn;
    /** Caller's argument to the cmp_fn comparison function */
    void *cmp_userdata;
    /** A pointer to this table, so that macros may accept either a
     * HashTable or a HashBlock */
    const HashTable *table;
    /** The blocks */
    HashBlock *block_ptrs[HASH_MAX_BLOCKS];
#ifdef HASHLIB_RECORD_STATS
    /** Statistics for this table */
    hashlib_stats_t ht_stats;
#endif
};
/* HashTable */
184
185
186 /* LOCAL FUNCTION PROTOTYPES */
187
188 /* pull in the code that defines the hash function */
189 #ifdef HASHLIB_LOOKUP2
190 /* hash code used up to and including SiLK 2.3.x, defined in
191 * hashlib-lookup2.c */
192 unsigned long
193 hash(
194 const uint8_t *k,
195 unsigned long len,
196 unsigned long initval);
197 unsigned long
198 hash2(
199 unsigned long *k,
200 unsigned long len,
201 unsigned long initval);
202 unsigned long
203 hash3(
204 uint8_t *k,
205 unsigned long len,
206 unsigned long initval);
207 #include "hashlib-lookup2.c"
208
209 #else
210 /* hash code used in SiLK 2.4 and beyond, defined in
211 * hashlib-lookup3.c */
212
213 uint32_t
214 hashword(
215 const uint32_t *k,
216 size_t length,
217 uint32_t initval);
218 void
219 hashword2(
220 const uint32_t *k,
221 size_t length,
222 uint32_t *pc,
223 uint32_t *pb);
224 uint32_t
225 hashlittle(
226 const void *key,
227 size_t length,
228 uint32_t initval);
229 void
230 hashlittle2(
231 const void *key,
232 size_t length,
233 uint32_t *pc,
234 uint32_t *pb);
235 uint32_t
236 hashbig(
237 const void *key,
238 size_t length,
239 uint32_t initval);
240 void
241 hashbig2(
242 const void *key,
243 size_t length,
244 uint32_t *pc,
245 uint32_t *pb);
246
247 #include "hashlib-lookup3.c"
248 /* define hash() according to the byte order */
249 #if SK_BIG_ENDIAN
250 # define hash hashbig2
251 #else
252 # define hash hashlittle2
253 #endif
254 #endif /* HASHLIB_LOOKUP2 */
255
256 static HashBlock *
257 hashlib_create_block(
258 HashTable *table_ptr,
259 uint64_t block_entries);
260 static void
261 hashlib_free_block(
262 HashBlock *block_ptr);
263 static int
264 hashlib_block_find_entry(
265 const HashBlock *block_ptr,
266 const uint8_t *key_ptr,
267 uint8_t **entry_pptr);
268 static int
269 hashlib_iterate_sorted(
270 const HashTable *table_ptr,
271 HASH_ITER *iter_ptr,
272 uint8_t **key_pptr,
273 uint8_t **val_pptr);
274 static void
275 hashlib_compute_max_initial_entries(
276 HashTable *table_ptr);
277
278
/* FUNCTION-LIKE MACROS */

/*
 *    Return the maximum number of entries on the largest block in
 *    the table 'tbl_ptr'.
 */
#define HASH_GET_MAX_BLOCK_ENTRIES(tbl_ptr) \
    ((tbl_ptr)->max_init_entry)

/*
 *    Return true if the HashBlock 'blk_ptr' is full; that is, whether
 *    the number of entries meets or exceeds the load factor.
 */
#define HASH_BLOCK_IS_FULL(blk_ptr) \
    ((blk_ptr)->num_entries >= (blk_ptr)->block_full)

/*
 *    Get number of bytes of storage required to hold a key in
 *    'tbl_blk_ptr', which may be a HashTable or a HashBlock.
 */
#define HASH_GET_KEY_LEN(tbl_blk_ptr) \
    ((uint64_t)(tbl_blk_ptr)->table->key_len)

/*
 *    Get number of bytes of storage required to hold a value in
 *    'tbl_blk_ptr', which may be a HashTable or a HashBlock.
 */
#define HASH_GET_VALUE_LEN(tbl_blk_ptr) \
    ((uint64_t)(tbl_blk_ptr)->table->value_len)

/*
 *    Get number of bytes of storage required to hold an entry (the
 *    key bytes followed by the value bytes) in 'tbl_blk_ptr', which
 *    may be a HashTable or a HashBlock.
 */
#define HASH_GET_ENTRY_LEN(tbl_blk_ptr) \
    (HASH_GET_KEY_LEN(tbl_blk_ptr) + HASH_GET_VALUE_LEN(tbl_blk_ptr))

/*
 *    Get a pointer to the storage key part of 'entry_ptr' in
 *    'tbl_blk_ptr' which may be a HashTable or HashBlock.  (The key
 *    is at offset 0 of the entry.)
 */
#define HASHENTRY_GET_KEY(tbl_blk_ptr, entry_ptr) \
    (entry_ptr)

/*
 *    Get a pointer to the value part of 'entry_ptr' in 'tbl_blk_ptr'
 *    which may be a HashTable or HashBlock.  (The value immediately
 *    follows the key.)
 */
#define HASHENTRY_GET_VALUE(tbl_blk_ptr, entry_ptr) \
    ((entry_ptr) + HASH_GET_KEY_LEN(tbl_blk_ptr))

/*
 *    Set the storage key part of 'entry_ptr' in 'tbl_blk_ptr' to
 *    contain the bytes in 'key_bytes'.  'tbl_blk_ptr' may be a
 *    HashTable or HashBlock.
 */
#define HASHENTRY_SET_KEY(tbl_blk_ptr, entry_ptr, key_bytes) \
    memcpy(HASHENTRY_GET_KEY(tbl_blk_ptr, entry_ptr), (key_bytes), \
           HASH_GET_KEY_LEN(tbl_blk_ptr))

/*
 *    Return 1 if the bytes in 'value_ptr' match the empty value,
 *    otherwise 0.  'tbl_blk_ptr' may be a HashTable or HashBlock.
 */
#define HASH_VALUE_ISEMPTY(tbl_blk_ptr, value_ptr) \
    (0 == memcmp((value_ptr), (tbl_blk_ptr)->table->no_value_ptr, \
                 HASH_GET_VALUE_LEN(tbl_blk_ptr)))

/*
 *    Return 1 if the value part of the entry at 'entry_ptr' matches
 *    the empty value, otherwise 0.  'tbl_blk_ptr' may be a HashTable
 *    or a HashBlock.
 */
#define HASHENTRY_ISEMPTY(tbl_blk_ptr, entry_ptr) \
    HASH_VALUE_ISEMPTY(tbl_blk_ptr, \
                       HASHENTRY_GET_VALUE((tbl_blk_ptr), (entry_ptr)))

/*
 *    Get a pointer to the entry at index 'hash_index' in 'blk_ptr',
 *    which must be a HashBlock.
 */
#define HASH_ENTRY_AT(blk_ptr, hash_index) \
    ((blk_ptr)->data_ptr + (HASH_GET_ENTRY_LEN(blk_ptr) * (hash_index)))

/*
 *    Get a pointer to the storage key part of the entry at index
 *    'hash_index' in 'blk_ptr' which must be a HashBlock.
 */
#define HASH_KEY_AT(blk_ptr, hash_index) \
    HASHENTRY_GET_KEY((blk_ptr), HASH_ENTRY_AT((blk_ptr), (hash_index)))

/*
 *    Get a pointer to the value part of the entry at index
 *    'hash_index' in 'blk_ptr' which must be a HashBlock.
 */
#define HASH_VALUE_AT(blk_ptr, hash_index) \
    HASHENTRY_GET_VALUE((blk_ptr), HASH_ENTRY_AT((blk_ptr), (hash_index)))

/*
 *    Increment the value of 'member' in the hashlib_stats structure
 *    for 'tbl_blk_ptr', which may be a HashTable or a HashBlock.
 *    No-op unless HASHLIB_RECORD_STATS is defined.
 */
#ifndef HASHLIB_RECORD_STATS
#define HASH_STAT_INCR(tbl_blk_ptr, member)
#else
/* need to cast away the "const" */
#define HASH_STAT_INCR(tbl_blk_ptr, member) \
    ++(((HashTable *)(tbl_blk_ptr)->table)->ht_stats. member )
#endif


/*
 *    Assert that 'blk_size' is a power of 2.  BITS_IN_WORD64()
 *    appears to store the number of set bits in 'blk_size' into its
 *    first argument (declared in utils.h -- confirm); exactly one set
 *    bit means a power of 2.  No-op when NDEBUG is defined.
 */
#ifdef NDEBUG
#define HASH_ASSERT_SIZE_IS_POWER_2(blk_size)
#else
#define HASH_ASSERT_SIZE_IS_POWER_2(blk_size) \
    do { \
        uint64_t high_bits; \
        BITS_IN_WORD64(&high_bits, (blk_size)); \
        assert(1 == high_bits); \
    } while(0)
#endif /* NDEBUG */
400
401
402 /* FUNCTION DEFINITIONS */
403
404 HashTable *
hashlib_create_table(uint8_t key_len,uint8_t value_len,uint8_t value_type,uint8_t * no_value_ptr,uint8_t * appdata_ptr,uint32_t appdata_size,uint64_t estimated_count,uint8_t load_factor)405 hashlib_create_table(
406 uint8_t key_len,
407 uint8_t value_len,
408 uint8_t value_type,
409 uint8_t *no_value_ptr,
410 uint8_t *appdata_ptr,
411 uint32_t appdata_size,
412 uint64_t estimated_count,
413 uint8_t load_factor)
414 {
415 HashTable *table_ptr = NULL;
416 HashBlock *block_ptr = NULL;
417 uint64_t initial_entries;
418
419 /* Validate arguments */
420 if (0 == key_len || 0 == value_len) {
421 TRACEMSG(1,(TRC_FMT "create table: invalid width key %u, value %u",
422 TRC_ARG(table_ptr), key_len, value_len));
423 assert(0);
424 return NULL;
425 }
426
427 /* Allocate memory for the table and initialize attributes. */
428 table_ptr = (HashTable*)calloc(1, sizeof(HashTable));
429 if (table_ptr == NULL) {
430 TRACEMSG(1,(TRC_FMT "Failed to allocate new HashTable.",
431 TRC_ARG(table_ptr)));
432 return NULL;
433 }
434
435 /* Initialize the table structure */
436 table_ptr->table = table_ptr;
437 table_ptr->key_len = key_len;
438 table_ptr->value_len = value_len;
439 table_ptr->load_factor = load_factor;
440
441 TRACEMSG(3,
442 (TRC_FMT "key_len %u, value_len %u, entry_len %u, load_factor %u",
443 TRC_ARG(table_ptr), key_len, value_len,
444 key_len + value_len, load_factor));
445
446 /* Application data */
447 SK_UNUSED_PARAM(value_type);
448 SK_UNUSED_PARAM(appdata_ptr);
449 SK_UNUSED_PARAM(appdata_size);
450
451 /* Initialize value_ptr to string of zero-valued bytes if NULL */
452 table_ptr->no_value_ptr = (uint8_t*)calloc(value_len, sizeof(uint8_t));
453 if (table_ptr->no_value_ptr == NULL) {
454 TRACEMSG(1,((TRC_FMT "Failed to allocate new no_value_ptr"
455 " for new HashTable."), TRC_ARG(table_ptr)));
456 free(table_ptr);
457 return NULL;
458 }
459 if (no_value_ptr == NULL) {
460 table_ptr->can_memset_val = 1;
461 } else if (table_ptr->value_len == 1) {
462 table_ptr->can_memset_val = 1;
463 table_ptr->no_value_ptr[0] = no_value_ptr[0];
464 } else {
465 /* Fill the table's no_value_ptr with the first byte of the
466 * caller's no_value_ptr and then compare the complete
467 * no_value_ptr values to determine whether we can use
468 * memset() to initialize the values of new memory blocks. */
469 memset(table_ptr->no_value_ptr, no_value_ptr[0], value_len);
470 if (memcmp(table_ptr->no_value_ptr, no_value_ptr, value_len)) {
471 /* values differ; cannot use memset */
472 table_ptr->can_memset_val = 0;
473 memcpy(table_ptr->no_value_ptr, no_value_ptr, value_len);
474 } else {
475 /* values are the same; use memset */
476 table_ptr->can_memset_val = 1;
477 }
478 }
479
480 /* Compute the maximum number of entries for the initial block */
481 hashlib_compute_max_initial_entries(table_ptr);
482
483 /*
484 * Calculate the number of entres in the initial block. This is a
485 * power of 2 with at least MIN_BLOCK_ENTRIES entries that
486 */
487 /* account for the load factor */
488 initial_entries = (estimated_count << 8) / table_ptr->load_factor;
489 /* compute power of two greater than initial_entries */
490 initial_entries = UINT64_C(1) << (1 + skIntegerLog2(initial_entries));
491
492 TRACEMSG(2,((TRC_FMT "estimated_count %" PRIu64 ", initial_entries %"
493 PRIu64 " (calculated %" PRIu64 "), min_entries %" PRIu64
494 ", max_entries %" PRIu64),
495 TRC_ARG(table_ptr), estimated_count, initial_entries,
496 ((estimated_count << 8) / table_ptr->load_factor),
497 MIN_BLOCK_ENTRIES, HASH_GET_MAX_BLOCK_ENTRIES(table_ptr)));
498
499 if (initial_entries < MIN_BLOCK_ENTRIES) {
500 initial_entries = MIN_BLOCK_ENTRIES;
501 TRACEMSG(2,(TRC_FMT "adjusted initial_entries to %" PRIu64,
502 TRC_ARG(table_ptr), initial_entries));
503 } else {
504 if (initial_entries > HASH_GET_MAX_BLOCK_ENTRIES(table_ptr)) {
505 initial_entries = HASH_GET_MAX_BLOCK_ENTRIES(table_ptr);
506 }
507 TRACEMSG(2,(TRC_FMT "adjusted initial_entries to %" PRIu64,
508 TRC_ARG(table_ptr), initial_entries));
509 assert(initial_entries >= MIN_BLOCK_ENTRIES);
510 }
511
512 TRACEMSG(1,(TRC_FMT "Adding block #0...", TRC_ARG(table_ptr)));
513
514 /* Start with one block */
515 table_ptr->num_blocks = 1;
516 block_ptr = hashlib_create_block(table_ptr, initial_entries);
517 while (NULL == block_ptr) {
518 TRACEMSG(1,(TRC_FMT "Adding block #0 failed.", TRC_ARG(table_ptr)));
519 if (initial_entries <= MIN_BLOCK_ENTRIES) {
520 table_ptr->num_blocks = 0;
521 hashlib_free_table(table_ptr);
522 return NULL;
523 }
524 initial_entries >>= 1;
525 TRACEMSG(2,(TRC_FMT "adjusted initial_entries to %" PRIu64,
526 TRC_ARG(table_ptr), initial_entries));
527 assert(initial_entries >= MIN_BLOCK_ENTRIES);
528 }
529 table_ptr->block_ptrs[0] = block_ptr;
530
531 TRACEMSG(1,(TRC_FMT "Added block #%u.",
532 TRC_ARG(table_ptr), table_ptr->num_blocks - 1));
533
534 return table_ptr;
535 }
536
537
538 void
hashlib_free_table(HashTable * table_ptr)539 hashlib_free_table(
540 HashTable *table_ptr)
541 {
542 unsigned int k;
543
544 if (NULL == table_ptr) {
545 return;
546 }
547
548 TRACEMSG(1,(TRC_FMT "Freeing HashTable...", TRC_ARG(table_ptr)));
549 /* Free all the blocks in the table */
550 for (k = 0; k < table_ptr->num_blocks; ++k) {
551 TRACEMSG(2,(TRC_FMT "Freeing block #%u", TRC_ARG(table_ptr), k));
552 hashlib_free_block(table_ptr->block_ptrs[k]);
553 }
554
555 /* Free the empty pointer memory */
556 free(table_ptr->no_value_ptr);
557
558 /* Free the table structure itself */
559 free(table_ptr);
560 TRACEMSG(1,(TRC_FMT "Freed HashTable.", TRC_ARG(table_ptr)));
561 }
562
563
564 /*
565 * NOTE: Assumes block_entries is a power of 2. Very important!
566 */
567 static HashBlock *
hashlib_create_block(HashTable * table_ptr,uint64_t block_entries)568 hashlib_create_block(
569 HashTable *table_ptr,
570 uint64_t block_entries)
571 {
572 HashBlock *block_ptr;
573 uint64_t block_bytes;
574
575 HASH_ASSERT_SIZE_IS_POWER_2(block_entries);
576
577 HASH_STAT_INCR(table_ptr, blocks_allocated);
578
579 block_bytes = block_entries * HASH_GET_ENTRY_LEN(table_ptr);
580
581 TRACEMSG(1,((TRC_FMT "Creating block; requesting %#" PRIx64
582 " %" PRIu64 "-byte entries (%" PRIu64 " bytes)..."),
583 TRC_ARG(table_ptr), block_entries,
584 HASH_GET_ENTRY_LEN(table_ptr), block_bytes));
585
586 #if SIZE_MAX < UINT64_MAX
587 /* verify we do not overflow the size of a size_t */
588 if (block_bytes > SIZE_MAX) {
589 TRACEMSG(1,(TRC_FMT "Cannot create block; size exceeds SIZE_MAX.",
590 TRC_ARG(table_ptr)));
591 return NULL;
592 }
593 #endif
594
595 /* Allocate memory for the block and initialize attributes. */
596 block_ptr = (HashBlock*)malloc(sizeof(HashBlock));
597 if (block_ptr == NULL) {
598 TRACEMSG(1,(TRC_FMT "Failed to allocate new HashBlock.",
599 TRC_ARG(table_ptr)));
600 return NULL;
601 }
602 block_ptr->data_ptr = (uint8_t*)malloc(block_bytes);
603 if (block_ptr->data_ptr == NULL) {
604 free(block_ptr);
605 TRACEMSG(1,(TRC_FMT "Failed to allocate new data block.",
606 TRC_ARG(table_ptr)));
607 return NULL;
608 }
609
610 block_ptr->table = table_ptr;
611 block_ptr->max_entries = block_entries;
612 block_ptr->num_entries = 0;
613 block_ptr->block_full = table_ptr->load_factor * (block_entries >> 8);
614
615 /* Copy "empty" value to each entry. Garbage key values are
616 * ignored, so we don't bother writing to the keys. When the
617 * application overestimates the amount of memory needed, this can
618 * be bottleneck. */
619 if (table_ptr->can_memset_val) {
620 memset(block_ptr->data_ptr, table_ptr->no_value_ptr[0], block_bytes);
621 } else {
622 uint8_t *data_ptr;
623 uint64_t i;
624
625 /* Initialize 'data_ptr' to point to at the value part of the
626 * first entry in the block; move 'data_ptr' to the next value
627 * at every iteration */
628 for (i = 0, data_ptr = HASH_VALUE_AT(block_ptr, 0);
629 i < block_ptr->max_entries;
630 ++i, data_ptr += HASH_GET_ENTRY_LEN(table_ptr))
631 {
632 memcpy(data_ptr, table_ptr->no_value_ptr,
633 HASH_GET_VALUE_LEN(block_ptr));
634 }
635 }
636
637 return block_ptr;
638 }
639
640
641 static void
hashlib_free_block(HashBlock * block_ptr)642 hashlib_free_block(
643 HashBlock *block_ptr)
644 {
645 /* Free the data and the block itself */
646 assert(block_ptr);
647 free(block_ptr->data_ptr);
648 free(block_ptr);
649 }
650
651
652 /*
653 * Rehash entire table into a single block.
654 */
655 int
hashlib_rehash(HashTable * table_ptr)656 hashlib_rehash(
657 HashTable *table_ptr)
658 {
659 const uint64_t max_entries = HASH_GET_MAX_BLOCK_ENTRIES(table_ptr);
660 HashBlock *new_block_ptr = NULL;
661 HashBlock *block_ptr = NULL;
662 uint64_t num_entries = 0;
663 uint64_t initial_entries;
664 const uint8_t *key_ref;
665 const uint8_t *val_ref;
666 uint8_t *entry_ptr;
667 uint8_t *new_entry_ptr;
668 int rv;
669 unsigned int k;
670 uint64_t i;
671
672 HASH_STAT_INCR(table_ptr, rehashes);
673
674 if (table_ptr->is_sorted) {
675 TRACEMSG(1,(TRC_FMT "ERROR: Attempt to rehash a sorted HashTable",
676 TRC_ARG(table_ptr)));
677 assert(0 == table_ptr->is_sorted);
678 return ERR_SORTTABLE;
679 }
680
681 /*
682 * Count the total number of entries so we know what we need to
683 * allocate. We base this on the actual size of the blocks,
684 * and use the power of 2 that's double the smallest power of 2
685 * bigger than the sum of block sizes. It's justified by the
686 * intuition that once we reach this point, we've decided that
687 * we're going to explore an order of magnitude larger
688 * table. This particular scheme seems to work well in practice
689 * although it's difficult to justify theoretically--this is a
690 * rather arbitrary definition of "order of magnitude".
691 */
692 for (k = 0; k < table_ptr->num_blocks; ++k) {
693 num_entries += table_ptr->block_ptrs[k]->max_entries;
694 }
695 assert(num_entries > 0);
696
697 TRACEMSG(1,((TRC_FMT "Rehashing table having %" PRIu64
698 " %" PRIu64 "-byte entries..."),
699 TRC_ARG(table_ptr), num_entries,
700 HASH_GET_ENTRY_LEN(table_ptr)));
701
702 if (num_entries >= max_entries) {
703 TRACEMSG(1,((TRC_FMT "Too many entries for rehash; "
704 " num_entries=%" PRIu64 " >= max_entries=%" PRIu64 "."),
705 TRC_ARG(table_ptr), num_entries, max_entries));
706 return ERR_OUTOFMEMORY;
707 }
708
709 /* Choose the size for the initial block as the next power of 2
710 * greater than the number of entries. */
711 initial_entries = UINT64_C(1) << (1 + skIntegerLog2(num_entries));
712 if (initial_entries < MIN_BLOCK_ENTRIES) {
713 initial_entries = MIN_BLOCK_ENTRIES;
714 }
715
716 /* double it once more unless we already have 2^28 entries */
717 if (((max_entries >> 1) > initial_entries)
718 && (initial_entries < UINT64_C(0x10000000)))
719 {
720 initial_entries <<= 1;
721 }
722 if (initial_entries > max_entries) {
723 TRACEMSG(1,((TRC_FMT "Will not rehash table; new initial_entries=%"
724 PRIu64 " > max_entries=%" PRIu64 "."),
725 TRC_ARG(table_ptr), initial_entries, max_entries));
726 return ERR_OUTOFMEMORY;
727 }
728
729 TRACEMSG(1,(TRC_FMT "Allocating new rehash block...", TRC_ARG(table_ptr)));
730
731 /* Create the new block */
732 new_block_ptr = hashlib_create_block(table_ptr, initial_entries);
733 if (new_block_ptr == NULL) {
734 TRACEMSG(1,((TRC_FMT "Allocating rehash block failed for %#" PRIx64
735 " entries."), TRC_ARG(table_ptr), initial_entries));
736 return ERR_OUTOFMEMORY;
737 }
738 TRACEMSG(1,(TRC_FMT "Allocated rehash block.", TRC_ARG(table_ptr)));
739
740 /* Walk through each block in the table looking for non-empty
741 * entries and insert them into the new block. */
742 for (k = table_ptr->num_blocks; k > 0; ) {
743 --k;
744 block_ptr = table_ptr->block_ptrs[k];
745 TRACEMSG(2,(TRC_FMT "Rehashing entries from block #%u",
746 TRC_ARG(table_ptr), k));
747
748 for (i = 0, entry_ptr = HASH_ENTRY_AT(block_ptr, 0);
749 i < block_ptr->max_entries;
750 ++i, entry_ptr += HASH_GET_ENTRY_LEN(block_ptr))
751 {
752 key_ref = HASHENTRY_GET_KEY(block_ptr, entry_ptr);
753 val_ref = HASHENTRY_GET_VALUE(block_ptr, entry_ptr);
754
755 /* If not empty, then copy the entry into the new block */
756 if (!HASH_VALUE_ISEMPTY(block_ptr, val_ref)) {
757 rv = hashlib_block_find_entry(new_block_ptr,
758 key_ref, &new_entry_ptr);
759 if (rv != ERR_NOTFOUND) {
760 /* value is not-empty, but we cannot find the key
761 * in the hash table. either the hashlib code is
762 * broken, or the user set a value to the
763 * no_value_ptr value and broke the collision
764 * resolution mechanism. if assert() is active,
765 * the next line will call abort(). */
766 TRACEMSG(1,((TRC_FMT "During the rehash, unexpectedly"
767 " found an existing key in the new block"),
768 TRC_ARG(table_ptr)));
769 assert(rv == ERR_NOTFOUND);
770 free(new_block_ptr);
771 table_ptr->num_blocks = 1 + k;
772 return ERR_INTERNALERROR;
773 }
774 /* Copy the key and value */
775 HASHENTRY_SET_KEY(new_block_ptr, new_entry_ptr, key_ref);
776 memcpy(HASHENTRY_GET_VALUE(new_block_ptr, new_entry_ptr),
777 val_ref, HASH_GET_VALUE_LEN(block_ptr));
778 ++new_block_ptr->num_entries;
779 HASH_STAT_INCR(table_ptr, rehash_inserts);
780 }
781 }
782
783 /* Free the block */
784 hashlib_free_block(block_ptr);
785 table_ptr->block_ptrs[k] = NULL;
786 } /* blocks */
787
788 /* Associate the new block with the table */
789 table_ptr->num_blocks = 1;
790 table_ptr->block_ptrs[0] = new_block_ptr;
791
792 TRACEMSG(1,(TRC_FMT "Rehashed table.", TRC_ARG(table_ptr)));
793
794 return OK;
795 }
796
797
798 /*
799 * Add a new block to a table.
800 */
801 static int
hashlib_add_block(HashTable * table_ptr,uint64_t new_block_entries)802 hashlib_add_block(
803 HashTable *table_ptr,
804 uint64_t new_block_entries)
805 {
806 HashBlock *block_ptr = NULL;
807
808 assert(table_ptr->num_blocks < HASH_MAX_BLOCKS);
809 if (table_ptr->num_blocks >= HASH_MAX_BLOCKS) {
810 TRACEMSG(1,((TRC_FMT "Cannot allocate another block:"
811 " num_blocks=%" PRIu32 " >= HASH_MAX_BLOCKS=%u."),
812 TRC_ARG(table_ptr), table_ptr->num_blocks,
813 HASH_MAX_BLOCKS));
814 return ERR_NOMOREBLOCKS;
815 }
816 /* Create the new block */
817 TRACEMSG(1,((TRC_FMT "Adding block #%u..."),
818 TRC_ARG(table_ptr), table_ptr->num_blocks));
819 block_ptr = hashlib_create_block(table_ptr, new_block_entries);
820 if (block_ptr == NULL) {
821 TRACEMSG(1,(TRC_FMT "Adding block #%u failed.",
822 TRC_ARG(table_ptr), table_ptr->num_blocks));
823 return ERR_OUTOFMEMORY;
824 }
825
826 /* Add it to the table */
827 table_ptr->block_ptrs[table_ptr->num_blocks] = block_ptr;
828 ++table_ptr->num_blocks;
829 TRACEMSG(1,(TRC_FMT "Added block #%u.",
830 TRC_ARG(table_ptr), table_ptr->num_blocks - 1));
831
832 return OK;
833 }
834
835
836 /*
837 * Using the maximum memory_size for the entire hash table (either
838 * from the environment or the HASH_MAX_MEMORY_TOTAL default),
839 * compute the maximum number of entries that the initial block is
840 * allowed to contain and update the hash table with that value.
841 */
842 static void
hashlib_compute_max_initial_entries(HashTable * table_ptr)843 hashlib_compute_max_initial_entries(
844 HashTable *table_ptr)
845 {
846 /* true if env contains an invalid value */
847 static int bad_env = 0;
848
849 /* assume the initial block has this size */
850 const uint64_t init_basis = (1 << 16);
851
852 /* based on the initial block having a size of 'init_basis',
853 * compute the total size of the hash table using the
854 * SECONDARY_BLOCK_FRACTION and the number of hash blocks */
855 uint64_t total_basis;
856
857 /* max number of entries in initial block */
858 uint64_t max_init_entry;
859
860 uint64_t max_memory = HASH_MAX_MEMORY_TOTAL;
861 const char *env;
862
863 /* Determine maximum memory footprint of the entire hash table */
864 env = getenv(HASHLIB_ENV_MAXMEM);
865 if (bad_env) {
866 /* do nothing */
867 } else if (env && *env) {
868 int rv = skStringParseHumanUint64(&max_memory, env, SK_HUMAN_NORMAL);
869 if (rv) {
870 bad_env = 1;
871 skAppPrintErr("Ignoring Invalid %s '%s': %s",
872 HASHLIB_ENV_MAXMEM, env, skStringParseStrerror(rv));
873 max_memory = HASH_MAX_MEMORY_TOTAL;
874 }
875 }
876
877 assert(HASH_MAX_BLOCKS > 2);
878 assert(REHASH_BLOCK_COUNT >= 2);
879 assert(REHASH_BLOCK_COUNT < HASH_MAX_BLOCKS);
880
881 /*
882 * When you continually take half of an initial value and add
883 * it to the initial value, the sum approaches 2. If you only
884 * take half N times, the sum is:
885 *
886 * init_value * 2 - (init_value >> (N - 1))
887 */
888
889 switch (SECONDARY_BLOCK_FRACTION) {
890 case -1:
891 /* Keep halving blocks until REHASH_BLOCK_COUNT blocks have
892 * been created, then size is constant */
893 total_basis = (init_basis * 2
894 + ((init_basis >> (REHASH_BLOCK_COUNT - 1))
895 * (HASH_MAX_BLOCKS - REHASH_BLOCK_COUNT - 1)));
896 break;
897 case -2:
898 /* First secondary block is 1/4 size of main block; other
899 * blocks are halved until REHASH_BLOCK_COUNT is reached */
900 total_basis = (init_basis + (init_basis >> 1)
901 - ((init_basis >> 2) >> (REHASH_BLOCK_COUNT - 2))
902 + ((init_basis >> REHASH_BLOCK_COUNT)
903 >> (HASH_MAX_BLOCKS - REHASH_BLOCK_COUNT)));
904 break;
905 case -3:
906 /* First secondary block is 1/2 size of main block; all others
907 * are 1/4 of main block */
908 total_basis = (init_basis + (init_basis >> 1)
909 + (init_basis >> 2) * (HASH_MAX_BLOCKS - 2));
910 break;
911 case -4:
912 /* First secondary block is 1/4 size of main block; all others
913 * are 1/8 of main block */
914 total_basis = (init_basis + (init_basis >> 2)
915 + (init_basis >> 3) * (HASH_MAX_BLOCKS - 2));
916 break;
917 case 0:
918 /* All blocks are the same size (shift size by 0) */
919 total_basis = init_basis * HASH_MAX_BLOCKS;
920 break;
921 default:
922 /* All secondary blocks shifted by SECONDARY_BLOCK_FRACTION
923 * from the main block */
924 if (SECONDARY_BLOCK_FRACTION < 0) {
925 skAbort();
926 }
927 total_basis = (init_basis + ((init_basis >> SECONDARY_BLOCK_FRACTION)
928 * (HASH_MAX_BLOCKS - 1)));
929 break;
930 }
931
932 /*
933 * this is the formula that would compute the total maximum
934 * size of the hash table
935 *
936 * total_size = initial_bins * record_size * total_basis / init_basis;
937 *
938 * using that formula, solve for initial_bins:
939 */
940 max_init_entry = ((double)max_memory / (double)total_basis
941 * (double)init_basis
942 / (double)HASH_GET_ENTRY_LEN(table_ptr));
943
944 /*
945 * Get the largest power of two less than max_init_entry.
946 */
947 table_ptr->max_init_entry = UINT64_C(1) << skIntegerLog2(max_init_entry);
948 if (table_ptr->max_init_entry < MIN_BLOCK_ENTRIES) {
949 table_ptr->max_init_entry = MIN_BLOCK_ENTRIES;
950 }
951
952 #if TRACEMSG_LEVEL >= 2
953 {
954 uint64_t total_size;
955
956 total_size = ((double)table_ptr->max_init_entry / (double)init_basis
957 * (double)HASH_GET_ENTRY_LEN(table_ptr)
958 * (double)total_basis);
959
960 TRACEMSG(2,((TRC_FMT "desired max mem = %" PRIu64
961 "; entry size = %" PRIu64
962 "; max total entry = %" PRIu64
963 "; total size / init block size = %.4f"
964 "; max initial entry = %" PRIu64
965 "; log2 initial entry = %" PRIu64
966 "; actual max memory = %" PRIu64
967 "; actual max/desired max = %.4f"),
968 TRC_ARG(table_ptr), max_memory,
969 HASH_GET_ENTRY_LEN(table_ptr),
970 max_memory / HASH_GET_ENTRY_LEN(table_ptr),
971 (double)total_basis/(double)init_basis,
972 max_init_entry, table_ptr->max_init_entry, total_size,
973 ((0 == max_memory)
974 ? DBL_MAX
975 : (double)total_size/max_memory)));
976 }
977 #endif /* TRACEMSG_LEVEL >= 2 */
978 }
979
980
981
982 /*
983 * Compute the size the next hash block.
984 */
985 static uint64_t
hashlib_compute_next_block_entries(HashTable * table_ptr)986 hashlib_compute_next_block_entries(
987 HashTable *table_ptr)
988 {
989 uint64_t block_entries = 0;
990
991 /* This condition will only be true when the primary block has
992 * reached the maximum block size. */
993 if (table_ptr->num_blocks >= REHASH_BLOCK_COUNT) {
994 return table_ptr->block_ptrs[table_ptr->num_blocks-1]->max_entries;
995 }
996 /* Otherwise, it depends on current parameters */
997 switch (SECONDARY_BLOCK_FRACTION) {
998 case -1:
999 /* Keep halving blocks */
1000 block_entries =
1001 (table_ptr->block_ptrs[table_ptr->num_blocks-1]->max_entries >> 1);
1002 break;
1003 case -2:
1004 if (table_ptr->num_blocks == 1) {
1005 /* First secondary block is 1/4 size of main block */
1006 block_entries = table_ptr->block_ptrs[0]->max_entries >>2;
1007 } else {
1008 /* Other secondary blocks are halved */
1009 block_entries =
1010 table_ptr->block_ptrs[table_ptr->num_blocks-1]->max_entries >>1;
1011 }
1012 break;
1013 case -3:
1014 if (table_ptr->num_blocks == 1) {
1015 /* First secondary block is 1/2 size of main block */
1016 block_entries = table_ptr->block_ptrs[0]->max_entries >> 1;
1017 } else {
1018 /* All others are 1/4 size of main block */
1019 block_entries = table_ptr->block_ptrs[0]->max_entries >> 2;
1020 }
1021 break;
1022 case -4:
1023 if (table_ptr->num_blocks == 1) {
1024 /* First secondary block is 1/4 size of main block */
1025 block_entries = table_ptr->block_ptrs[0]->max_entries >> 2;
1026 } else {
1027 /* All others are 1/8 size of main block */
1028 block_entries = table_ptr->block_ptrs[0]->max_entries >> 3;
1029 }
1030 break;
1031 case 0:
1032 /* All blocks are the same size */
1033 block_entries = table_ptr->block_ptrs[0]->max_entries;
1034 break;
1035 default:
1036 if (SECONDARY_BLOCK_FRACTION < 0) {
1037 skAppPrintErr("Invalid block fraction %d",
1038 SECONDARY_BLOCK_FRACTION);
1039 skAbort();
1040 }
1041 block_entries =
1042 (table_ptr->block_ptrs[0]->max_entries >> SECONDARY_BLOCK_FRACTION);
1043 break;
1044 }
1045 if (block_entries < MIN_BLOCK_ENTRIES) {
1046 block_entries = MIN_BLOCK_ENTRIES;
1047 }
1048 return block_entries;
1049 }
1050
1051 /*
1052 * Algorithm:
1053 * - If the primary block is at its maximum, never rehash, only add
1054 * new blocks.
1055 * - If we have a small table, then don't bother creating
1056 * secondary tables. Simply rehash into a new block.
1057 * - If we've exceeded the maximum number of blocks, rehash
1058 * into a new block.
1059 * - Otherwise, create a new block
1060 */
1061
1062 static int
hashlib_resize_table(HashTable * table_ptr)1063 hashlib_resize_table(
1064 HashTable *table_ptr)
1065 {
1066 uint64_t new_block_entries;
1067 int rv;
1068
1069 TRACEMSG(1,(TRC_FMT "Resizing the table...", TRC_ARG(table_ptr)));
1070
1071 /* Compute the (potential) size of the new block */
1072 new_block_entries = hashlib_compute_next_block_entries(table_ptr);
1073 assert(new_block_entries != 0);
1074
1075 /* If we're at the maximum number of blocks (which implies that
1076 * the first block is at its max, and we can't resize, then that's
1077 * it. */
1078 if (table_ptr->num_blocks == HASH_MAX_BLOCKS) {
1079 TRACEMSG(1,((TRC_FMT "Unable to resize table: no more blocks;"
1080 " table contains %" PRIu64 " %" PRIu64 "-byte entries"
1081 " in %" PRIu64 " buckets across %u blocks"),
1082 TRC_ARG(table_ptr), hashlib_count_entries(table_ptr),
1083 HASH_GET_ENTRY_LEN(table_ptr),
1084 hashlib_count_buckets(table_ptr), table_ptr->num_blocks));
1085 return ERR_NOMOREBLOCKS;
1086 }
1087 /* If the first block is at its maximum size or if we have tried
1088 * and failed to rehash in the past, then add a new block. Once we
1089 * reach the maximum block size, we don't rehash. Instead we keep
1090 * adding blocks until we reach the maximum. */
1091 if ((table_ptr->block_ptrs[0]->max_entries
1092 == HASH_GET_MAX_BLOCK_ENTRIES(table_ptr))
1093 || table_ptr->rehash_failed)
1094 {
1095 assert(new_block_entries > MIN_BLOCK_ENTRIES);
1096 return hashlib_add_block(table_ptr, new_block_entries);
1097 }
1098 /* If we have REHASH_BLOCK_COUNT blocks, or the new block would be
1099 * too small, we simply rehash. */
1100 if ((new_block_entries < MIN_BLOCK_ENTRIES) ||
1101 (table_ptr->num_blocks >= REHASH_BLOCK_COUNT))
1102 {
1103 TRACEMSG(1,((TRC_FMT "Resize table forcing rehash;"
1104 " new_block_entries = %#" PRIx64
1105 "; num_blocks = %u; REHASH_BLOCK_COUNT = %" PRIu32 "."),
1106 TRC_ARG(table_ptr), new_block_entries,
1107 table_ptr->num_blocks, REHASH_BLOCK_COUNT));
1108 rv = hashlib_rehash(table_ptr);
1109 if (rv != ERR_OUTOFMEMORY) {
1110 return rv;
1111 }
1112 /* rehashing failed. try instead to add a new (small) block */
1113 table_ptr->rehash_failed = 1;
1114 if (new_block_entries < MIN_BLOCK_ENTRIES) {
1115 new_block_entries = MIN_BLOCK_ENTRIES;
1116 }
1117 TRACEMSG(1,(TRC_FMT "Rehash failed; creating new block instead...",
1118 TRC_ARG(table_ptr)));
1119 }
1120 /* Assert several global invariants */
1121 assert(new_block_entries >= MIN_BLOCK_ENTRIES);
1122 assert(new_block_entries <= HASH_GET_MAX_BLOCK_ENTRIES(table_ptr));
1123 assert(table_ptr->num_blocks < HASH_MAX_BLOCKS);
1124
1125 /* Otherwise, add new a new block */
1126 return hashlib_add_block(table_ptr, new_block_entries);
1127 }
1128
1129
#if 0
/*
 *    Debugging helper, currently compiled out.
 *
 *    Verify that 'key_ptr' is not present in any block except the
 *    last one (presumably called just before inserting into the last
 *    block).  When a duplicate is found, block on stdin so a
 *    developer can attach a debugger and inspect the table.
 */
static void
assert_not_already_there(
    const HashTable *table_ptr,
    const uint8_t *key_ptr)
{
    const uint8_t *entry_ptr;
    const HashBlock *block_ptr;
    unsigned int k;
    int rv;

    /* scan every block except the final one */
    for (k = 0; k < (table_ptr->num_blocks-1); ++k) {
        block_ptr = table_ptr->block_ptrs[k];
        rv = hashlib_block_find_entry(block_ptr, key_ptr, &entry_ptr);
        if (rv == OK) {
            /* duplicate found: wait here for the developer */
            getc(stdin);
        }
    }
}
#endif /* 0 */
1150
1151
/*
 *    Insert 'key_ptr' into 'table_ptr'.
 *
 *    On return, '*value_pptr' points at the value portion of the
 *    bucket for the key; the caller is expected to fill it in.
 *    Returns OK when a new entry was created, OK_DUPLICATE when the
 *    key was already present, ERR_SORTTABLE when the table has been
 *    sorted (inserting is then forbidden), or an error code when the
 *    table needed to grow and could not.
 */
int
hashlib_insert(
    HashTable *table_ptr,
    const uint8_t *key_ptr,
    uint8_t **value_pptr)
{
    HashBlock *block_ptr = NULL;
    uint8_t *entry_ptr = NULL;
    unsigned int k;
    int rv;

    assert(table_ptr);

    HASH_STAT_INCR(table_ptr, inserts);

    /* a sorted table is frozen; see hashlib_sort_entries() */
    if (table_ptr->is_sorted) {
        TRACEMSG(1,(TRC_FMT "Attempted an insert into a sorted HashTable",
                    TRC_ARG(table_ptr)));
        assert(0 == table_ptr->is_sorted);
        return ERR_SORTTABLE;
    }

    /* See if we are ready to do a resize by either adding a block or
     * rehashing.  Growing *before* the search guarantees that, on a
     * miss, 'entry_ptr' below refers to a free bucket. */
    if (HASH_BLOCK_IS_FULL(table_ptr->block_ptrs[table_ptr->num_blocks-1])){
        rv = hashlib_resize_table(table_ptr);
        if (rv != OK) {
            return rv;
        }
    }
    assert(table_ptr->num_blocks);

    /* Look in each block for the key */
    for (k = 0; k < table_ptr->num_blocks; ++k) {
        block_ptr = table_ptr->block_ptrs[k];
        if (hashlib_block_find_entry(block_ptr, key_ptr, &entry_ptr) == OK) {
            /* Found entry, use it */
            *value_pptr = HASHENTRY_GET_VALUE(block_ptr, entry_ptr);
            return OK_DUPLICATE;
        }
    }
    assert(block_ptr);

    /*
     *    We did not find it; do an insert into the last block by
     *    setting the key AND increasing the count.  The caller will set
     *    the value.
     *
     *    NOTE: entry_ptr is pointer to the insert location and
     *    block_ptr is pointing at the last block, and this is why we
     *    first check whether need to grow the table.
     *
     *    NOTE: Since we return a reference to the value, the user could
     *    either not set the value or mistakenly set the value to the
     *    'no_value_ptr'.  This is problematic, since the internal count
     *    will have been incremented even though in essence no entry has
     *    been added.  This may lead to growing the table sooner than
     *    necessary.
     *
     *    Even worse is if the user updates an existing entry's value to
     *    the 'no_value_ptr' after there has been a collision on that
     *    entry.  Keys that collided can no longer be found in the table.
     */
    *value_pptr = HASHENTRY_GET_VALUE(block_ptr, entry_ptr);
    HASHENTRY_SET_KEY(block_ptr, entry_ptr, key_ptr);
    ++block_ptr->num_entries;

    return OK;
}
1221
1222
1223 int
hashlib_lookup(const HashTable * table_ptr,const uint8_t * key_ptr,uint8_t ** value_pptr)1224 hashlib_lookup(
1225 const HashTable *table_ptr,
1226 const uint8_t *key_ptr,
1227 uint8_t **value_pptr)
1228 {
1229 const HashBlock *block_ptr;
1230 uint8_t *entry_ptr = NULL;
1231 unsigned int k;
1232
1233 assert(table_ptr);
1234
1235 HASH_STAT_INCR(table_ptr, lookups);
1236
1237 if (table_ptr->is_sorted) {
1238 TRACEMSG(1,(TRC_FMT "Attempt to lookup in a sorted HashTable",
1239 TRC_ARG(table_ptr)));
1240 assert(0 == table_ptr->is_sorted);
1241 return ERR_SORTTABLE;
1242 }
1243
1244 /* Look in each block for the key */
1245 for (k = 0; k < table_ptr->num_blocks; ++k) {
1246 block_ptr = table_ptr->block_ptrs[k];
1247 if (hashlib_block_find_entry(block_ptr, key_ptr, &entry_ptr) == OK) {
1248 /* Return pointer to the value in the entry structure */
1249 *value_pptr = HASHENTRY_GET_VALUE(block_ptr, entry_ptr);
1250 return OK;
1251 }
1252 }
1253 return ERR_NOTFOUND;
1254 }
1255
1256
/*
 *    Locate 'key_ptr' within the single block 'block_ptr'.
 *
 *    Return OK and set '*entry_pptr' to the matching bucket when the
 *    key is present.  Return ERR_NOTFOUND when it is not; in that
 *    case '*entry_pptr' points at the empty bucket where the key
 *    would be inserted.
 */
static int
hashlib_block_find_entry(
    const HashBlock *block_ptr,
    const uint8_t *key_ptr,
    uint8_t **entry_pptr)
{
#ifdef HASHLIB_RECORD_STATS
    int first_check = 1;
#endif
#ifndef NDEBUG
    /* guards (in debug builds) against probing forever */
    uint32_t num_tries = 0;
#endif
    uint64_t hash_index;
    uint64_t hash_value;
    uint64_t hash_probe_increment;
    /* seeds for the hashing function */
    uint32_t hash_primary = 0x53694c4b;
    uint32_t hash_secondary = 0x4361726e;

    HASH_STAT_INCR(block_ptr, find_entries);

    /*
     *    First this code computes the hash for the key, the
     *    'hash_value'.
     *
     *    The 'hash_value' is masked by the size of the block to
     *    determine which bucket to check (the 'hash_index').  Since the
     *    block size is a power of 2, masking can be used as a modulo
     *    operation.
     *
     *    If the bucket is empty, this function is done; the function
     *    passes back a handle to that bucket and returns ERR_NOTFOUND.
     *
     *    If the bucket is not empty, the function checks to see if
     *    bucket's key matches the key passed into the function.  The
     *    comparison is done by memcmp()ing the keys.  If the keys
     *    match, the entry is found; the function passes back a handle
     *    the bucket and returns OK.
     *
     *    If the keys do not match, it means multiple keys return the
     *    same hash value; that is, there is a collision.  A new bucket
     *    is selected by incrementing the 'hash_value' by the
     *    'hash_probe_increment' value and masking the result by the
     *    block size.
     *
     *    The process repeats until either an empty bucket is found or
     *    the keys match.
     *
     *    This collision resolution mechanism is what makes removal
     *    impossible.  To allow removal, we would need either to define
     *    a special "deleted-entry" value or to rehash the table after
     *    each deletion.
     *
     *    Collision resolution is also why the caller should never
     *    update a bucket's value to the no_value_ptr value---though
     *    there is no way for the hashlib code to enforce this.
     */
    hash(key_ptr, HASH_GET_KEY_LEN(block_ptr), &hash_primary, &hash_secondary);
    /* combine the two 32-bit hashes into a single 64-bit value */
    hash_value = hash_primary + (((uint64_t)hash_secondary) << UINT64_C(32));
    hash_probe_increment = hash_value | 0x01; /* must be odd */
    for (;;) {
        /* power-of-two block size: mask is equivalent to modulo */
        hash_index = hash_value & (block_ptr->max_entries - 1);

        *entry_pptr = HASH_ENTRY_AT(block_ptr, hash_index);
        if (HASHENTRY_ISEMPTY(block_ptr, *entry_pptr)) {
            /* Hit an empty entry, we're done. */
            return ERR_NOTFOUND;
        }
        /* compare the keys */
        if (0 == memcmp(HASHENTRY_GET_KEY(block_ptr, *entry_pptr),
                        key_ptr, HASH_GET_KEY_LEN(block_ptr)))
        {
            /* Found a match, we're done */
            return OK;
        }

        /* increment the hash value */
        hash_value += hash_probe_increment;
        assert(++num_tries < block_ptr->max_entries);
#ifdef HASHLIB_RECORD_STATS
        if (first_check) {
            first_check = 0;
            HASH_STAT_INCR(block_ptr, find_collisions);
        }
        HASH_STAT_INCR(block_ptr, collision_hops);
#endif  /* HASHLIB_RECORD_STATS */
    }
}
1348
1349
1350 #ifdef HASHLIB_RECORD_STATS
1351 void
hashlib_clear_stats(HashTable * table_ptr)1352 hashlib_clear_stats(
1353 HashTable *table_ptr)
1354 {
1355 if (table_ptr) {
1356 memset(&table_ptr->ht_stats, 0, sizeof(table_ptr->ht_stats));
1357 }
1358 }
1359
1360 void
hashlib_get_stats(const HashTable * table_ptr,hashlib_stats_t * hash_stats)1361 hashlib_get_stats(
1362 const HashTable *table_ptr,
1363 hashlib_stats_t *hash_stats)
1364 {
1365 if (table_ptr && hash_stats) {
1366 memcpy(hash_stats, &table_ptr->ht_stats, sizeof(table_ptr->ht_stats));
1367 }
1368 }
1369
1370 void
hashlib_print_stats(FILE * fp,const HashTable * table_ptr)1371 hashlib_print_stats(
1372 FILE *fp,
1373 const HashTable *table_ptr)
1374 {
1375 const hashlib_stats_t *hts = &table_ptr->ht_stats;
1376
1377 fprintf(fp, "Accumulated hashtable statistics:\n");
1378 fprintf(fp, " %" PRIu32 " total allocations.\n", hts->blocks_allocated);
1379 fprintf(fp, " %" PRIu64 " total inserts.\n", hts->inserts);
1380 fprintf(fp, " %" PRIu64 " total lookups.\n", hts->lookups);
1381 fprintf(fp, " %" PRIu32 " total rehashes.\n", hts->rehashes);
1382 fprintf(fp, " %" PRIu64 " inserts due to rehashing.\n",
1383 hts->rehash_inserts);
1384 fprintf(fp, " %" PRIu64 " total finds.\n", hts->find_entries);
1385 fprintf(fp, " %" PRIu64 " total find collisions.\n",
1386 hts->find_collisions);
1387 fprintf(fp, " %" PRIu64 " total collision hops.\n",
1388 hts->collision_hops);
1389 }
1390 #endif /* HASHLIB_RECORD_STATS */
1391
1392
1393 HASH_ITER
hashlib_create_iterator(const HashTable UNUSED (* table_ptr))1394 hashlib_create_iterator(
1395 const HashTable UNUSED(*table_ptr))
1396 {
1397 HASH_ITER iter;
1398
1399 memset(&iter, 0, sizeof(HASH_ITER));
1400 iter.block = HASH_ITER_BEGIN;
1401 return iter;
1402 }
1403
1404
/*
 *    Move 'iter_ptr' to the next entry of 'table_ptr'.
 *
 *    On success, set '*key_pptr' and '*val_pptr' to the entry's key
 *    and value and return OK.  Once all entries have been visited,
 *    set both handles to NULL and return ERR_NOMOREENTRIES.  For a
 *    sorted, multi-block table, defers to hashlib_iterate_sorted().
 */
int
hashlib_iterate(
    const HashTable *table_ptr,
    HASH_ITER *iter_ptr,
    uint8_t **key_pptr,
    uint8_t **val_pptr)
{
#if TRACEMSG_LEVEL >= 2
    /* running count of entries visited; trace-build only.  NOTE:
     * static, so concurrent iterations would share it. */
    static uint64_t so_far = 0;
#endif
    HashBlock *block_ptr;
    uint8_t *entry_ptr;

    /* a finished iterator stays finished */
    if (iter_ptr->block == HASH_ITER_END) {
        return ERR_NOMOREENTRIES;
    }

    if (table_ptr->is_sorted && table_ptr->num_blocks > 1) {
        /* Use sorted iterator if we should */
        return hashlib_iterate_sorted(table_ptr, iter_ptr, key_pptr, val_pptr);
    }

    /* Start at the first entry in the first block or increment the
     * iterator to start looking at the next entry. */
    if (iter_ptr->block == HASH_ITER_BEGIN) {
        /* Initialize the iterator. */
        memset(iter_ptr, 0, sizeof(HASH_ITER));
#if TRACEMSG_LEVEL >= 2
        TRACEMSG(2,(TRC_FMT "Iterate. Starting to iterate over HashTable...",
                    TRC_ARG(table_ptr)));
        so_far = 0;
#endif
    } else {
        ++iter_ptr->index;
    }

    /* Walk through indices of current block until we find a
     * non-empty.  Once we reach the end of the block, move on to the
     * next block. */
    while (iter_ptr->block < table_ptr->num_blocks) {

        /* Select the current block */
        block_ptr = table_ptr->block_ptrs[iter_ptr->block];

        /* Find the next non-empty entry in the current block (if
         * there is one). */
        for (entry_ptr = HASH_ENTRY_AT(block_ptr, iter_ptr->index);
             iter_ptr->index < block_ptr->max_entries;
             ++iter_ptr->index, entry_ptr += HASH_GET_ENTRY_LEN(block_ptr))
        {
            if (!HASHENTRY_ISEMPTY(block_ptr, entry_ptr)) {
                /* We found an entry, return it */
                *key_pptr = HASHENTRY_GET_KEY(block_ptr, entry_ptr);
                *val_pptr = HASHENTRY_GET_VALUE(block_ptr, entry_ptr);
#if TRACEMSG_LEVEL >= 2
                ++so_far;
#endif
                return OK;
            }
        }

        /* At the end of the block. */
        TRACEMSG(2,((TRC_FMT "Iterate. Finished block #%u containing %" PRIu64
                     " entries. Total visted %" PRIu64),
                    TRC_ARG(table_ptr), iter_ptr->block,
                    block_ptr->num_entries, so_far));

        /* try the next block */
        ++iter_ptr->block;
        iter_ptr->index = 0;
    }

    /* We're past the last entry of the last block, so we're done. */
    *key_pptr = NULL;
    *val_pptr = NULL;
    iter_ptr->block = HASH_ITER_END;
    TRACEMSG(2,(TRC_FMT "Iterate. No more entries. Total visited %"
                PRIu64, TRC_ARG(table_ptr), so_far));

    return ERR_NOMOREENTRIES;
}
1486
1487
/*
 *    Helper for hashlib_iterate() on a sorted, multi-block table.
 *
 *    After hashlib_sort_entries(), each block is individually sorted,
 *    so this performs one step of an N-way merge across the blocks:
 *    it returns the smallest not-yet-visited entry according to the
 *    table's comparison function.  Same contract as
 *    hashlib_iterate().
 */
static int
hashlib_iterate_sorted(
    const HashTable *table_ptr,
    HASH_ITER *iter_ptr,
    uint8_t **key_pptr,
    uint8_t **val_pptr)
{
    uint8_t *lowest_entry = NULL;
    unsigned int k;

    assert(iter_ptr->block != HASH_ITER_END);

    /* Start at the first entry in the first block or increment the
     * iterator to start looking at the next entry. */
    if (iter_ptr->block == HASH_ITER_BEGIN) {
        /* Initialize the iterator. */
        memset(iter_ptr, 0, sizeof(HASH_ITER));
        TRACEMSG(2,((TRC_FMT "Iterate. Starting to iterate"
                     " over sorted HashTable..."), TRC_ARG(table_ptr)));
    } else {
        /* Increment the pointer in the block from which we took the
         * entry last time. */
        ++iter_ptr->block_idx[iter_ptr->block];
    }

    /* Find the first available value across all blocks; this is our
     * arbitrary "lowest" value. */
    for (k = 0; k < table_ptr->num_blocks; ++k) {
        if (iter_ptr->block_idx[k] < table_ptr->block_ptrs[k]->num_entries) {
            iter_ptr->block = k;
            lowest_entry = HASH_ENTRY_AT(table_ptr->block_ptrs[k],
                                         iter_ptr->block_idx[k]);
            break;
        }
    }

    if (k == table_ptr->num_blocks) {
        /* We've processed all blocks.  Done. */
        *key_pptr = NULL;
        *val_pptr = NULL;
        iter_ptr->block = HASH_ITER_END;
        TRACEMSG(2,(TRC_FMT "Iterate. No more entries.", TRC_ARG(table_ptr)));
        return ERR_NOMOREENTRIES;
    }

    /* Compare our arbitrary "lowest" with every remaining block to
     * find the actual lowest. */
    for ( ++k; k < table_ptr->num_blocks; ++k) {
        if ((iter_ptr->block_idx[k] < table_ptr->block_ptrs[k]->num_entries)
            && (table_ptr->cmp_fn(HASH_ENTRY_AT(table_ptr->block_ptrs[k],
                                                iter_ptr->block_idx[k]),
                                  lowest_entry,
                                  table_ptr->cmp_userdata)
                < 0))
        {
            /* block 'k' holds a smaller entry; remember it (the
             * iterator records which block we consumed from) */
            iter_ptr->block = k;
            lowest_entry = HASH_ENTRY_AT(table_ptr->block_ptrs[k],
                                         iter_ptr->block_idx[k]);
        }
    }

    /* return lowest */
    *key_pptr = HASHENTRY_GET_KEY(table_ptr, lowest_entry);
    *val_pptr = HASHENTRY_GET_VALUE(table_ptr, lowest_entry);
    return OK;
}
1554
1555
1556 uint64_t
hashlib_count_buckets(const HashTable * table_ptr)1557 hashlib_count_buckets(
1558 const HashTable *table_ptr)
1559 {
1560 unsigned int k;
1561 uint64_t total = 0;
1562
1563 for (k = 0; k < table_ptr->num_blocks; ++k) {
1564 total += table_ptr->block_ptrs[k]->max_entries;
1565 }
1566 return total;
1567 }
1568
1569
1570 uint64_t
hashlib_count_entries(const HashTable * table_ptr)1571 hashlib_count_entries(
1572 const HashTable *table_ptr)
1573 {
1574 unsigned int k;
1575 uint64_t total = 0;
1576
1577 for (k = 0; k < table_ptr->num_blocks; ++k) {
1578 total += table_ptr->block_ptrs[k]->num_entries;
1579 TRACEMSG(3,((TRC_FMT "entry count for block #%u is %" PRIu64 "."),
1580 TRC_ARG(table_ptr), k,
1581 table_ptr->block_ptrs[k]->num_entries));
1582 }
1583 return total;
1584 }
1585
1586
1587 uint64_t
hashlib_count_nonempties(const HashTable * table_ptr)1588 hashlib_count_nonempties(
1589 const HashTable *table_ptr)
1590 {
1591 const HashBlock *block_ptr;
1592 const uint8_t *entry_ptr;
1593 unsigned int k;
1594 uint64_t total;
1595 uint64_t count;
1596 uint64_t i;
1597
1598 total = 0;
1599 for (k = 0; k < table_ptr->num_blocks; ++k) {
1600 block_ptr = table_ptr->block_ptrs[k];
1601 count = 0;
1602 for (i = 0, entry_ptr = HASH_ENTRY_AT(block_ptr, 0);
1603 i < block_ptr->max_entries;
1604 ++i, entry_ptr += HASH_GET_ENTRY_LEN(block_ptr))
1605 {
1606 if (!HASHENTRY_ISEMPTY(block_ptr, entry_ptr)) {
1607 ++count;
1608 }
1609 }
1610 total += count;
1611 TRACEMSG(2,((TRC_FMT "nonempty count for block #%u is %" PRIu64 "."),
1612 TRC_ARG(table_ptr), k, count));
1613 }
1614 return total;
1615 }
1616
1617
/*
 *    Comparison callback used by hashlib_sort_entries().
 *
 *    Compare the keys 'a' and 'b' byte-wise; 'v_length' points to a
 *    size_t holding the common key length.
 */
static int
hashlib_memcmp_keys(
    const void *a,
    const void *b,
    void *v_length)
{
    const size_t key_len = *(const size_t *)v_length;

    return memcmp(a, b, key_len);
}
1632
1633
/*
 *    Move the entries in each block to the front of the block, in
 *    preparation for sorting the entries.  After this runs, the
 *    first 'num_entries' buckets of each block are occupied and all
 *    remaining buckets hold the no-value representation.  The hash
 *    positions are destroyed, so the table can no longer be probed.
 */
static void
hashlib_make_contiguous(
    HashTable *table_ptr)
{
    HashBlock *block_ptr;
    uint64_t i, j;
    unsigned int k;
    uint8_t *entry_i;
    uint8_t *entry_j;
    const uint64_t entry_len = HASH_GET_ENTRY_LEN(table_ptr);
    const uint64_t value_len = HASH_GET_VALUE_LEN(table_ptr);

    TRACEMSG(1,(TRC_FMT "Making the HashTable contiguous...",
                TRC_ARG(table_ptr)));

    for (k = 0; k < table_ptr->num_blocks; ++k) {
        TRACEMSG(2,(TRC_FMT "Making block #%u contiguous",
                    TRC_ARG(table_ptr), k));
        block_ptr = table_ptr->block_ptrs[k];
        if (0 == block_ptr->num_entries) {
            /* nothing to compact in an empty block */
            continue;
        }

        /* 'j' starts at the front of the block and moves forward to
         * find empty entries.  'i' starts at the end of the block
         * and moves backward to find occupied entries.  We move
         * non-empty entries from 'i' to 'j' to get rid of holes in
         * the block.  Stop once i and j meet. */
        for (j = 0, entry_j = HASH_ENTRY_AT(block_ptr, 0);
             j < block_ptr->max_entries;
             ++j, entry_j += entry_len)
        {
            if (HASHENTRY_ISEMPTY(block_ptr, entry_j)) {
                /* found the first hole */
                break;
            }
        }

        for (i = block_ptr->max_entries-1, entry_i =HASH_ENTRY_AT(block_ptr,i);
             i > j;
             --i, entry_i -= entry_len)
        {
            if (!HASHENTRY_ISEMPTY(block_ptr, entry_i)) {
                /* move the occupied tail entry into the hole */
                memcpy(entry_j, entry_i, entry_len);
                /* set i to the empty value */
                memcpy(HASHENTRY_GET_VALUE(block_ptr, entry_i),
                       table_ptr->no_value_ptr, value_len);
                /* find next empty value */
                for (++j, entry_j += entry_len;
                     j < i;
                     ++j, entry_j += entry_len)
                {
                    if (HASHENTRY_ISEMPTY(block_ptr, entry_j)) {
                        break;
                    }
                }
            }
        }
        /* all occupied entries now sit in the first num_entries slots */
        assert(j <= block_ptr->num_entries);
    }
    TRACEMSG(1,(TRC_FMT "Made the HashTable contiguous.", TRC_ARG(table_ptr)));
}
1699
1700
1701 int
hashlib_sort_entries_usercmp(HashTable * table_ptr,hashlib_sort_key_cmp_fn cmp_fn,void * cmp_userdata)1702 hashlib_sort_entries_usercmp(
1703 HashTable *table_ptr,
1704 hashlib_sort_key_cmp_fn cmp_fn,
1705 void *cmp_userdata)
1706 {
1707 HashBlock *block_ptr;
1708 const size_t entry_len = HASH_GET_ENTRY_LEN(table_ptr);
1709 unsigned int k;
1710
1711 TRACEMSG(1,(TRC_FMT "Sorting the HashTable...", TRC_ARG(table_ptr)));
1712
1713 if (NULL == cmp_fn) {
1714 return ERR_BADARGUMENT;
1715 }
1716 if (!table_ptr->is_sorted) {
1717 /* first call; make the data in each block contiguous */
1718 hashlib_make_contiguous(table_ptr);
1719 }
1720
1721 table_ptr->cmp_fn = cmp_fn;
1722 table_ptr->cmp_userdata = cmp_userdata;
1723
1724 /* we use qsort to sort each block individually; when iterating,
1725 * return the lowest value among all sorted blocks. */
1726 for (k = 0; k < table_ptr->num_blocks; ++k) {
1727 TRACEMSG(2,(TRC_FMT "Sorting block #%u...", TRC_ARG(table_ptr), k));
1728 block_ptr = table_ptr->block_ptrs[k];
1729 /* sort the block's entries */
1730 skQSort_r(block_ptr->data_ptr, block_ptr->num_entries,
1731 entry_len, table_ptr->cmp_fn, table_ptr->cmp_userdata);
1732 }
1733
1734 TRACEMSG(1,(TRC_FMT "Sorted the HashTable.", TRC_ARG(table_ptr)));
1735
1736 table_ptr->is_sorted = 1;
1737 return 0;
1738 }
1739
1740 int
hashlib_sort_entries(HashTable * table_ptr)1741 hashlib_sort_entries(
1742 HashTable *table_ptr)
1743 {
1744 /* pass the key length in the context pointer */
1745 table_ptr->keylen_cmp_userdata = HASH_GET_KEY_LEN(table_ptr);
1746 return hashlib_sort_entries_usercmp(table_ptr, &hashlib_memcmp_keys,
1747 &table_ptr->keylen_cmp_userdata);
1748 }
1749
1750
1751
1752 /*
1753 * ********************************************************************
1754 * DEBUGGING FUNCTIONS FOR PRINTING INFO ABOUT A TABLE
1755 * ********************************************************************
1756 */
1757
/*
 *    Write 'data_len' bytes from 'data_ptr' to 'fp' as lowercase
 *    two-digit hex values, each followed by a single space.
 */
static void
hashlib_dump_bytes(
    FILE *fp,
    const uint8_t *data_ptr,
    uint64_t data_len)
{
    const uint8_t *end = data_ptr + data_len;

    while (data_ptr < end) {
        fprintf(fp, "%02x ", *data_ptr);
        ++data_ptr;
    }
}
1770
1771
1772 static void
hashlib_dump_block_header(FILE * fp,const HashBlock * block_ptr)1773 hashlib_dump_block_header(
1774 FILE *fp,
1775 const HashBlock *block_ptr)
1776 {
1777 /* Dump header info */
1778 fprintf(fp, ("Block size: \t %" PRIu64 "\n"), block_ptr->max_entries);
1779 fprintf(fp, ("Num entries:\t %" PRIu64 " (%2.0f%% full)\n"),
1780 block_ptr->num_entries,
1781 100 * (float) block_ptr->num_entries / block_ptr->max_entries);
1782 fprintf(fp, "Key width:\t %u bytes\n", block_ptr->table->key_len);
1783 fprintf(fp, "Value width:\t %u bytes\n", block_ptr->table->value_len);
1784 fprintf(fp, "Load factor:\t %u = %2.0f%%\n",
1785 block_ptr->table->load_factor,
1786 100 * (float) block_ptr->table->load_factor / 255);
1787 fprintf(fp, "Empty value representation: ");
1788 hashlib_dump_bytes(fp, block_ptr->table->no_value_ptr,
1789 block_ptr->table->value_len);
1790 fprintf(fp, "\n");
1791 }
1792
1793
1794 static void
hashlib_dump_block(FILE * fp,const HashBlock * block_ptr)1795 hashlib_dump_block(
1796 FILE *fp,
1797 const HashBlock *block_ptr)
1798 {
1799 uint64_t i; /* Index of into hash table */
1800 uint64_t entry_index = 0;
1801 const uint8_t *entry_ptr;
1802
1803 hashlib_dump_block_header(fp, block_ptr);
1804 fprintf(fp, "Data Dump:\n");
1805 fprintf(fp, "----------\n");
1806 for (i = 0; i < block_ptr->max_entries; ++i) {
1807 entry_ptr = HASH_ENTRY_AT(block_ptr, i);
1808 /* Don't dump empty entries */
1809 if (HASHENTRY_ISEMPTY(block_ptr, entry_ptr)) {
1810 continue;
1811 }
1812 ++entry_index;
1813
1814 /* Dump hash index in table, the key and the value */
1815 fprintf(fp, ("%6" PRIu64 " (%" PRIu64 "). "), entry_index, i);
1816 hashlib_dump_bytes(fp, HASHENTRY_GET_KEY(block_ptr, entry_ptr),
1817 HASH_GET_KEY_LEN(block_ptr));
1818 fprintf(fp, " -- ");
1819 hashlib_dump_bytes(fp, HASHENTRY_GET_VALUE(block_ptr, entry_ptr),
1820 HASH_GET_VALUE_LEN(block_ptr));
1821 fprintf(fp, "\n");
1822 }
1823 }
1824
1825
1826 void
hashlib_dump_table(FILE * fp,const HashTable * table_ptr)1827 hashlib_dump_table(
1828 FILE *fp,
1829 const HashTable *table_ptr)
1830 {
1831 unsigned int k;
1832
1833 hashlib_dump_table_header(fp, table_ptr);
1834 for (k = 0; k < table_ptr->num_blocks; ++k) {
1835 fprintf(fp, "Block #%u:\n", k);
1836 hashlib_dump_block(fp, table_ptr->block_ptrs[k]);
1837 }
1838 }
1839
1840
1841 void
hashlib_dump_table_header(FILE * fp,const HashTable * table_ptr)1842 hashlib_dump_table_header(
1843 FILE *fp,
1844 const HashTable *table_ptr)
1845 {
1846 unsigned int k;
1847 HashBlock *block_ptr;
1848 uint64_t total_used_memory = 0;
1849 uint64_t total_data_memory = 0;
1850
1851 /* Dump header info */
1852 fprintf(fp, "Key width:\t %u bytes\n", table_ptr->key_len);
1853 fprintf(fp, "Value width:\t %d bytes\n", table_ptr->value_len);
1854 fprintf(fp, "Empty value:\t");
1855 hashlib_dump_bytes(fp, table_ptr->no_value_ptr, table_ptr->value_len);
1856 fprintf(fp, "\n");
1857 fprintf(fp, "Load factor:\t %d = %2.0f%%\n",
1858 table_ptr->load_factor,
1859 100 * (float) table_ptr->load_factor / 255);
1860 fprintf(fp, ("Table has %" PRIu8 " blocks:\n"), table_ptr->num_blocks);
1861 for (k = 0; k < table_ptr->num_blocks; ++k) {
1862 block_ptr = table_ptr->block_ptrs[k];
1863 total_data_memory +=
1864 HASH_GET_ENTRY_LEN(block_ptr) * block_ptr->max_entries;
1865 total_used_memory +=
1866 HASH_GET_ENTRY_LEN(block_ptr) * block_ptr->num_entries;
1867 fprintf(fp, (" Block #%u: %" PRIu64 "/%" PRIu64 " (%3.1f%%)\n"),
1868 k, block_ptr->num_entries, block_ptr->max_entries,
1869 100 * ((float)block_ptr->num_entries) / block_ptr->max_entries);
1870 }
1871 fprintf(fp, ("Total data memory: %" PRIu64 " bytes\n"),
1872 total_data_memory);
1873 fprintf(fp, ("Total allocated data memory: %" PRIu64 " bytes\n"),
1874 total_used_memory);
1875 fprintf(fp, ("Excess data memory: %" PRIu64 " bytes\n"),
1876 total_data_memory - total_used_memory);
1877 fprintf(fp, "\n");
1878 }
1879
1880
1881 /*
1882 ** Local Variables:
1883 ** mode:c
1884 ** indent-tabs-mode:nil
1885 ** c-basic-offset:4
1886 ** End:
1887 */
1888