1 /*
2 ** Copyright (C) 2001-2020 by Carnegie Mellon University.
3 **
4 ** @OPENSOURCE_LICENSE_START@
5 ** See license information in ../../LICENSE.txt
6 ** @OPENSOURCE_LICENSE_END@
7 */
8 
9 /* File: hashlib.c: implements core hash library. */
10 
11 #include <silk/silk.h>
12 
13 RCSIDENT("$SiLK: hashlib.c ef14e54179be 2020-04-14 21:57:45Z mthomas $");
14 
15 #include <silk/hashlib.h>
16 #include <silk/utils.h>
17 
18 #ifdef HASHLIB_TRACE_LEVEL
19 #define TRACEMSG_LEVEL HASHLIB_TRACE_LEVEL
20 #endif
21 #define TRACEMSG(lvl, x)  TRACEMSG_TO_TRACEMSGLVL(lvl, x)
22 #include <silk/sktracemsg.h>
23 
24 #if TRACEMSG_LEVEL > 0
25 #define TRC_FMT         "hashlib.c:%d [%p] "
26 #define TRC_ARG(v_v)    __LINE__, (void *)(v_v)
27 #endif
28 
29 
30 /* Configuration */
31 
32 /*
33  *    The maximum size (in terms of number of bytes) of an individual
34  *    hash block.
35  */
36 #define HASH_MAX_MEMORY_BLOCK   ((uint64_t)(SIZE_MAX >> 3))
37 
38 /*
39  *    The maximum size (in terms of number of bytes) for the entire
40  *    hash table.  May be modified by the environment variable named
41  *    by HASHLIB_ENV_MAXMEM.
42  *
43  *    The default value is the maximum size of any block multiplied by
44  *    three, where the three comes from assuming that the
45  *    SECONDARY_BLOCK_FRACTION is set to -3, which is its default.
46  */
47 #define HASH_MAX_MEMORY_TOTAL   (HASH_MAX_MEMORY_BLOCK * 3)
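
/*
 *    Rough sketch of where the factor of three above comes from,
 *    assuming the defaults of SECONDARY_BLOCK_FRACTION == -3 and
 *    HASH_MAX_BLOCKS == 8: block 0 may grow to HASH_MAX_MEMORY_BLOCK,
 *    block 1 is half that size, and blocks 2 through 7 are each a
 *    quarter of it, so the entire table is at most
 *
 *        (1 + 1/2 + 6 * 1/4) * HASH_MAX_MEMORY_BLOCK
 *            = 3 * HASH_MAX_MEMORY_BLOCK
 */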
48 
49 /*
50  *    Maximum number of blocks ever allocated, used for sizing the
51  *    array of HashBlocks in the HashTable.
52  *
53  *    Once the primary block reaches HASH_MAX_MEMORY_BLOCK (the
54  *    maximum block size), new blocks will be allocated until this
55  *    maximum is reached.  This value cannot be greater than the
56  *    HASHLIB_ITER_MAX_BLOCKS value defined in hashlib.h.
57  */
58 #define HASH_MAX_BLOCKS 8
59 
60 #if HASH_MAX_BLOCKS > HASHLIB_ITER_MAX_BLOCKS
61 #error  "HASH_MAX_BLOCKS may not be greater than HASHLIB_ITER_MAX_BLOCKS"
62 #endif
63 
64 /*
65  *    When the number of HashBlocks gets to this count, a rehash is
66  *    triggered unless the first block is already at the maximum block
67  *    size.
68  *
69  *    This value is not static since hashlib_metrics.c may set it.
70  */
71 uint32_t REHASH_BLOCK_COUNT = 4;
72 
73 /*
74  *    The SECONDARY_BLOCK_FRACTION is used to determine the size of
75  *    the HashBlocks that follow the first.
76  *
77  *    If non-negative, tables 1...HASH_MAX_BLOCKS-1 have size given by
78  *
79  *    table_size >> SECONDARY_BLOCK_FRACTION
80  *
81  *    May also have one of the following values:
82  *
83  *    = -1 means to keep halving
84  *
85  *    = -2 means to keep halving starting at a secondary block size
86  *    1/4 of block 0
87  *
88  *    = -3 means block 1 is 1/2 block 0, and all other blocks are 1/4
89  *    block 0.
90  *
91  *    = -4 means block 1 is 1/4 block 0, and all other blocks are 1/8
92  *    block 0.
93  *
94  *    In all cases, the size of blocks REHASH_BLOCK_COUNT through
95  *    HASH_MAX_BLOCKS is fixed.
96  *
97  *    This value is not static since hashlib_metrics.c may set it.
98  */
99 int32_t SECONDARY_BLOCK_FRACTION = -3;
100 
101 /*
102  *    The minimum number of entries that may be stored in a block.
103  *    This value must not be less than 256.
104  */
105 #ifndef MIN_BLOCK_ENTRIES
106 #define MIN_BLOCK_ENTRIES   (UINT64_C(1) << 8)
107 #endif
108 
109 #if MIN_BLOCK_ENTRIES < 256
110 #error "The MIN_BLOCK_ENTRIES must not be less than 256"
111 #endif
112 
113 /*
114  *    Environment variable used to choose the maximum memory footprint
115  *    of an entire hash table.
116  */
117 #define HASHLIB_ENV_MAXMEM      "SILK_HASH_MAXMEM"
118 
119 /* Distinguished values for block index in the iterator */
120 #define HASH_ITER_BEGIN -1
121 #define HASH_ITER_END -2
122 
123 
124 /*
125  *    The data in a HashTable is stored in multiple HashBlock structures.
126  */
127 struct HashBlock_st {
128     /* Pointer to an array of variable-sized entries */
129     uint8_t            *data_ptr;
130     /* The table that owns this block */
131     const HashTable    *table;
132     /* Total capacity of this block as a number of entries */
133     uint64_t            max_entries;
134     /* Number of occupied entries in the block */
135     uint64_t            num_entries;
136     /* Number of entries at which block meets the load_factor */
137     uint64_t            block_full;
138 };
139 typedef struct HashBlock_st HashBlock;
140 
141 
142 /**  the HashTable structure */
143 /* typedef struct HashTable_st HashTable; */
144 struct HashTable_st {
145     /**  HTT_ALLOWDELETION or 0 */
146     uint8_t             options;
147     /**  Storage size of a key in bytes */
148     uint8_t             key_len;
149     /**  Size of a value in bytes */
150     uint8_t             value_len;
151     /**  Point at which to resize (fraction of 255) */
152     uint8_t             load_factor;
153     /**  Number of blocks */
154     uint8_t             num_blocks;
155     /**  Non-zero if rehashing has failed in the past */
156     uint8_t             rehash_failed;
157     /**  Non-zero if hash entries are sorted */
158     uint8_t             is_sorted;
159     /**  Non-zero if we can memset new memory to a value */
160     uint8_t             can_memset_val;
161     /**  Maximum number of entries the initial block may store */
162     uint64_t            max_init_entry;
163     /**  Size of key; used as cmp_userdata by hashlib_sort_entries() */
164     size_t              keylen_cmp_userdata;
165     /**  Pointer to representation of an empty value */
166     uint8_t            *no_value_ptr;
167     /**  Pointer to representation of a deleted value */
168     uint8_t            *del_value_ptr;
169     /**  Comparison function to use for a sorted table */
170     hashlib_sort_key_cmp_fn     cmp_fn;
171     /**  Caller's argument to the cmp_fn comparison function */
172     void               *cmp_userdata;
173     /**  A pointer to this table, so that macros may accept either a
174      *   HashTable or a HashBlock */
175     const HashTable    *table;
176     /**  The blocks */
177     HashBlock          *block_ptrs[HASH_MAX_BLOCKS];
178 #ifdef HASHLIB_RECORD_STATS
179     /**  Statistics for this table */
180     hashlib_stats_t     ht_stats;
181 #endif
182 };
183 /* HashTable */
184 
185 
186 /* LOCAL FUNCTION PROTOTYPES */
187 
188 /* pull in the code that defines the hash function */
189 #ifdef HASHLIB_LOOKUP2
190 /* hash code used up to and including SiLK 2.3.x, defined in
191  * hashlib-lookup2.c */
192 unsigned long
193 hash(
194     const uint8_t      *k,
195     unsigned long       len,
196     unsigned long       initval);
197 unsigned long
198 hash2(
199     unsigned long      *k,
200     unsigned long       len,
201     unsigned long       initval);
202 unsigned long
203 hash3(
204     uint8_t            *k,
205     unsigned long       len,
206     unsigned long       initval);
207 #include "hashlib-lookup2.c"
208 
209 #else
210 /* hash code used in SiLK 2.4 and beyond, defined in
211  * hashlib-lookup3.c */
212 
213 uint32_t
214 hashword(
215     const uint32_t     *k,
216     size_t              length,
217     uint32_t            initval);
218 void
219 hashword2(
220     const uint32_t     *k,
221     size_t              length,
222     uint32_t           *pc,
223     uint32_t           *pb);
224 uint32_t
225 hashlittle(
226     const void         *key,
227     size_t              length,
228     uint32_t            initval);
229 void
230 hashlittle2(
231     const void         *key,
232     size_t              length,
233     uint32_t           *pc,
234     uint32_t           *pb);
235 uint32_t
236 hashbig(
237     const void         *key,
238     size_t              length,
239     uint32_t            initval);
240 void
241 hashbig2(
242     const void         *key,
243     size_t              length,
244     uint32_t           *pc,
245     uint32_t           *pb);
246 
247 #include "hashlib-lookup3.c"
248 /* define hash() according to the byte order */
249 #if SK_BIG_ENDIAN
250 #  define hash  hashbig2
251 #else
252 #  define hash  hashlittle2
253 #endif
254 #endif  /* HASHLIB_LOOKUP2 */
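
/*
 *    Sketch of how hash() is used below (see
 *    hashlib_block_find_entry()): the two 32-bit seeds are passed by
 *    reference, come back holding two independent hash words, and are
 *    then combined into a single 64-bit value:
 *
 *        uint32_t pc = 0x53694c4b, pb = 0x4361726e;
 *        hash(key, key_len, &pc, &pb);
 *        hash_value = pc + (((uint64_t)pb) << 32);
 */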
255 
256 static HashBlock *
257 hashlib_create_block(
258     HashTable          *table_ptr,
259     uint64_t            block_entries);
260 static void
261 hashlib_free_block(
262     HashBlock          *block_ptr);
263 static int
264 hashlib_block_find_entry(
265     const HashBlock    *block_ptr,
266     const uint8_t      *key_ptr,
267     uint8_t           **entry_pptr);
268 static int
269 hashlib_iterate_sorted(
270     const HashTable    *table_ptr,
271     HASH_ITER          *iter_ptr,
272     uint8_t           **key_pptr,
273     uint8_t           **val_pptr);
274 static void
275 hashlib_compute_max_initial_entries(
276     HashTable          *table_ptr);
277 
278 
279 /* FUNCTION-LIKE MACROS */
280 
281 /*
282  *    Return the maximum number of entries that the initial (largest)
283  *    block in the table 'tbl_ptr' may hold.
284  */
285 #define HASH_GET_MAX_BLOCK_ENTRIES(tbl_ptr)                             \
286     ((tbl_ptr)->max_init_entry)
287 
288 /*
289  *    Return true if the HashBlock 'blk_ptr' is full; that is, whether
290  *    the number of entries meets or exceeds the load factor.
291  */
292 #define HASH_BLOCK_IS_FULL(blk_ptr)                     \
293     ((blk_ptr)->num_entries >= (blk_ptr)->block_full)
294 
295 /*
296  *    Get number of bytes of storage required to hold a key in
297  *    'tbl_blk_ptr', which may be a HashTable or a HashBlock.
298  */
299 #define HASH_GET_KEY_LEN(tbl_blk_ptr)           \
300     ((uint64_t)(tbl_blk_ptr)->table->key_len)
301 
302 /*
303  *    Get number of bytes of storage required to hold a value in
304  *    'tbl_blk_ptr', which may be a HashTable or a HashBlock.
305  */
306 #define HASH_GET_VALUE_LEN(tbl_blk_ptr)                 \
307     ((uint64_t)(tbl_blk_ptr)->table->value_len)
308 
309 /*
310  *    Get number of bytes of storage required to hold an entry in
311  *    'tbl_blk_ptr', which may be a HashTable or a HashBlock.
312  */
313 #define HASH_GET_ENTRY_LEN(tbl_blk_ptr)                                 \
314     (HASH_GET_KEY_LEN(tbl_blk_ptr) + HASH_GET_VALUE_LEN(tbl_blk_ptr))
315 
316 /*
317  *    Get a pointer to the storage key part of 'entry_ptr' in
318  *    'tbl_blk_ptr' which may be a HashTable or HashBlock.
319  */
320 #define HASHENTRY_GET_KEY(tbl_blk_ptr, entry_ptr)       \
321     (entry_ptr)
322 
323 /*
324  *    Get a pointer to the value part of 'entry_ptr' in 'tbl_blk_ptr'
325  *    which may be a HashTable or HashBlock.
326  */
327 #define HASHENTRY_GET_VALUE(tbl_blk_ptr, entry_ptr)     \
328     ((entry_ptr) + HASH_GET_KEY_LEN(tbl_blk_ptr))
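
/*
 *    Entry layout sketch: a bucket in a block's data array is simply
 *    the key bytes followed by the value bytes.  For example, with
 *    key_len == 4 and value_len == 8 each entry occupies 12
 *    contiguous bytes:
 *
 *        byte:    0  1  2  3  4  5  6  7  8  9 10 11
 *                [ key       ][ value                ]
 */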
329 
330 /*
331  *    Set the storage key part of 'entry_ptr' in 'tbl_blk_ptr' to
332  *    contain the bytes in 'key_bytes'.  'tbl_blk_ptr' may be a
333  *    HashTable or HashBlock.
334  */
335 #define HASHENTRY_SET_KEY(tbl_blk_ptr, entry_ptr, key_bytes)            \
336     memcpy(HASHENTRY_GET_KEY(tbl_blk_ptr, entry_ptr), (key_bytes),      \
337            HASH_GET_KEY_LEN(tbl_blk_ptr))
338 
339 /*
340  *    Return 1 if the bytes in 'value_ptr' match the empty value,
341  *    otherwise 0.  'tbl_blk_ptr' may be a HashTable or HashBlock.
342  */
343 #define HASH_VALUE_ISEMPTY(tbl_blk_ptr, value_ptr)                      \
344     (0 == memcmp((value_ptr), (tbl_blk_ptr)->table->no_value_ptr,       \
345                  HASH_GET_VALUE_LEN(tbl_blk_ptr)))
346 
347 /*
348  *    Return 1 if the value part of the entry at 'entry_ptr' matches
349  *    the empty value, otherwise 0.  'tbl_blk_ptr' may be a HashTable
350  *    or a HashBlock.
351  */
352 #define HASHENTRY_ISEMPTY(tbl_blk_ptr, entry_ptr)                       \
353     HASH_VALUE_ISEMPTY(tbl_blk_ptr,                                     \
354                        HASHENTRY_GET_VALUE((tbl_blk_ptr), (entry_ptr)))
355 
356 /*
357  *    Get a pointer to the entry at index 'hash_index' in 'blk_ptr',
358  *    which must be a HashBlock.
359  */
360 #define HASH_ENTRY_AT(blk_ptr, hash_index)                              \
361     ((blk_ptr)->data_ptr + (HASH_GET_ENTRY_LEN(blk_ptr) * (hash_index)))
362 
363 /*
364  *    Get a pointer to the storage key part of the entry at index
365  *    'hash_index' in 'blk_ptr' which must be a HashBlock.
366  */
367 #define HASH_KEY_AT(blk_ptr, hash_index)                                \
368     HASHENTRY_GET_KEY((blk_ptr), HASH_ENTRY_AT((blk_ptr), (hash_index)))
369 
370 /*
371  *    Get a pointer to the value part of the entry at index
372  *    'hash_index' in 'blk_ptr' which must be a HashBlock.
373  */
374 #define HASH_VALUE_AT(blk_ptr, hash_index)                              \
375     HASHENTRY_GET_VALUE((blk_ptr), HASH_ENTRY_AT((blk_ptr), (hash_index)))
376 
377 /*
378  *    Increment the value of 'member' in the hashlib_stats structure
379  *    for 'tbl_blk_ptr', which may be a HashTable or a HashBlock.
380  */
381 #ifndef HASHLIB_RECORD_STATS
382 #define HASH_STAT_INCR(tbl_blk_ptr, member)
383 #else
384 /* need to cast away the "const" */
385 #define HASH_STAT_INCR(tbl_blk_ptr, member)                     \
386     ++(((HashTable *)(tbl_blk_ptr)->table)->ht_stats. member )
387 #endif
388 
389 
390 #ifdef NDEBUG
391 #define HASH_ASSERT_SIZE_IS_POWER_2(blk_size)
392 #else
393 #define HASH_ASSERT_SIZE_IS_POWER_2(blk_size)   \
394     do {                                        \
395         uint64_t high_bits;                     \
396         BITS_IN_WORD64(&high_bits, (blk_size)); \
397         assert(1 == high_bits);                 \
398     } while(0)
399 #endif  /* NDEBUG */
400 
401 
402 /* FUNCTION DEFINITIONS */
403 
404 HashTable *
405 hashlib_create_table(
406     uint8_t             key_len,
407     uint8_t             value_len,
408     uint8_t             value_type,
409     uint8_t            *no_value_ptr,
410     uint8_t            *appdata_ptr,
411     uint32_t            appdata_size,
412     uint64_t            estimated_count,
413     uint8_t             load_factor)
414 {
415     HashTable *table_ptr = NULL;
416     HashBlock *block_ptr = NULL;
417     uint64_t initial_entries;
418 
419     /* Validate arguments */
420     if (0 == key_len || 0 == value_len) {
421         TRACEMSG(1,(TRC_FMT "create table: invalid width key %u, value %u",
422                     TRC_ARG(table_ptr), key_len, value_len));
423         assert(0);
424         return NULL;
425     }
426 
427     /* Allocate memory for the table and initialize attributes.  */
428     table_ptr = (HashTable*)calloc(1, sizeof(HashTable));
429     if (table_ptr == NULL) {
430         TRACEMSG(1,(TRC_FMT "Failed to allocate new HashTable.",
431                     TRC_ARG(table_ptr)));
432         return NULL;
433     }
434 
435     /* Initialize the table structure */
436     table_ptr->table = table_ptr;
437     table_ptr->key_len = key_len;
438     table_ptr->value_len = value_len;
439     table_ptr->load_factor = load_factor;
440 
441     TRACEMSG(3,
442              (TRC_FMT "key_len %u, value_len %u, entry_len %u, load_factor %u",
443               TRC_ARG(table_ptr), key_len, value_len,
444               key_len + value_len, load_factor));
445 
446     /* Application data */
447     SK_UNUSED_PARAM(value_type);
448     SK_UNUSED_PARAM(appdata_ptr);
449     SK_UNUSED_PARAM(appdata_size);
450 
451     /* Initialize the no_value_ptr; it defaults to a string of zero-valued bytes when the caller passes NULL */
452     table_ptr->no_value_ptr = (uint8_t*)calloc(value_len, sizeof(uint8_t));
453     if (table_ptr->no_value_ptr == NULL) {
454         TRACEMSG(1,((TRC_FMT "Failed to allocate new no_value_ptr"
455                      " for new HashTable."), TRC_ARG(table_ptr)));
456         free(table_ptr);
457         return NULL;
458     }
459     if (no_value_ptr == NULL) {
460         table_ptr->can_memset_val = 1;
461     } else if (table_ptr->value_len == 1) {
462         table_ptr->can_memset_val = 1;
463         table_ptr->no_value_ptr[0] = no_value_ptr[0];
464     } else {
465         /* Fill the table's no_value_ptr with the first byte of the
466          * caller's no_value_ptr and then compare the complete
467          * no_value_ptr values to determine whether we can use
468          * memset() to initialize the values of new memory blocks. */
469         memset(table_ptr->no_value_ptr, no_value_ptr[0], value_len);
470         if (memcmp(table_ptr->no_value_ptr, no_value_ptr, value_len)) {
471             /* values differ; cannot use memset */
472             table_ptr->can_memset_val = 0;
473             memcpy(table_ptr->no_value_ptr, no_value_ptr, value_len);
474         } else {
475             /* values are the same; use memset */
476             table_ptr->can_memset_val = 1;
477         }
478     }
479 
480     /* Compute the maximum number of entries for the initial block */
481     hashlib_compute_max_initial_entries(table_ptr);
482 
483     /*
484      * Calculate the number of entries in the initial block.  This is a
485      * power of 2 with at least MIN_BLOCK_ENTRIES entries, and it is
486      * adjusted to account for the load factor.
487      */
488     initial_entries = (estimated_count << 8) / table_ptr->load_factor;
489     /* compute power of two greater than initial_entries */
490     initial_entries = UINT64_C(1) << (1 + skIntegerLog2(initial_entries));
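    /* Worked example with illustrative numbers: an estimated_count of
     * 1000000 and a load_factor of 192 give (1000000 << 8) / 192 =
     * 1333333 (integer division), and the next power of two above
     * that is 1 << 21 = 2097152 initial entries. */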
491 
492     TRACEMSG(2,((TRC_FMT "estimated_count %" PRIu64 ", initial_entries %"
493                  PRIu64 " (calculated %" PRIu64 "), min_entries %" PRIu64
494                  ", max_entries %" PRIu64),
495                 TRC_ARG(table_ptr), estimated_count, initial_entries,
496                 ((estimated_count << 8) / table_ptr->load_factor),
497                 MIN_BLOCK_ENTRIES, HASH_GET_MAX_BLOCK_ENTRIES(table_ptr)));
498 
499     if (initial_entries < MIN_BLOCK_ENTRIES) {
500         initial_entries = MIN_BLOCK_ENTRIES;
501         TRACEMSG(2,(TRC_FMT "adjusted initial_entries to %" PRIu64,
502                     TRC_ARG(table_ptr), initial_entries));
503     } else {
504         if (initial_entries > HASH_GET_MAX_BLOCK_ENTRIES(table_ptr)) {
505             initial_entries = HASH_GET_MAX_BLOCK_ENTRIES(table_ptr);
506         }
507         TRACEMSG(2,(TRC_FMT "adjusted initial_entries to %" PRIu64,
508                     TRC_ARG(table_ptr), initial_entries));
509         assert(initial_entries >= MIN_BLOCK_ENTRIES);
510     }
511 
512     TRACEMSG(1,(TRC_FMT "Adding block #0...", TRC_ARG(table_ptr)));
513 
514     /* Start with one block */
515     table_ptr->num_blocks = 1;
516     block_ptr = hashlib_create_block(table_ptr, initial_entries);
517     while (NULL == block_ptr) {
518         TRACEMSG(1,(TRC_FMT "Adding block #0 failed.", TRC_ARG(table_ptr)));
519         if (initial_entries <= MIN_BLOCK_ENTRIES) {
520             table_ptr->num_blocks = 0;
521             hashlib_free_table(table_ptr);
522             return NULL;
523         }
524         initial_entries >>= 1;
525         TRACEMSG(2,(TRC_FMT "adjusted initial_entries to %" PRIu64,
526                     TRC_ARG(table_ptr), initial_entries));
527         assert(initial_entries >= MIN_BLOCK_ENTRIES);
528     }
529     table_ptr->block_ptrs[0] = block_ptr;
530 
531     TRACEMSG(1,(TRC_FMT "Added block #%u.",
532                 TRC_ARG(table_ptr), table_ptr->num_blocks - 1));
533 
534     return table_ptr;
535 }
536 
537 
538 void
539 hashlib_free_table(
540     HashTable          *table_ptr)
541 {
542     unsigned int k;
543 
544     if (NULL == table_ptr) {
545         return;
546     }
547 
548     TRACEMSG(1,(TRC_FMT "Freeing HashTable...", TRC_ARG(table_ptr)));
549     /* Free all the blocks in the table */
550     for (k = 0; k < table_ptr->num_blocks; ++k) {
551         TRACEMSG(2,(TRC_FMT "Freeing block #%u", TRC_ARG(table_ptr), k));
552         hashlib_free_block(table_ptr->block_ptrs[k]);
553     }
554 
555     /* Free the empty pointer memory */
556     free(table_ptr->no_value_ptr);
557 
558     /* Free the table structure itself */
559     free(table_ptr);
560     TRACEMSG(1,(TRC_FMT "Freed HashTable.", TRC_ARG(table_ptr)));
561 }
562 
563 
564 /*
565  *    NOTE: Assumes block_entries is a power of 2.  Very important!
566  */
567 static HashBlock *
568 hashlib_create_block(
569     HashTable          *table_ptr,
570     uint64_t            block_entries)
571 {
572     HashBlock *block_ptr;
573     uint64_t block_bytes;
574 
575     HASH_ASSERT_SIZE_IS_POWER_2(block_entries);
576 
577     HASH_STAT_INCR(table_ptr, blocks_allocated);
578 
579     block_bytes = block_entries * HASH_GET_ENTRY_LEN(table_ptr);
580 
581     TRACEMSG(1,((TRC_FMT "Creating block; requesting %#" PRIx64
582                  " %" PRIu64 "-byte entries (%" PRIu64 " bytes)..."),
583                 TRC_ARG(table_ptr), block_entries,
584                 HASH_GET_ENTRY_LEN(table_ptr), block_bytes));
585 
586 #if SIZE_MAX < UINT64_MAX
587     /* verify we do not overflow the size of a size_t */
588     if (block_bytes > SIZE_MAX) {
589         TRACEMSG(1,(TRC_FMT "Cannot create block; size exceeds SIZE_MAX.",
590                     TRC_ARG(table_ptr)));
591         return NULL;
592     }
593 #endif
594 
595     /* Allocate memory for the block and initialize attributes.  */
596     block_ptr = (HashBlock*)malloc(sizeof(HashBlock));
597     if (block_ptr == NULL) {
598         TRACEMSG(1,(TRC_FMT "Failed to allocate new HashBlock.",
599                     TRC_ARG(table_ptr)));
600         return NULL;
601     }
602     block_ptr->data_ptr = (uint8_t*)malloc(block_bytes);
603     if (block_ptr->data_ptr == NULL) {
604         free(block_ptr);
605         TRACEMSG(1,(TRC_FMT "Failed to allocate new data block.",
606                     TRC_ARG(table_ptr)));
607         return NULL;
608     }
609 
610     block_ptr->table = table_ptr;
611     block_ptr->max_entries = block_entries;
612     block_ptr->num_entries = 0;
613     block_ptr->block_full = table_ptr->load_factor * (block_entries >> 8);
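    /* For example (illustrative numbers): a load_factor of 192 and a
     * block of 65536 entries give block_full = 192 * (65536 >> 8) =
     * 49152, i.e. the block is treated as full at 75% occupancy. */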
614 
615     /* Copy "empty" value to each entry.  Garbage key values are
616      * ignored, so we don't bother writing to the keys.  When the
617      * application overestimates the amount of memory needed, this can
618      * be a bottleneck.  */
619     if (table_ptr->can_memset_val) {
620         memset(block_ptr->data_ptr, table_ptr->no_value_ptr[0], block_bytes);
621     } else {
622         uint8_t *data_ptr;
623         uint64_t i;
624 
625         /* Initialize 'data_ptr' to point at the value part of the
626          * first entry in the block; move 'data_ptr' to the next value
627          * at every iteration */
628         for (i = 0, data_ptr = HASH_VALUE_AT(block_ptr, 0);
629              i < block_ptr->max_entries;
630              ++i, data_ptr += HASH_GET_ENTRY_LEN(table_ptr))
631         {
632             memcpy(data_ptr, table_ptr->no_value_ptr,
633                    HASH_GET_VALUE_LEN(block_ptr));
634         }
635     }
636 
637     return block_ptr;
638 }
639 
640 
641 static void
642 hashlib_free_block(
643     HashBlock          *block_ptr)
644 {
645     /* Free the data and the block itself */
646     assert(block_ptr);
647     free(block_ptr->data_ptr);
648     free(block_ptr);
649 }
650 
651 
652 /*
653  *    Rehash entire table into a single block.
654  */
655 int
656 hashlib_rehash(
657     HashTable          *table_ptr)
658 {
659     const uint64_t max_entries = HASH_GET_MAX_BLOCK_ENTRIES(table_ptr);
660     HashBlock *new_block_ptr = NULL;
661     HashBlock *block_ptr = NULL;
662     uint64_t num_entries = 0;
663     uint64_t initial_entries;
664     const uint8_t *key_ref;
665     const uint8_t *val_ref;
666     uint8_t *entry_ptr;
667     uint8_t *new_entry_ptr;
668     int rv;
669     unsigned int k;
670     uint64_t i;
671 
672     HASH_STAT_INCR(table_ptr, rehashes);
673 
674     if (table_ptr->is_sorted) {
675         TRACEMSG(1,(TRC_FMT "ERROR: Attempt to rehash a sorted HashTable",
676                     TRC_ARG(table_ptr)));
677         assert(0 == table_ptr->is_sorted);
678         return ERR_SORTTABLE;
679     }
680 
681     /*
682      *    Count the total number of entries so we know what we need to
683      *    allocate.  We base this on the actual size of the blocks,
684      *    and use the power of 2 that's double the smallest power of 2
685      *    bigger than the sum of block sizes. It's justified by the
686      *    intuition that once we reach this point, we've decided that
687      *    we're going to explore an order of magnitude larger
688      *    table. This particular scheme seems to work well in practice
689      *    although it's difficult to justify theoretically--this is a
690      *    rather arbitrary definition of "order of magnitude".
691      */
692     for (k = 0; k < table_ptr->num_blocks; ++k) {
693         num_entries += table_ptr->block_ptrs[k]->max_entries;
694     }
695     assert(num_entries > 0);
696 
697     TRACEMSG(1,((TRC_FMT "Rehashing table having %" PRIu64
698                  " %" PRIu64 "-byte entries..."),
699                 TRC_ARG(table_ptr), num_entries,
700                 HASH_GET_ENTRY_LEN(table_ptr)));
701 
702     if (num_entries >= max_entries) {
703         TRACEMSG(1,((TRC_FMT "Too many entries for rehash; "
704                      " num_entries=%" PRIu64 " >= max_entries=%" PRIu64 "."),
705                     TRC_ARG(table_ptr), num_entries, max_entries));
706         return ERR_OUTOFMEMORY;
707     }
708 
709     /* Choose the size for the initial block as the next power of 2
710      * greater than the number of entries. */
711     initial_entries = UINT64_C(1) << (1 + skIntegerLog2(num_entries));
712     if (initial_entries < MIN_BLOCK_ENTRIES) {
713         initial_entries = MIN_BLOCK_ENTRIES;
714     }
715 
716     /* double it once more if doubling stays below max_entries and we have fewer than 2^28 entries */
717     if (((max_entries >> 1) > initial_entries)
718         && (initial_entries < UINT64_C(0x10000000)))
719     {
720         initial_entries <<= 1;
721     }
722     if (initial_entries > max_entries) {
723         TRACEMSG(1,((TRC_FMT "Will not rehash table; new initial_entries=%"
724                      PRIu64 " > max_entries=%" PRIu64 "."),
725                     TRC_ARG(table_ptr), initial_entries, max_entries));
726         return ERR_OUTOFMEMORY;
727     }
728 
729     TRACEMSG(1,(TRC_FMT "Allocating new rehash block...", TRC_ARG(table_ptr)));
730 
731     /* Create the new block */
732     new_block_ptr = hashlib_create_block(table_ptr, initial_entries);
733     if (new_block_ptr == NULL) {
734         TRACEMSG(1,((TRC_FMT "Allocating rehash block failed for %#" PRIx64
735                      " entries."), TRC_ARG(table_ptr), initial_entries));
736         return ERR_OUTOFMEMORY;
737     }
738     TRACEMSG(1,(TRC_FMT "Allocated rehash block.", TRC_ARG(table_ptr)));
739 
740     /* Walk through each block in the table looking for non-empty
741      * entries and insert them into the new block. */
742     for (k = table_ptr->num_blocks; k > 0; ) {
743         --k;
744         block_ptr = table_ptr->block_ptrs[k];
745         TRACEMSG(2,(TRC_FMT "Rehashing entries from block #%u",
746                     TRC_ARG(table_ptr), k));
747 
748         for (i = 0, entry_ptr = HASH_ENTRY_AT(block_ptr, 0);
749              i < block_ptr->max_entries;
750              ++i, entry_ptr += HASH_GET_ENTRY_LEN(block_ptr))
751         {
752             key_ref = HASHENTRY_GET_KEY(block_ptr, entry_ptr);
753             val_ref = HASHENTRY_GET_VALUE(block_ptr, entry_ptr);
754 
755             /* If not empty, then copy the entry into the new block */
756             if (!HASH_VALUE_ISEMPTY(block_ptr, val_ref)) {
757                 rv = hashlib_block_find_entry(new_block_ptr,
758                                               key_ref, &new_entry_ptr);
759                 if (rv != ERR_NOTFOUND) {
760                     /* the entry is non-empty, yet its key already
761                      * exists in the new block. either the hashlib code
762                      * is broken, or the user set a value to the
763                      * no_value_ptr value and broke the collision
764                      * resolution mechanism.  if assert() is active,
765                      * the next line will call abort(). */
766                     TRACEMSG(1,((TRC_FMT "During the rehash, unexpectedly"
767                                  " found an existing key in the new block"),
768                                 TRC_ARG(table_ptr)));
769                     assert(rv == ERR_NOTFOUND);
770                     hashlib_free_block(new_block_ptr);
771                     table_ptr->num_blocks = 1 + k;
772                     return ERR_INTERNALERROR;
773                 }
774                 /* Copy the key and value */
775                 HASHENTRY_SET_KEY(new_block_ptr, new_entry_ptr, key_ref);
776                 memcpy(HASHENTRY_GET_VALUE(new_block_ptr, new_entry_ptr),
777                        val_ref, HASH_GET_VALUE_LEN(block_ptr));
778                 ++new_block_ptr->num_entries;
779                 HASH_STAT_INCR(table_ptr, rehash_inserts);
780             }
781         }
782 
783         /* Free the block */
784         hashlib_free_block(block_ptr);
785         table_ptr->block_ptrs[k] = NULL;
786     }                           /* blocks */
787 
788     /* Associate the new block with the table */
789     table_ptr->num_blocks = 1;
790     table_ptr->block_ptrs[0] = new_block_ptr;
791 
792     TRACEMSG(1,(TRC_FMT "Rehashed table.", TRC_ARG(table_ptr)));
793 
794     return OK;
795 }
796 
797 
798 /*
799  *    Add a new block to a table.
800  */
801 static int
802 hashlib_add_block(
803     HashTable          *table_ptr,
804     uint64_t            new_block_entries)
805 {
806     HashBlock *block_ptr = NULL;
807 
808     assert(table_ptr->num_blocks < HASH_MAX_BLOCKS);
809     if (table_ptr->num_blocks >= HASH_MAX_BLOCKS) {
810         TRACEMSG(1,((TRC_FMT "Cannot allocate another block:"
811                      " num_blocks=%" PRIu32 " >= HASH_MAX_BLOCKS=%u."),
812                     TRC_ARG(table_ptr), table_ptr->num_blocks,
813                     HASH_MAX_BLOCKS));
814         return ERR_NOMOREBLOCKS;
815     }
816     /* Create the new block */
817     TRACEMSG(1,((TRC_FMT "Adding block #%u..."),
818                 TRC_ARG(table_ptr), table_ptr->num_blocks));
819     block_ptr = hashlib_create_block(table_ptr, new_block_entries);
820     if (block_ptr == NULL) {
821         TRACEMSG(1,(TRC_FMT "Adding block #%u failed.",
822                     TRC_ARG(table_ptr), table_ptr->num_blocks));
823         return ERR_OUTOFMEMORY;
824     }
825 
826     /* Add it to the table */
827     table_ptr->block_ptrs[table_ptr->num_blocks] = block_ptr;
828     ++table_ptr->num_blocks;
829     TRACEMSG(1,(TRC_FMT "Added block #%u.",
830                 TRC_ARG(table_ptr), table_ptr->num_blocks - 1));
831 
832     return OK;
833 }
834 
835 
836 /*
837  *    Using the maximum memory_size for the entire hash table (either
838  *    from the environment or the HASH_MAX_MEMORY_TOTAL default),
839  *    compute the maximum number of entries that the initial block is
840  *    allowed to contain and update the hash table with that value.
841  */
842 static void
843 hashlib_compute_max_initial_entries(
844     HashTable          *table_ptr)
845 {
846     /* true if env contains an invalid value */
847     static int bad_env = 0;
848 
849     /* assume the initial block has this size */
850     const uint64_t init_basis = (1 << 16);
851 
852     /* based on the initial block having a size of 'init_basis',
853      * compute the total size of the hash table using the
854      * SECONDARY_BLOCK_FRACTION and the number of hash blocks */
855     uint64_t total_basis;
856 
857     /* max number of entries in initial block */
858     uint64_t max_init_entry;
859 
860     uint64_t max_memory = HASH_MAX_MEMORY_TOTAL;
861     const char *env;
862 
863     /* Determine maximum memory footprint of the entire hash table */
864     env = getenv(HASHLIB_ENV_MAXMEM);
865     if (bad_env) {
866         /* do nothing */
867     } else if (env && *env) {
868         int rv = skStringParseHumanUint64(&max_memory, env, SK_HUMAN_NORMAL);
869         if (rv) {
870             bad_env = 1;
871             skAppPrintErr("Ignoring Invalid %s '%s': %s",
872                           HASHLIB_ENV_MAXMEM, env, skStringParseStrerror(rv));
873             max_memory = HASH_MAX_MEMORY_TOTAL;
874         }
875     }
876 
877     assert(HASH_MAX_BLOCKS > 2);
878     assert(REHASH_BLOCK_COUNT >= 2);
879     assert(REHASH_BLOCK_COUNT < HASH_MAX_BLOCKS);
880 
881     /*
882      *    When you continually take half of an initial value and add
883      *    it to the initial value, the sum approaches twice the initial
884      *    value.  If only the first N terms are summed, the total is:
885      *
886      *    init_value * 2 - (init_value >> (N - 1))
887      */
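    /*
     *    For example, with init_value = 16 and N = 3 terms (16 + 8 + 4),
     *    the sum is 28, which matches 16 * 2 - (16 >> 2) = 32 - 4.
     */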
888 
889     switch (SECONDARY_BLOCK_FRACTION) {
890       case -1:
891         /* Keep halving blocks until REHASH_BLOCK_COUNT blocks have
892          * been created, then size is constant */
893         total_basis = (init_basis * 2
894                        + ((init_basis >> (REHASH_BLOCK_COUNT - 1))
895                           * (HASH_MAX_BLOCKS - REHASH_BLOCK_COUNT - 1)));
896         break;
897       case -2:
898         /* First secondary block is 1/4 size of main block; other
899          * blocks are halved until REHASH_BLOCK_COUNT is reached */
900         total_basis = (init_basis + (init_basis >> 1)
901                        - ((init_basis >> 2) >> (REHASH_BLOCK_COUNT - 2))
902                        + ((init_basis >> REHASH_BLOCK_COUNT)
903                           >> (HASH_MAX_BLOCKS - REHASH_BLOCK_COUNT)));
904         break;
905       case -3:
906         /* First secondary block is 1/2 size of main block; all others
907          * are 1/4 of main block */
908         total_basis = (init_basis + (init_basis >> 1)
909                        + (init_basis >> 2) * (HASH_MAX_BLOCKS - 2));
910         break;
911       case -4:
912         /* First secondary block is 1/4 size of main block; all others
913          * are 1/8 of main block */
914         total_basis = (init_basis + (init_basis >> 2)
915                        + (init_basis >> 3) * (HASH_MAX_BLOCKS - 2));
916         break;
917       case 0:
918         /* All blocks are the same size (shift size by 0) */
919         total_basis = init_basis * HASH_MAX_BLOCKS;
920         break;
921       default:
922         /* All secondary blocks shifted by SECONDARY_BLOCK_FRACTION
923          * from the main block */
924         if (SECONDARY_BLOCK_FRACTION < 0) {
925             skAbort();
926         }
927         total_basis = (init_basis + ((init_basis >> SECONDARY_BLOCK_FRACTION)
928                                      * (HASH_MAX_BLOCKS - 1)));
929         break;
930     }
931 
932     /*
933      *    this is the formula that would compute the total maximum
934      *    size of the hash table
935      *
936      * total_size = initial_bins * record_size * total_basis / init_basis;
937      *
938      *    using that formula, solve for initial_bins:
939      */
940     max_init_entry = ((double)max_memory / (double)total_basis
941                       * (double)init_basis
942                       / (double)HASH_GET_ENTRY_LEN(table_ptr));
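    /*
     *    Worked example with illustrative numbers: under the default
     *    SECONDARY_BLOCK_FRACTION of -3, total_basis is 3 * init_basis,
     *    so 16-byte entries and a max_memory of 48 GiB give a
     *    max_init_entry of roughly 48 * 2^30 / 3 / 16 = 2^30 entries.
     */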
943 
944     /*
945      *    Get the largest power of two that does not exceed max_init_entry.
946      */
947     table_ptr->max_init_entry = UINT64_C(1) << skIntegerLog2(max_init_entry);
948     if (table_ptr->max_init_entry < MIN_BLOCK_ENTRIES) {
949         table_ptr->max_init_entry = MIN_BLOCK_ENTRIES;
950     }
951 
952 #if TRACEMSG_LEVEL >= 2
953     {
954         uint64_t total_size;
955 
956         total_size = ((double)table_ptr->max_init_entry / (double)init_basis
957                       * (double)HASH_GET_ENTRY_LEN(table_ptr)
958                       * (double)total_basis);
959 
960         TRACEMSG(2,((TRC_FMT "desired max mem = %" PRIu64
961                      "; entry size = %" PRIu64
962                      "; max total entry = %" PRIu64
963                      "; total size / init block size = %.4f"
964                      "; max initial entry = %" PRIu64
965                      "; log2 initial entry = %" PRIu64
966                      "; actual max memory = %" PRIu64
967                      "; actual max/desired max = %.4f"),
968                     TRC_ARG(table_ptr), max_memory,
969                     HASH_GET_ENTRY_LEN(table_ptr),
970                     max_memory / HASH_GET_ENTRY_LEN(table_ptr),
971                     (double)total_basis/(double)init_basis,
972                     max_init_entry, table_ptr->max_init_entry, total_size,
973                     ((0 == max_memory)
974                      ? DBL_MAX
975                      : (double)total_size/max_memory)));
976     }
977 #endif  /* TRACEMSG_LEVEL >= 2 */
978 }
979 
980 
981 
982 /*
983  *    Compute the size of the next hash block.
984  */
985 static uint64_t
986 hashlib_compute_next_block_entries(
987     HashTable          *table_ptr)
988 {
989     uint64_t block_entries = 0;
990 
991     /* This condition is normally true only when the primary block has
992      * reached the maximum block size (or an earlier rehash failed). */
993     if (table_ptr->num_blocks >= REHASH_BLOCK_COUNT) {
994         return table_ptr->block_ptrs[table_ptr->num_blocks-1]->max_entries;
995     }
996     /* Otherwise, it depends on current parameters */
997     switch (SECONDARY_BLOCK_FRACTION) {
998       case -1:
999         /* Keep halving blocks */
1000         block_entries =
1001             (table_ptr->block_ptrs[table_ptr->num_blocks-1]->max_entries >> 1);
1002         break;
1003       case -2:
1004         if (table_ptr->num_blocks == 1) {
1005             /* First secondary block is 1/4 size of main block */
1006             block_entries = table_ptr->block_ptrs[0]->max_entries >>2;
1007         } else {
1008             /* Other secondary blocks are halved */
1009             block_entries =
1010                 table_ptr->block_ptrs[table_ptr->num_blocks-1]->max_entries >>1;
1011         }
1012         break;
1013       case -3:
1014         if (table_ptr->num_blocks == 1) {
1015             /* First secondary block is 1/2 size of main block */
1016             block_entries = table_ptr->block_ptrs[0]->max_entries >> 1;
1017         } else {
1018             /* All others are 1/4 size of main block */
1019             block_entries = table_ptr->block_ptrs[0]->max_entries >> 2;
1020         }
1021         break;
1022       case -4:
1023         if (table_ptr->num_blocks == 1) {
1024             /* First secondary block is 1/4 size of main block */
1025             block_entries = table_ptr->block_ptrs[0]->max_entries >> 2;
1026         } else {
1027             /* All others are 1/8 size of main block */
1028             block_entries = table_ptr->block_ptrs[0]->max_entries >> 3;
1029         }
1030         break;
1031       case 0:
1032         /* All blocks are the same size */
1033         block_entries = table_ptr->block_ptrs[0]->max_entries;
1034         break;
1035       default:
1036         if (SECONDARY_BLOCK_FRACTION < 0) {
1037             skAppPrintErr("Invalid block fraction %d",
1038                           SECONDARY_BLOCK_FRACTION);
1039             skAbort();
1040         }
1041         block_entries =
1042             (table_ptr->block_ptrs[0]->max_entries >> SECONDARY_BLOCK_FRACTION);
1043         break;
1044     }
1045     if (block_entries < MIN_BLOCK_ENTRIES) {
1046         block_entries = MIN_BLOCK_ENTRIES;
1047     }
1048     return block_entries;
1049 }
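
/*
 *    Worked example of the default SECONDARY_BLOCK_FRACTION of -3: if
 *    block 0 holds 0x100000 (2^20) entries, the next block created
 *    holds 2^19 entries, and blocks after that hold 2^18 entries each
 *    (subject to the MIN_BLOCK_ENTRIES floor and to the fixed size
 *    used once REHASH_BLOCK_COUNT blocks exist).
 */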
1050 
1051 /*
1052  *  Algorithm:
1053  *  - If the primary block is at its maximum size, never rehash; only
1054  *    add new blocks.
1055  *  - If we have a small table, then don't bother creating secondary
1056  *    blocks.  Simply rehash into a new block.
1057  *  - If the number of blocks has reached REHASH_BLOCK_COUNT, rehash
1058  *    into a new block.
1059  *  - Otherwise, create a new block.
1060  */
1061 
1062 static int
1063 hashlib_resize_table(
1064     HashTable          *table_ptr)
1065 {
1066     uint64_t new_block_entries;
1067     int rv;
1068 
1069     TRACEMSG(1,(TRC_FMT "Resizing the table...", TRC_ARG(table_ptr)));
1070 
1071     /* Compute the (potential) size of the new block */
1072     new_block_entries = hashlib_compute_next_block_entries(table_ptr);
1073     assert(new_block_entries != 0);
1074 
1075     /* If we're at the maximum number of blocks (which implies that
1076      * the first block is at its maximum size) and we can't resize,
1077      * then that's it. */
1078     if (table_ptr->num_blocks == HASH_MAX_BLOCKS) {
1079         TRACEMSG(1,((TRC_FMT "Unable to resize table: no more blocks;"
1080                      " table contains %" PRIu64 " %" PRIu64 "-byte entries"
1081                      " in %" PRIu64 " buckets across %u blocks"),
1082                     TRC_ARG(table_ptr), hashlib_count_entries(table_ptr),
1083                     HASH_GET_ENTRY_LEN(table_ptr),
1084                     hashlib_count_buckets(table_ptr), table_ptr->num_blocks));
1085         return ERR_NOMOREBLOCKS;
1086     }
1087     /* If the first block is at its maximum size or if we have tried
1088      * and failed to rehash in the past, then add a new block. Once we
1089      * reach the maximum block size, we don't rehash.  Instead we keep
1090      * adding blocks until we reach the maximum. */
1091     if ((table_ptr->block_ptrs[0]->max_entries
1092          == HASH_GET_MAX_BLOCK_ENTRIES(table_ptr))
1093         || table_ptr->rehash_failed)
1094     {
1095         assert(new_block_entries > MIN_BLOCK_ENTRIES);
1096         return hashlib_add_block(table_ptr, new_block_entries);
1097     }
1098     /* If we have REHASH_BLOCK_COUNT blocks, or the new block would be
1099      * too small, we simply rehash. */
1100     if ((new_block_entries < MIN_BLOCK_ENTRIES) ||
1101         (table_ptr->num_blocks >= REHASH_BLOCK_COUNT))
1102     {
1103         TRACEMSG(1,((TRC_FMT "Resize table forcing rehash;"
1104                      " new_block_entries = %#" PRIx64
1105                      "; num_blocks = %u; REHASH_BLOCK_COUNT = %" PRIu32 "."),
1106                     TRC_ARG(table_ptr), new_block_entries,
1107                     table_ptr->num_blocks, REHASH_BLOCK_COUNT));
1108         rv = hashlib_rehash(table_ptr);
1109         if (rv != ERR_OUTOFMEMORY) {
1110             return rv;
1111         }
1112         /* rehashing failed.  try instead to add a new (small) block */
1113         table_ptr->rehash_failed = 1;
1114         if (new_block_entries < MIN_BLOCK_ENTRIES) {
1115             new_block_entries = MIN_BLOCK_ENTRIES;
1116         }
1117         TRACEMSG(1,(TRC_FMT "Rehash failed; creating new block instead...",
1118                     TRC_ARG(table_ptr)));
1119     }
1120     /* Assert several global invariants */
1121     assert(new_block_entries >= MIN_BLOCK_ENTRIES);
1122     assert(new_block_entries <= HASH_GET_MAX_BLOCK_ENTRIES(table_ptr));
1123     assert(table_ptr->num_blocks < HASH_MAX_BLOCKS);
1124 
1125     /* Otherwise, add a new block */
1126     return hashlib_add_block(table_ptr, new_block_entries);
1127 }
1128 
1129 
1130 #if 0
1131 static void
1132 assert_not_already_there(
1133     const HashTable    *table_ptr,
1134     const uint8_t      *key_ptr)
1135 {
1136     uint8_t *entry_ptr;
1137     const HashBlock *block_ptr;
1138     unsigned int k;
1139     int rv;
1140 
1141     for (k = 0; k < (table_ptr->num_blocks-1); ++k) {
1142         block_ptr = table_ptr->block_ptrs[k];
1143         rv = hashlib_block_find_entry(block_ptr, key_ptr, &entry_ptr);
1144         if (rv == OK) {
1145             getc(stdin);
1146         }
1147     }
1148 }
1149 #endif /* 0 */
1150 
1151 
1152 int
1153 hashlib_insert(
1154     HashTable          *table_ptr,
1155     const uint8_t      *key_ptr,
1156     uint8_t           **value_pptr)
1157 {
1158     HashBlock *block_ptr = NULL;
1159     uint8_t *entry_ptr = NULL;
1160     unsigned int k;
1161     int rv;
1162 
1163     assert(table_ptr);
1164 
1165     HASH_STAT_INCR(table_ptr, inserts);
1166 
1167     if (table_ptr->is_sorted) {
1168         TRACEMSG(1,(TRC_FMT "Attempted an insert into a sorted HashTable",
1169                     TRC_ARG(table_ptr)));
1170         assert(0 == table_ptr->is_sorted);
1171         return ERR_SORTTABLE;
1172     }
1173 
1174     /* See if we are ready to do a resize by either adding a block or
1175      * rehashing. */
1176     if (HASH_BLOCK_IS_FULL(table_ptr->block_ptrs[table_ptr->num_blocks-1])){
1177         rv = hashlib_resize_table(table_ptr);
1178         if (rv != OK) {
1179             return rv;
1180         }
1181     }
1182     assert(table_ptr->num_blocks);
1183 
1184     /* Look in each block for the key */
1185     for (k = 0; k < table_ptr->num_blocks; ++k) {
1186         block_ptr = table_ptr->block_ptrs[k];
1187         if (hashlib_block_find_entry(block_ptr, key_ptr, &entry_ptr) == OK) {
1188             /* Found entry, use it */
1189             *value_pptr = HASHENTRY_GET_VALUE(block_ptr, entry_ptr);
1190             return OK_DUPLICATE;
1191         }
1192     }
1193     assert(block_ptr);
1194 
1195     /*
1196      *  We did not find it; do an insert into the last block by
1197      *  setting the key AND increasing the count.  The caller will set
1198      *  the value.
1199      *
1200      *  NOTE: entry_ptr is a pointer to the insert location and
1201      *  block_ptr is pointing at the last block, which is why we
1202      *  first check whether we need to grow the table.
1203      *
1204      *  NOTE: Since we return a reference to the value, the user could
1205      *  either not set the value or mistakenly set the value to the
1206      *  'no_value_ptr'.  This is problematic, since the internal count
1207      *  will have been incremented even though in essence no entry has
1208      *  been added.  This may lead to growing the table sooner than
1209      *  necessary.
1210      *
1211      *  Even worse is if the user updates an existing entry's value to
1212      *  the 'no_value_ptr' after there has been a collision on that
1213      *  entry.  Keys that collided can no longer be found in the table.
1214      */
1215     *value_pptr = HASHENTRY_GET_VALUE(block_ptr, entry_ptr);
1216     HASHENTRY_SET_KEY(block_ptr, entry_ptr, key_ptr);
1217     ++block_ptr->num_entries;
1218 
1219     return OK;
1220 }
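
/*
 *    Illustrative caller sketch; the 8-byte counter layout, 'table',
 *    and 'key' are hypothetical (the value bytes mean whatever the
 *    caller decides, and the caller must set them itself):
 *
 *        uint8_t *val;
 *        uint64_t c;
 *        switch (hashlib_insert(table, key, &val)) {
 *          case OK:             // new entry: initialize the value
 *            c = 1;
 *            memcpy(val, &c, sizeof(c));
 *            break;
 *          case OK_DUPLICATE:   // existing entry: update in place
 *            memcpy(&c, val, sizeof(c));
 *            ++c;
 *            memcpy(val, &c, sizeof(c));
 *            break;
 *          default:             // ERR_OUTOFMEMORY, ERR_SORTTABLE, ...
 *            break;
 *        }
 */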
1221 
1222 
1223 int
1224 hashlib_lookup(
1225     const HashTable    *table_ptr,
1226     const uint8_t      *key_ptr,
1227     uint8_t           **value_pptr)
1228 {
1229     const HashBlock *block_ptr;
1230     uint8_t *entry_ptr = NULL;
1231     unsigned int k;
1232 
1233     assert(table_ptr);
1234 
1235     HASH_STAT_INCR(table_ptr, lookups);
1236 
1237     if (table_ptr->is_sorted) {
1238         TRACEMSG(1,(TRC_FMT "Attempt to lookup in a sorted HashTable",
1239                     TRC_ARG(table_ptr)));
1240         assert(0 == table_ptr->is_sorted);
1241         return ERR_SORTTABLE;
1242     }
1243 
1244     /* Look in each block for the key */
1245     for (k = 0; k < table_ptr->num_blocks; ++k) {
1246         block_ptr = table_ptr->block_ptrs[k];
1247         if (hashlib_block_find_entry(block_ptr, key_ptr, &entry_ptr) == OK) {
1248             /* Return pointer to the value in the entry structure */
1249             *value_pptr = HASHENTRY_GET_VALUE(block_ptr, entry_ptr);
1250             return OK;
1251         }
1252     }
1253     return ERR_NOTFOUND;
1254 }
1255 
1256 
1257 /*
1258  *    If the key is not found, 'entry_pptr' points to the insertion point.
1259  */
1260 static int
1261 hashlib_block_find_entry(
1262     const HashBlock    *block_ptr,
1263     const uint8_t      *key_ptr,
1264     uint8_t           **entry_pptr)
1265 {
1266 #ifdef HASHLIB_RECORD_STATS
1267     int first_check = 1;
1268 #endif
1269 #ifndef NDEBUG
1270     uint32_t num_tries = 0;
1271 #endif
1272     uint64_t hash_index;
1273     uint64_t hash_value;
1274     uint64_t hash_probe_increment;
1275     /* seeds for the hashing function */
1276     uint32_t hash_primary = 0x53694c4b;
1277     uint32_t hash_secondary = 0x4361726e;
1278 
1279     HASH_STAT_INCR(block_ptr, find_entries);
1280 
1281     /*
1282      *  First this code computes the hash for the key, the
1283      *  'hash_value'.
1284      *
1285      *  The 'hash_value' is masked by the size of the block to
1286      *  determine which bucket to check (the 'hash_index').  Since the
1287      *  block size is a power of 2, masking can be used as a modulo
1288      *  operation.
1289      *
1290      *  If the bucket is empty, this function is done; the function
1291      *  passes back a handle to that bucket and returns ERR_NOTFOUND.
1292      *
1293      *  If the bucket is not empty, the function checks to see if
1294      *  bucket's key matches the key passed into the function.  The
1295      *  comparison is done by memcmp()ing the keys.  If the keys
1296      *  match, the entry is found; the function passes back a handle
1297      *  the bucket and returns OK.
1298      *
1299      *  If the keys do not match, it means multiple keys return the
1300      *  same hash value; that is, there is a collision.  A new bucket
1301      *  is selected by incrementing the 'hash_value' by the
1302      *  'hash_probe_increment' value and masking the result by the
1303      *  block size.
1304      *
1305      *  The process repeats until either an empty bucket is found or
1306      *  the keys match.
1307      *
1308      *  This collision resolution mechanism is what makes removal
1309      *  impossible.  To allow removal, we would need either to define
1310      *  a special "deleted-entry" value or to rehash the table after
1311      *  each deletion.
1312      *
1313      *  Collision resolution is also why the caller should never
1314      *  update a bucket's value to the no_value_ptr value---though
1315      *  there is no way for the hashlib code to enforce this.
1316      */
1317     hash(key_ptr, HASH_GET_KEY_LEN(block_ptr), &hash_primary, &hash_secondary);
1318     hash_value = hash_primary + (((uint64_t)hash_secondary) << UINT64_C(32));
1319     hash_probe_increment = hash_value | 0x01; /* must be odd */
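    /*
     *  An odd increment is coprime with the power-of-two block size,
     *  so the probe sequence visits every bucket before repeating.
     *  Small illustration: with 8 buckets, a starting index of 5, and
     *  an increment of 3, the indices visited are 5, 0, 3, 6, 1, 4,
     *  7, 2.
     */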
1320     for (;;) {
1321         hash_index = hash_value & (block_ptr->max_entries - 1);
1322 
1323         *entry_pptr = HASH_ENTRY_AT(block_ptr, hash_index);
1324         if (HASHENTRY_ISEMPTY(block_ptr, *entry_pptr)) {
1325             /* Hit an empty entry, we're done. */
1326             return ERR_NOTFOUND;
1327         }
1328         /* compare the keys */
1329         if (0 == memcmp(HASHENTRY_GET_KEY(block_ptr, *entry_pptr),
1330                         key_ptr, HASH_GET_KEY_LEN(block_ptr)))
1331         {
1332             /* Found a match, we're done */
1333             return OK;
1334         }
1335 
1336         /* increment the hash value */
1337         hash_value += hash_probe_increment;
1338         assert(++num_tries < block_ptr->max_entries);
1339 #ifdef HASHLIB_RECORD_STATS
1340         if (first_check) {
1341             first_check = 0;
1342             HASH_STAT_INCR(block_ptr, find_collisions);
1343         }
1344         HASH_STAT_INCR(block_ptr, collision_hops);
1345 #endif  /* HASHLIB_RECORD_STATS */
1346     }
1347 }
1348 
1349 
1350 #ifdef HASHLIB_RECORD_STATS
1351 void
1352 hashlib_clear_stats(
1353     HashTable          *table_ptr)
1354 {
1355     if (table_ptr) {
1356         memset(&table_ptr->ht_stats, 0, sizeof(table_ptr->ht_stats));
1357     }
1358 }
1359 
1360 void
1361 hashlib_get_stats(
1362     const HashTable    *table_ptr,
1363     hashlib_stats_t    *hash_stats)
1364 {
1365     if (table_ptr && hash_stats) {
1366         memcpy(hash_stats, &table_ptr->ht_stats, sizeof(table_ptr->ht_stats));
1367     }
1368 }
1369 
1370 void
1371 hashlib_print_stats(
1372     FILE               *fp,
1373     const HashTable    *table_ptr)
1374 {
1375     const hashlib_stats_t *hts = &table_ptr->ht_stats;
1376 
1377     fprintf(fp, "Accumulated hashtable statistics:\n");
1378     fprintf(fp, "  %" PRIu32 " total allocations.\n",   hts->blocks_allocated);
1379     fprintf(fp, "  %" PRIu64 " total inserts.\n",       hts->inserts);
1380     fprintf(fp, "  %" PRIu64 " total lookups.\n",       hts->lookups);
1381     fprintf(fp, "  %" PRIu32 " total rehashes.\n",      hts->rehashes);
1382     fprintf(fp, "  %" PRIu64 " inserts due to rehashing.\n",
1383             hts->rehash_inserts);
1384     fprintf(fp, "  %" PRIu64 " total finds.\n",         hts->find_entries);
1385     fprintf(fp, "  %" PRIu64 " total find collisions.\n",
1386             hts->find_collisions);
1387     fprintf(fp, "  %" PRIu64 " total collision hops.\n",
1388             hts->collision_hops);
1389 }
1390 #endif /* HASHLIB_RECORD_STATS */
1391 
1392 
1393 HASH_ITER
1394 hashlib_create_iterator(
1395     const HashTable UNUSED(*table_ptr))
1396 {
1397     HASH_ITER iter;
1398 
1399     memset(&iter, 0, sizeof(HASH_ITER));
1400     iter.block = HASH_ITER_BEGIN;
1401     return iter;
1402 }
1403 
1404 
1405 int
1406 hashlib_iterate(
1407     const HashTable    *table_ptr,
1408     HASH_ITER          *iter_ptr,
1409     uint8_t           **key_pptr,
1410     uint8_t           **val_pptr)
1411 {
1412 #if TRACEMSG_LEVEL >= 2
1413     static uint64_t so_far = 0;
1414 #endif
1415     HashBlock *block_ptr;
1416     uint8_t *entry_ptr;
1417 
1418     if (iter_ptr->block == HASH_ITER_END) {
1419         return ERR_NOMOREENTRIES;
1420     }
1421 
1422     if (table_ptr->is_sorted && table_ptr->num_blocks > 1) {
1423         /* Use sorted iterator if we should */
1424         return hashlib_iterate_sorted(table_ptr, iter_ptr, key_pptr, val_pptr);
1425     }
1426 
1427     /* Start at the first entry in the first block or increment the
1428      * iterator to start looking at the next entry. */
1429     if (iter_ptr->block == HASH_ITER_BEGIN) {
1430         /* Initialize the iterator. */
1431         memset(iter_ptr, 0, sizeof(HASH_ITER));
1432 #if TRACEMSG_LEVEL >= 2
1433         TRACEMSG(2,(TRC_FMT "Iterate. Starting to iterate over HashTable...",
1434                     TRC_ARG(table_ptr)));
1435         so_far = 0;
1436 #endif
1437     } else {
1438         ++iter_ptr->index;
1439     }
1440 
1441     /* Walk through the indices of the current block until we find a
1442      * non-empty entry.  Once we reach the end of the block, move on to
1443      * the next block. */
1444     while (iter_ptr->block < table_ptr->num_blocks) {
1445 
1446         /* Select the current block */
1447         block_ptr = table_ptr->block_ptrs[iter_ptr->block];
1448 
1449         /* Find the next non-empty entry in the current block (if
1450          * there is one). */
1451         for (entry_ptr = HASH_ENTRY_AT(block_ptr, iter_ptr->index);
1452              iter_ptr->index < block_ptr->max_entries;
1453              ++iter_ptr->index, entry_ptr += HASH_GET_ENTRY_LEN(block_ptr))
1454         {
1455             if (!HASHENTRY_ISEMPTY(block_ptr, entry_ptr)) {
1456                 /* We found an entry, return it */
1457                 *key_pptr = HASHENTRY_GET_KEY(block_ptr, entry_ptr);
1458                 *val_pptr = HASHENTRY_GET_VALUE(block_ptr, entry_ptr);
1459 #if TRACEMSG_LEVEL >= 2
1460                 ++so_far;
1461 #endif
1462                 return OK;
1463             }
1464         }
1465 
1466         /* At the end of the block. */
1467         TRACEMSG(2,((TRC_FMT "Iterate. Finished block #%u containing %" PRIu64
1468                      " entries. Total visted %" PRIu64),
1469                     TRC_ARG(table_ptr), iter_ptr->block,
1470                     block_ptr->num_entries, so_far));
1471 
1472         /* try the next block */
1473         ++iter_ptr->block;
1474         iter_ptr->index = 0;
1475     }
1476 
1477     /* We're past the last entry of the last block, so we're done. */
1478     *key_pptr = NULL;
1479     *val_pptr = NULL;
1480     iter_ptr->block = HASH_ITER_END;
1481     TRACEMSG(2,(TRC_FMT "Iterate. No more entries. Total visited %"
1482                 PRIu64, TRC_ARG(table_ptr), so_far));
1483 
1484     return ERR_NOMOREENTRIES;
1485 }
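/*
 *    Usage sketch (illustrative only; not part of the library): a
 *    typical loop that visits every entry using the iterator above.
 *    It assumes 'table' is a HashTable that was created and populated
 *    elsewhere.
 *
 *        HASH_ITER iter = hashlib_create_iterator(table);
 *        uint8_t *key;
 *        uint8_t *val;
 *
 *        while (hashlib_iterate(table, &iter, &key, &val) == OK) {
 *            process(key, val);
 *        }
 *
 *    Here process() is a caller-supplied placeholder.  The returned
 *    'key' and 'val' pointers reference the table's own storage, so
 *    copy the bytes if they must outlive the table.  Once
 *    hashlib_iterate() returns ERR_NOMOREENTRIES the iterator is
 *    exhausted; a HASH_ITER needs no explicit clean-up.
 */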


static int
hashlib_iterate_sorted(
    const HashTable    *table_ptr,
    HASH_ITER          *iter_ptr,
    uint8_t           **key_pptr,
    uint8_t           **val_pptr)
{
    uint8_t *lowest_entry = NULL;
    unsigned int k;

    assert(iter_ptr->block != HASH_ITER_END);

    /* Start at the first entry in the first block or increment the
     * iterator to start looking at the next entry. */
    if (iter_ptr->block == HASH_ITER_BEGIN) {
        /* Initialize the iterator. */
        memset(iter_ptr, 0, sizeof(HASH_ITER));
        TRACEMSG(2,((TRC_FMT "Iterate. Starting to iterate"
                     " over sorted HashTable..."), TRC_ARG(table_ptr)));
    } else {
        /* Advance the index within the block from which we took the
         * entry last time. */
        ++iter_ptr->block_idx[iter_ptr->block];
    }

    /* Find the first available value across all blocks; this is our
     * provisional "lowest" value. */
    for (k = 0; k < table_ptr->num_blocks; ++k) {
        if (iter_ptr->block_idx[k] < table_ptr->block_ptrs[k]->num_entries) {
            iter_ptr->block = k;
            lowest_entry = HASH_ENTRY_AT(table_ptr->block_ptrs[k],
                                         iter_ptr->block_idx[k]);
            break;
        }
    }

    if (k == table_ptr->num_blocks) {
        /* We've processed all blocks.  Done. */
        *key_pptr = NULL;
        *val_pptr = NULL;
        iter_ptr->block = HASH_ITER_END;
        TRACEMSG(2,(TRC_FMT "Iterate. No more entries.", TRC_ARG(table_ptr)));
        return ERR_NOMOREENTRIES;
    }

    /* Compare our provisional "lowest" with every remaining block to
     * find the actual lowest. */
    for (++k; k < table_ptr->num_blocks; ++k) {
        if ((iter_ptr->block_idx[k] < table_ptr->block_ptrs[k]->num_entries)
            && (table_ptr->cmp_fn(HASH_ENTRY_AT(table_ptr->block_ptrs[k],
                                                iter_ptr->block_idx[k]),
                                  lowest_entry,
                                  table_ptr->cmp_userdata)
                < 0))
        {
            iter_ptr->block = k;
            lowest_entry = HASH_ENTRY_AT(table_ptr->block_ptrs[k],
                                         iter_ptr->block_idx[k]);
        }
    }

    /* Return the lowest entry. */
    *key_pptr = HASHENTRY_GET_KEY(table_ptr, lowest_entry);
    *val_pptr = HASHENTRY_GET_VALUE(table_ptr, lowest_entry);
    return OK;
}
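/*
 *    Worked example of the selection above (illustrative only):
 *    suppose three sorted blocks whose not-yet-returned head entries
 *    compare as 5, 3, and 7.  The first scan picks block 0's head (5)
 *    as the provisional lowest, the second scan replaces it with
 *    block 1's head (3), and that entry is returned with
 *    iter_ptr->block left at 1 so that the next call advances block
 *    1's index.  Each call is therefore linear in the number of
 *    blocks, which is bounded by HASH_MAX_BLOCKS.
 */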


uint64_t
hashlib_count_buckets(
    const HashTable    *table_ptr)
{
    unsigned int k;
    uint64_t total = 0;

    for (k = 0; k < table_ptr->num_blocks; ++k) {
        total += table_ptr->block_ptrs[k]->max_entries;
    }
    return total;
}


uint64_t
hashlib_count_entries(
    const HashTable    *table_ptr)
{
    unsigned int k;
    uint64_t total = 0;

    for (k = 0; k < table_ptr->num_blocks; ++k) {
        total += table_ptr->block_ptrs[k]->num_entries;
        TRACEMSG(3,((TRC_FMT "entry count for block #%u is %" PRIu64 "."),
                    TRC_ARG(table_ptr), k,
                    table_ptr->block_ptrs[k]->num_entries));
    }
    return total;
}


uint64_t
hashlib_count_nonempties(
    const HashTable    *table_ptr)
{
    const HashBlock *block_ptr;
    const uint8_t *entry_ptr;
    unsigned int k;
    uint64_t total;
    uint64_t count;
    uint64_t i;

    total = 0;
    for (k = 0; k < table_ptr->num_blocks; ++k) {
        block_ptr = table_ptr->block_ptrs[k];
        count = 0;
        for (i = 0, entry_ptr = HASH_ENTRY_AT(block_ptr, 0);
             i < block_ptr->max_entries;
             ++i, entry_ptr += HASH_GET_ENTRY_LEN(block_ptr))
        {
            if (!HASHENTRY_ISEMPTY(block_ptr, entry_ptr)) {
                ++count;
            }
        }
        total += count;
        TRACEMSG(2,((TRC_FMT "nonempty count for block #%u is %" PRIu64 "."),
                    TRC_ARG(table_ptr), k, count));
    }
    return total;
}
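/*
 *    Debugging sketch (illustrative only): the three counting
 *    routines above can be used to cross-check a table, since the
 *    per-block 'num_entries' counters should agree with a direct scan
 *    of the buckets and can never exceed the bucket count.  Assuming
 *    'table' is a valid HashTable:
 *
 *        assert(hashlib_count_entries(table)
 *               == hashlib_count_nonempties(table));
 *        assert(hashlib_count_entries(table)
 *               <= hashlib_count_buckets(table));
 */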


/*
 *    Callback function used by hashlib_sort_entries().
 *
 *    Compare keys in 'a' and 'b' where the length of the keys is
 *    given by 'v_length'.
 */
static int
hashlib_memcmp_keys(
    const void         *a,
    const void         *b,
    void               *v_length)
{
    return memcmp(a, b, *(size_t*)v_length);
}


/*
 *    Move the entries in each block to the front of the block, in
 *    preparation for sorting the entries.
 */
static void
hashlib_make_contiguous(
    HashTable          *table_ptr)
{
    HashBlock *block_ptr;
    uint64_t i, j;
    unsigned int k;
    uint8_t *entry_i;
    uint8_t *entry_j;
    const uint64_t entry_len = HASH_GET_ENTRY_LEN(table_ptr);
    const uint64_t value_len = HASH_GET_VALUE_LEN(table_ptr);

    TRACEMSG(1,(TRC_FMT "Making the HashTable contiguous...",
                TRC_ARG(table_ptr)));

    for (k = 0; k < table_ptr->num_blocks; ++k) {
        TRACEMSG(2,(TRC_FMT "Making block #%u contiguous",
                    TRC_ARG(table_ptr), k));
        block_ptr = table_ptr->block_ptrs[k];
        if (0 == block_ptr->num_entries) {
            continue;
        }

        /* 'j' starts at the front of the block and moves forward to
         * find empty entries.  'i' starts at the end of the block
         * and moves backward to find occupied entries.  We move
         * non-empty entries from 'i' to 'j' to get rid of holes in
         * the block.  Stop once 'i' and 'j' meet. */
        for (j = 0, entry_j = HASH_ENTRY_AT(block_ptr, 0);
             j < block_ptr->max_entries;
             ++j, entry_j += entry_len)
        {
            if (HASHENTRY_ISEMPTY(block_ptr, entry_j)) {
                break;
            }
        }

        for (i = block_ptr->max_entries-1, entry_i =HASH_ENTRY_AT(block_ptr,i);
             i > j;
             --i, entry_i -= entry_len)
        {
            if (!HASHENTRY_ISEMPTY(block_ptr, entry_i)) {
                memcpy(entry_j, entry_i, entry_len);
                /* mark entry 'i' as empty */
                memcpy(HASHENTRY_GET_VALUE(block_ptr, entry_i),
                       table_ptr->no_value_ptr, value_len);
                /* find the next empty entry */
                for (++j, entry_j += entry_len;
                     j < i;
                     ++j, entry_j += entry_len)
                {
                    if (HASHENTRY_ISEMPTY(block_ptr, entry_j)) {
                        break;
                    }
                }
            }
        }
        assert(j <= block_ptr->num_entries);
    }
    TRACEMSG(1,(TRC_FMT "Made the HashTable contiguous.", TRC_ARG(table_ptr)));
}
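/*
 *    Illustration of the compaction above (not code): given a block
 *    whose buckets hold
 *
 *        [A] [ ] [B] [ ] [ ] [C]
 *
 *    'j' stops at the first hole (index 1) and 'i' finds C at index
 *    5; C is copied into the hole and its old bucket is marked empty,
 *    leaving
 *
 *        [A] [C] [B] [ ] [ ] [ ]
 *
 *    The loops stop once 'i' and 'j' meet, so the scan is linear in
 *    the block size and the block's first 'num_entries' buckets end
 *    up occupied, ready to be sorted in place.
 */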


int
hashlib_sort_entries_usercmp(
    HashTable              *table_ptr,
    hashlib_sort_key_cmp_fn cmp_fn,
    void                   *cmp_userdata)
{
    HashBlock *block_ptr;
    const size_t entry_len = HASH_GET_ENTRY_LEN(table_ptr);
    unsigned int k;

    TRACEMSG(1,(TRC_FMT "Sorting the HashTable...", TRC_ARG(table_ptr)));

    if (NULL == cmp_fn) {
        return ERR_BADARGUMENT;
    }
    if (!table_ptr->is_sorted) {
        /* first call; make the data in each block contiguous */
        hashlib_make_contiguous(table_ptr);
    }

    table_ptr->cmp_fn = cmp_fn;
    table_ptr->cmp_userdata = cmp_userdata;

    /* we use qsort to sort each block individually; when iterating,
     * return the lowest value among all sorted blocks. */
    for (k = 0; k < table_ptr->num_blocks; ++k) {
        TRACEMSG(2,(TRC_FMT "Sorting block #%u...", TRC_ARG(table_ptr), k));
        block_ptr = table_ptr->block_ptrs[k];
        /* sort the block's entries */
        skQSort_r(block_ptr->data_ptr, block_ptr->num_entries,
                  entry_len, table_ptr->cmp_fn, table_ptr->cmp_userdata);
    }

    TRACEMSG(1,(TRC_FMT "Sorted the HashTable.", TRC_ARG(table_ptr)));

    table_ptr->is_sorted = 1;
    return 0;
}
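/*
 *    Sketch of a caller-supplied comparison callback (hypothetical;
 *    not part of this file) for hashlib_sort_entries_usercmp().  As
 *    with hashlib_memcmp_keys() above, the first two arguments point
 *    at the stored entries, which begin with the key bytes.  This
 *    example assumes the caller stored a 4-byte unsigned key in host
 *    byte order and wants descending order:
 *
 *        static int
 *        example_reverse_u32_cmp(const void *a, const void *b, void *ctx)
 *        {
 *            uint32_t key_a, key_b;
 *            (void)ctx;
 *            memcpy(&key_a, a, sizeof(key_a));
 *            memcpy(&key_b, b, sizeof(key_b));
 *            return ((key_a < key_b) ? 1 : ((key_a > key_b) ? -1 : 0));
 *        }
 *
 *        hashlib_sort_entries_usercmp(table, &example_reverse_u32_cmp, NULL);
 *
 *    The example_reverse_u32_cmp() name and the key layout are
 *    assumptions for illustration only.
 */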

int
hashlib_sort_entries(
    HashTable          *table_ptr)
{
    /* pass the key length in the context pointer */
    table_ptr->keylen_cmp_userdata = HASH_GET_KEY_LEN(table_ptr);
    return hashlib_sort_entries_usercmp(table_ptr, &hashlib_memcmp_keys,
                                        &table_ptr->keylen_cmp_userdata);
}
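/*
 *    Usage sketch (illustrative only): produce the table's contents
 *    in key order by sorting and then iterating.  Assumes 'table' was
 *    created and populated elsewhere.  Sorting rearranges the entries
 *    in place and the blocks are no longer in hash order, so the
 *    table should not be expected to support further inserts or
 *    lookups afterward.
 *
 *        uint8_t *key;
 *        uint8_t *val;
 *        HASH_ITER iter;
 *
 *        hashlib_sort_entries(table);
 *        iter = hashlib_create_iterator(table);
 *        while (hashlib_iterate(table, &iter, &key, &val) == OK) {
 *            process(key, val);
 *        }
 *
 *    As above, process() stands in for caller code; with
 *    hashlib_sort_entries() the keys are returned in memcmp() order.
 */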



/*
 *  ********************************************************************
 *  DEBUGGING FUNCTIONS FOR PRINTING INFO ABOUT A TABLE
 *  ********************************************************************
 */

static void
hashlib_dump_bytes(
    FILE               *fp,
    const uint8_t      *data_ptr,
    uint64_t            data_len)
{
    uint64_t j;

    for (j = 0; j < data_len; ++j) {
        fprintf(fp, "%02x ", data_ptr[j]);
    }
}


static void
hashlib_dump_block_header(
    FILE               *fp,
    const HashBlock    *block_ptr)
{
    /* Dump header info */
    fprintf(fp, ("Block size: \t %" PRIu64 "\n"), block_ptr->max_entries);
    fprintf(fp, ("Num entries:\t %" PRIu64 " (%2.0f%% full)\n"),
            block_ptr->num_entries,
            100 * (float) block_ptr->num_entries / block_ptr->max_entries);
    fprintf(fp, "Key width:\t %u bytes\n", block_ptr->table->key_len);
    fprintf(fp, "Value width:\t %u bytes\n", block_ptr->table->value_len);
    fprintf(fp, "Load factor:\t %u = %2.0f%%\n",
            block_ptr->table->load_factor,
            100 * (float) block_ptr->table->load_factor / 255);
    fprintf(fp, "Empty value representation: ");
    hashlib_dump_bytes(fp, block_ptr->table->no_value_ptr,
                       block_ptr->table->value_len);
    fprintf(fp, "\n");
}


static void
hashlib_dump_block(
    FILE               *fp,
    const HashBlock    *block_ptr)
{
    uint64_t i;                 /* Index into the block's buckets */
    uint64_t entry_index = 0;
    const uint8_t *entry_ptr;

    hashlib_dump_block_header(fp, block_ptr);
    fprintf(fp, "Data Dump:\n");
    fprintf(fp, "----------\n");
    for (i = 0; i < block_ptr->max_entries; ++i) {
        entry_ptr = HASH_ENTRY_AT(block_ptr, i);
        /* Don't dump empty entries */
        if (HASHENTRY_ISEMPTY(block_ptr, entry_ptr)) {
            continue;
        }
        ++entry_index;

        /* Dump the entry number, the bucket index, the key, and the
         * value */
        fprintf(fp, ("%6" PRIu64 " (%" PRIu64 "). "), entry_index, i);
        hashlib_dump_bytes(fp, HASHENTRY_GET_KEY(block_ptr, entry_ptr),
                           HASH_GET_KEY_LEN(block_ptr));
        fprintf(fp, " -- ");
        hashlib_dump_bytes(fp, HASHENTRY_GET_VALUE(block_ptr, entry_ptr),
                           HASH_GET_VALUE_LEN(block_ptr));
        fprintf(fp, "\n");
    }
}


void
hashlib_dump_table(
    FILE               *fp,
    const HashTable    *table_ptr)
{
    unsigned int k;

    hashlib_dump_table_header(fp, table_ptr);
    for (k = 0; k < table_ptr->num_blocks; ++k) {
        fprintf(fp, "Block #%u:\n", k);
        hashlib_dump_block(fp, table_ptr->block_ptrs[k]);
    }
}


void
hashlib_dump_table_header(
    FILE               *fp,
    const HashTable    *table_ptr)
{
    unsigned int k;
    HashBlock *block_ptr;
    uint64_t total_used_memory = 0;
    uint64_t total_data_memory = 0;

    /* Dump header info */
    fprintf(fp, "Key width:\t %u bytes\n", table_ptr->key_len);
    fprintf(fp, "Value width:\t %d bytes\n", table_ptr->value_len);
    fprintf(fp, "Empty value:\t");
    hashlib_dump_bytes(fp, table_ptr->no_value_ptr, table_ptr->value_len);
    fprintf(fp, "\n");
    fprintf(fp, "Load factor:\t %d = %2.0f%%\n",
            table_ptr->load_factor,
            100 * (float) table_ptr->load_factor / 255);
    fprintf(fp, ("Table has %" PRIu8 " blocks:\n"), table_ptr->num_blocks);
    for (k = 0; k < table_ptr->num_blocks; ++k) {
        block_ptr = table_ptr->block_ptrs[k];
        total_data_memory +=
            HASH_GET_ENTRY_LEN(block_ptr) * block_ptr->max_entries;
        total_used_memory +=
            HASH_GET_ENTRY_LEN(block_ptr) * block_ptr->num_entries;
        fprintf(fp, ("  Block #%u: %" PRIu64 "/%" PRIu64 " (%3.1f%%)\n"),
                k, block_ptr->num_entries, block_ptr->max_entries,
                100 * ((float)block_ptr->num_entries) / block_ptr->max_entries);
    }
    /* 'total_used_memory' counts entries in use; 'total_data_memory'
     * counts the allocated buckets */
    fprintf(fp, ("Total data memory:           %" PRIu64 " bytes\n"),
            total_used_memory);
    fprintf(fp, ("Total allocated data memory: %" PRIu64 " bytes\n"),
            total_data_memory);
    fprintf(fp, ("Excess data memory:          %" PRIu64 " bytes\n"),
            total_data_memory - total_used_memory);
    fprintf(fp, "\n");
}
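/*
 *    Debugging sketch (illustrative only): either routine writes a
 *    human-readable snapshot of a table to a stream, for example
 *
 *        hashlib_dump_table_header(stderr, table);    (summary only)
 *        hashlib_dump_table(stderr, table);           (summary plus entries)
 *
 *    hashlib_dump_table() prints one line per occupied bucket in
 *    addition to the summary, so its output can be very large for
 *    big tables.
 */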


/*
** Local Variables:
** mode:c
** indent-tabs-mode:nil
** c-basic-offset:4
** End:
*/