1 //--------------------------------------------------------------------------
2 //
3 // File:           hash.c
4 //
5 // Purpose:        Hash table implementation
6 //
7 // Author:         Jerry Coffin
8 //
9 // Description:    Public domain code by Jerry Coffin, with improvements by
10 //                 HenkJan Wolthuis.
11 //                 Date last modified: 05-Jul-1997
12 //
13 // Revisions:      18-09-2002 -- modified by Pavel Sakov
14 //
15 //--------------------------------------------------------------------------
16 
17 #include <string.h>
18 #include <stdlib.h>
19 #include <assert.h>
20 #include "hash.h"
21 
22 #define INT_PER_DOUBLE    2
23 
24 //* A hash table consists of an array of these buckets.
25 //
26 typedef struct ht_bucket
27 {
28     void * key;
29     void            * data;
30     int  id;                    // unique id -- just in case
31     struct ht_bucket* next;
32 } ht_bucket;
33 
34 //* Hash table structure.
35 // Note that more nodes than `size' can be inserted in the table,
36 // but performance degrades as this happens.
37 //
38 struct hashtable
39 {
40     int         size;           // table size
41     int         n;              // current number of entries
42     int         naccum;         // number of inserted entries
43     int         nhash;          // number of used table elements
44     ht_keycp    cp;
45     ht_keyeq    eq;
46     ht_key2hash hash;
47     ht_bucket   ** table;
48 };
49 
50 int d1eq( void* key1, void* key2 );
51 
52 // Creates a hashtable of specified size.
53 //
ht_create(int size,ht_keycp cp,ht_keyeq eq,ht_key2hash hash)54 hashtable* ht_create( int size, ht_keycp cp, ht_keyeq eq, ht_key2hash hash )
55 {
56     hashtable* table = malloc( sizeof ( hashtable ) );
57     ht_bucket** bucket;
58     int      i;
59 
60     assert( sizeof ( double ) == INT_PER_DOUBLE * sizeof ( int ) );
61     //
62     // (used in d1hash() and d2hash())
63     //
64 
65     if ( table == NULL )
66         return NULL;
67 
68     if ( size <= 0 )
69     {
70         free( table );
71         return NULL;
72     }
73 
74     table->size  = size;
75     table->table = malloc( sizeof ( ht_bucket* ) * (size_t) size );
76     bucket       = table->table;
77 
78     if ( bucket == NULL )
79     {
80         free( table );
81         return NULL;
82     }
83 
84     for ( i = 0; i < size; ++i )
85         bucket[i] = NULL;
86     table->n      = 0;
87     table->naccum = 0;
88     table->nhash  = 0;
89     table->eq     = eq;
90     table->cp     = cp;
91     table->hash   = hash;
92 
93     return table;
94 }
95 
96 // Destroys a hash table.
97 // (Take care of deallocating data by ht_process() prior to destroying the
98 // table if necessary.)
99 //
100 // @param table Hash table to be destroyed
101 //
ht_destroy(hashtable * table)102 void ht_destroy( hashtable* table )
103 {
104     int i;
105 
106     if ( table == NULL )
107         return;
108 
109     for ( i = 0; i < table->size; ++i )
110     {
111         ht_bucket* bucket;
112 
113         for ( bucket = ( table->table )[i]; bucket != NULL; )
114         {
115             ht_bucket* prev = bucket;
116 
117             free( bucket->key );
118             bucket = bucket->next;
119             free( prev );
120         }
121     }
122 
123     free( table->table );
124     free( table );
125 }
126 
127 // Inserts a new entry into the hash table.
128 //
129 // @param table The hash table
130 // @param key Ponter to entry's key
131 // @param data Pointer to associated data
132 // @return Pointer to the old data associated with the key, NULL if the key
133 //         wasn't in the table previously
134 //
ht_insert(hashtable * table,void * key,void * data)135 void* ht_insert( hashtable* table, void* key, void* data )
136 {
137     unsigned int val = table->hash( key ) % (unsigned int) table->size;
138     ht_bucket    * bucket;
139 
140     //
141     // NULL means this bucket hasn't been used yet.  We'll simply allocate
142     // space for our new bucket and put our data there, with the table
143     // pointing at it.
144     //
145     if ( ( table->table )[val] == NULL )
146     {
147         bucket = malloc( sizeof ( ht_bucket ) );
148         if ( bucket == NULL )
149             return NULL;
150 
151         bucket->key  = table->cp( key );
152         bucket->next = NULL;
153         bucket->data = data;
154         bucket->id   = table->naccum;
155 
156         ( table->table )[val] = bucket;
157         table->n++;
158         table->naccum++;
159         table->nhash++;
160 
161         return bucket->data;
162     }
163 
164     //
165     // This spot in the table is already in use.  See if the current string
166     // has already been inserted, and if so, return corresponding data.
167     //
168     for ( bucket = ( table->table )[val]; bucket != NULL; bucket = bucket->next )
169         if ( table->eq( key, bucket->key ) == 1 )
170         {
171             void* old_data = bucket->data;
172 
173             bucket->data = data;
174             bucket->id   = table->naccum;
175             table->naccum++;
176 
177             return old_data;
178         }
179 
180     //
181     // This key must not be in the table yet.  We'll add it to the head of
182     // the list at this spot in the hash table.  Speed would be slightly
183     // improved if the list was kept sorted instead.  In this case, this
184     // code would be moved into the loop above, and the insertion would take
185     // place as soon as it was determined that the present key in the list
186     // was larger than this one.
187     //
188     bucket = (ht_bucket *) malloc( sizeof ( ht_bucket ) );
189     if ( bucket == NULL )
190         return 0;
191     bucket->key  = table->cp( key );
192     bucket->data = data;
193     bucket->next = ( table->table )[val];
194     bucket->id   = table->naccum;
195 
196     ( table->table )[val] = bucket;
197     table->n++;
198     table->naccum++;
199 
200     return data;
201 }
202 
203 // Returns a pointer to the data associated with a key.  If the key has
204 // not been inserted in the table, returns NULL.
205 //
206 // @param table The hash table
207 // @param key The key
208 // @return The associated data or NULL
209 //
ht_find(hashtable * table,void * key)210 void* ht_find( hashtable* table, void* key )
211 {
212     unsigned int val = table->hash( key ) % (unsigned int) table->size;
213     ht_bucket    * bucket;
214 
215     if ( ( table->table )[val] == NULL )
216         return NULL;
217 
218     for ( bucket = ( table->table )[val]; bucket != NULL; bucket = bucket->next )
219         if ( table->eq( key, bucket->key ) == 1 )
220             return bucket->data;
221 
222     return NULL;
223 }
224 
225 // Deletes an entry from the table.  Returns a pointer to the data that
226 // was associated with the key so that the calling code can dispose it
227 // properly.
228 //
229 // @param table The hash table
230 // @param key The key
231 // @return The associated data or NULL
232 //
ht_delete(hashtable * table,void * key)233 void* ht_delete( hashtable* table, void* key )
234 {
235     unsigned int val = table->hash( key ) % (unsigned int) table->size;
236     ht_bucket    * prev;
237     ht_bucket    * bucket;
238     void         * data;
239 
240     if ( ( table->table )[val] == NULL )
241         return NULL;
242 
243     //
244     // Traverse the list, keeping track of the previous node in the list.
245     // When we find the node to delete, we set the previous node's next
246     // pointer to point to the node after ourself instead.  We then delete
247     // the key from the present node, and return a pointer to the data it
248     // contains.
249     //
250     for ( prev = NULL, bucket = ( table->table )[val]; bucket != NULL; prev = bucket, bucket = bucket->next )
251     {
252         if ( table->eq( key, bucket->key ) == 1 )
253         {
254             data = bucket->data;
255             if ( prev != NULL )
256                 prev->next = bucket->next;
257             else
258             {
259                 //
260                 // If 'prev' still equals NULL, it means that we need to
261                 // delete the first node in the list. This simply consists
262                 // of putting our own 'next' pointer in the array holding
263                 // the head of the list.  We then dispose of the current
264                 // node as above.
265                 //
266                 ( table->table )[val] = bucket->next;
267                 table->nhash--;
268             }
269             free( bucket->key );
270             free( bucket );
271             table->n--;
272 
273             return data;
274         }
275     }
276 
277     //
278     // If we get here, it means we didn't find the item in the table. Signal
279     // this by returning NULL.
280     //
281     return NULL;
282 }
283 
284 // For each entry, calls a specified function with corresponding data as a
285 // parameter.
286 //
287 // @param table The hash table
288 // @param func The action function
289 //
ht_process(hashtable * table,void (* func)(void *))290 void ht_process( hashtable* table, void ( *func )( void* ) )
291 {
292     int i;
293 
294     for ( i = 0; i < table->size; ++i )
295         if ( ( table->table )[i] != NULL )
296         {
297             ht_bucket* bucket;
298 
299             for ( bucket = ( table->table )[i]; bucket != NULL; bucket = bucket->next )
300                 func( bucket->data );
301         }
302 }
303 
304 //
305 // functions for for string keys
306 //
307 
strhash(void * key)308 static unsigned int strhash( void* key )
309 {
310     char         * str     = (char *) key;
311     unsigned int hashvalue = 0;
312 
313     while ( *str != 0 )
314     {
315         hashvalue  ^= *(unsigned int *) str;
316         hashvalue <<= 1;
317         str++;
318     }
319 
320     return hashvalue;
321 }
322 
strcp(void * key)323 static void* strcp( void* key )
324 {
325     return (void *) strdup( (const char *) key );
326 }
327 
streq(void * key1,void * key2)328 static int streq( void* key1, void* key2 )
329 {
330     return !strcmp( key1, key2 );
331 }
332 
333 // functions for for double keys
334 
d1hash(void * key)335 static unsigned int d1hash( void* key )
336 {
337     unsigned int* v = (unsigned int *) key;
338 
339 #if INT_PER_DOUBLE == 2
340     return v[0] + v[1];
341 #else
342 #error not implemented
343 #endif
344 }
345 
d1cp(void * key)346 static void* d1cp( void* key )
347 {
348     double* newkey = malloc( sizeof ( double ) );
349 
350     *newkey = *(double *) key;
351 
352     return newkey;
353 }
354 
d1eq(void * key1,void * key2)355 int d1eq( void* key1, void* key2 )
356 {
357     return *(double *) key1 == *(double *) key2;
358 }
359 
360 //
361 // functions for for double[2] keys
362 //
363 
364 #include "math.h"
365 
d2hash(void * key)366 static unsigned int d2hash( void* key )
367 {
368     unsigned int* v = (unsigned int *) key;
369 
370 #if INT_PER_DOUBLE == 2
371     //
372     // PS: here multiplications suppose to make (a,b) and (b,a) generate
373     // different hash values
374     //
375     return v[0] + v[1] + v[2] * 3 + v[3] * 7;
376 #else
377 #error not implemented
378 #endif
379 }
380 
d2cp(void * key)381 static void* d2cp( void* key )
382 {
383     double* newkey = malloc( sizeof ( double ) * 2 );
384 
385     newkey[0] = ( (double *) key )[0];
386     newkey[1] = ( (double *) key )[1];
387 
388     return newkey;
389 }
390 
d2eq(void * key1,void * key2)391 static int d2eq( void* key1, void* key2 )
392 {
393     return ( ( (double *) key1 )[0] == ( (double *) key2 )[0] ) && ( ( (double *) key1 )[1] == ( (double *) key2 )[1] );
394 }
395 
ht_create_d1(int size)396 hashtable* ht_create_d1( int size )
397 {
398     return ht_create( size, d1cp, d1eq, d1hash );
399 }
400 
ht_create_d2(int size)401 hashtable* ht_create_d2( int size )
402 {
403     return ht_create( size, d2cp, d2eq, d2hash );
404 }
405 
ht_create_str(int size)406 hashtable* ht_create_str( int size )
407 {
408     return ht_create( size, strcp, streq, strhash );
409 }
410 
411 #ifdef HT_TEST
412 
413 #include <stdio.h>
414 #include <limits.h>
415 
416 #define BUFSIZE    1024
417 
print_double(void * data)418 static void print_double( void* data )
419 {
420     printf( " \"%d\"", (int) *(double *) data );
421 }
422 
print_string(void * data)423 static void print_string( void* data )
424 {
425     printf( " \"%s\"", (char *) data );
426 }
427 
main()428 int main()
429 {
430     double   points[] = {
431         922803.7855, 7372394.688,   0,
432         922849.2037, 7372307.027,   1,
433         922894.657,  7372219.306,   2,
434         922940.1475, 7372131.528,   3,
435         922985.6777, 7372043.692,   4,
436         923031.2501, 7371955.802,   5,
437         923076.8669, 7371867.857,   6,
438         923122.5307, 7371779.861,   7,
439         923168.2439, 7371691.816,   8,
440         923214.0091, 7371603.722,   9,
441         923259.8288, 7371515.583,  10,
442         922891.3958, 7372440.117,  11,
443         922936.873,  7372352.489,  12,
444         922982.3839, 7372264.804,  13,
445         923027.9308, 7372177.064,  14,
446         923073.5159, 7372089.268,  15,
447         923119.1415,  7372001.42,  16,
448         923164.8099, 7371913.521,  17,
449         923210.5233, 7371825.572,  18,
450         923256.2841, 7371737.575,  19,
451         923302.0946, 7371649.534,  20,
452         923347.9572,  7371561.45,  21,
453         922978.9747, 7372485.605,  22,
454         923024.5085, 7372398.009,  23,
455         923070.0748, 7372310.358,  24,
456         923115.6759, 7372222.654,  25,
457         923161.3136, 7372134.897,  26,
458         923206.9903,  7372047.09,  27,
459         923252.7079, 7371959.233,  28,
460         923298.4686,  7371871.33,  29,
461         923344.2745, 7371783.381,  30,
462         923390.1279, 7371695.389,  31,
463         923436.0309, 7371607.357,  32,
464         923066.5232, 7372531.148,  33,
465         923112.1115, 7372443.583,  34,
466         923157.7311, 7372355.966,  35,
467         923203.3842, 7372268.296,  36,
468         923249.0725, 7372180.577,  37,
469         923294.7981, 7372092.808,  38,
470         923340.5628, 7372004.993,  39,
471         923386.3686, 7371917.132,  40,
472         923432.2176, 7371829.229,  41,
473         923478.1116, 7371741.284,  42,
474         923524.0527, 7371653.302,  43,
475         923154.0423, 7372576.746,  44,
476         923199.6831, 7372489.211,  45,
477         923245.3541, 7372401.625,  46,
478         923291.0572, 7372313.989,  47,
479         923336.7941, 7372226.305,  48,
480         923382.5667, 7372138.574,  49,
481         923428.3766, 7372050.798,  50,
482         923474.2256, 7371962.978,  51,
483         923520.1155, 7371875.118,  52,
484         923566.0481, 7371787.218,  53,
485         923612.0252, 7371699.282,  54,
486         923241.533,  7372622.396,  55,
487         923287.2244, 7372534.889,  56,
488         923332.9449, 7372447.334,  57,
489         923378.6963, 7372359.731,  58,
490         923424.4801, 7372272.081,  59,
491         923470.2979, 7372184.385,  60,
492         923516.1513, 7372096.646,  61,
493         923562.0418, 7372008.866,  62,
494         923607.9709, 7371921.046,  63,
495         923653.9402, 7371833.188,  64,
496         923699.9514, 7371745.296,  65,
497         923328.9962, 7372668.095,  66,
498         923374.7365, 7372580.617,  67,
499         923420.5049, 7372493.091,  68,
500         923466.303,  7372405.519,  69,
501         923512.1321, 7372317.901,  70,
502         923557.9936,  7372230.24,  71,
503         923603.8889, 7372142.536,  72,
504         923649.8192, 7372054.793,  73,
505         923695.786,  7371967.011,  74,
506         923741.7905, 7371879.193,  75,
507         923787.8341, 7371791.342,  76,
508         923416.4327, 7372713.844,  77,
509         923462.2204, 7372626.393,  78,
510         923508.0353, 7372538.895,  79,
511         923553.8787, 7372451.353,  80,
512         923599.7517, 7372363.766,  81,
513         923645.6555, 7372276.137,  82,
514         923691.5914, 7372188.467,  83,
515         923737.5603, 7372100.757,  84,
516         923783.5634, 7372013.011,  85,
517         923829.6017, 7371925.231,  86,
518         923875.6763, 7371837.419,  87,
519         923503.8433,  7372759.64,  88,
520         923549.6771, 7372672.214,  89,
521         923595.5372, 7372584.744,  90,
522         923641.4246,  7372497.23,  91,
523         923687.3404, 7372409.673,  92,
524         923733.2855, 7372322.074,  93,
525         923779.2608, 7372234.436,  94,
526         923825.2672, 7372146.759,  95,
527         923871.3056, 7372059.047,  96,
528         923917.3766, 7371971.301,  97,
529         923963.4812, 7371883.524,  98,
530         923591.2288, 7372805.481,  99,
531         923637.1076, 7372718.081, 100,
532         923683.0118, 7372630.638, 101,
533         923728.9423, 7372543.151, 102,
534         923774.8998, 7372455.622, 103,
535         923820.8852, 7372368.052, 104,
536         923866.8991, 7372280.443, 105,
537         923912.9422, 7372192.797, 106,
538         923959.015,  7372105.116, 107,
539         924005.118,  7372017.402, 108,
540         924051.2518, 7371929.657, 109,
541         923678.5898, 7372851.367, 110,
542         923724.5126, 7372763.992, 111,
543         923770.46,   7372676.574, 112,
544         923816.4328, 7372589.113, 113,
545         923862.4314, 7372501.611, 114,
546         923908.4564, 7372414.069, 115,
547         923954.5083, 7372326.488, 116,
548         924000.5875,  7372238.87, 117,
549         924046.6941, 7372151.218, 118,
550         924092.8286, 7372063.533, 119,
551         924138.9911, 7371975.818, 120
552     };
553 
554     int      size = sizeof ( points ) / sizeof ( double ) / 3;
555     hashtable* ht;
556     int      i;
557 
558     //
559     // double[2] key
560     //
561 
562     printf( "\n1. Testing a table with key of double[2] type\n\n" );
563 
564     printf( "  creating a table..." );
565     ht = ht_create_d2( size );
566     printf( "done\n" );
567 
568     printf( "  inserting %d values from a file...", size );
569     for ( i = 0; i < size; ++i )
570         ht_insert( ht, &points[i * 3], &points[i * 3 + 2] );
571     printf( "done\n" );
572 
573     printf( "  stats:\n" );
574     printf( "    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
575     printf( "    %f entries per hash value in use\n", (double) ht->n / ht->nhash );
576 
577     printf( "  finding and printing each 10th data:\n" );
578     for ( i = 0; i < size; i += 10 )
579     {
580         double* point = &points[i * 3];
581         double* data  = ht_find( ht, point );
582 
583         if ( data != NULL )
584             printf( "    i = %d; data = \"%d\"\n", i, (int) *data );
585         else
586             printf( "    i = %d; data = <none>\n", i );
587     }
588 
589     printf( "  removing every 3rd element..." );
590     for ( i = 0; i < size; i += 3 )
591     {
592         double* point = &points[i * 3];
593         ht_delete( ht, point );
594     }
595     printf( "done\n" );
596 
597     printf( "  stats:\n" );
598     printf( "    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
599     printf( "    %f entries per hash value in use\n", (double) ht->n / ht->nhash );
600 
601     printf( "  finding and printing each 10th data:\n" );
602     for ( i = 0; i < size; i += 10 )
603     {
604         double* point = &points[i * 3];
605         double* data  = ht_find( ht, point );
606 
607         if ( data != NULL )
608             printf( "    i = %d; data = \"%d\"\n", i, (int) *data );
609         else
610             printf( "    i = %d; data = <none>\n", i );
611     }
612 
613     printf( "  printing all data by calling ht_process():\n " );
614     ht_process( ht, print_double );
615 
616     printf( "\n  destroying the hash table..." );
617     ht_destroy( ht );
618     printf( "done\n" );
619 
620     //
621     // char* key
622     //
623 
624     printf( "\n2. Testing a table with key of char* type\n\n" );
625 
626     printf( "  creating a table..." );
627     ht = ht_create_str( size );
628     printf( "done\n" );
629 
630     printf( "  inserting %d elements with deep copy of each data string...", size );
631     for ( i = 0; i < size; ++i )
632     {
633         char key[BUFSIZE];
634         char str[BUFSIZE];
635         char * data;
636 
637         sprintf( key, "%d-th key", i );
638         sprintf( str, "%d-th data", i );
639         data = strdup( str );
640         ht_insert( ht, key, data );
641     }
642     printf( "done\n" );
643 
644     printf( "  stats:\n" );
645     printf( "    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
646     printf( "    %f entries per hash value in use\n", (double) ht->n / ht->nhash );
647 
648     printf( "  finding and printing each 10th data:\n" );
649     for ( i = 0; i < size; i += 10 )
650     {
651         char key[BUFSIZE];
652         char * data;
653 
654         sprintf( key, "%d-th key", i );
655         data = ht_find( ht, key );
656         if ( data != NULL )
657             printf( "    i = %d; data = \"%s\"\n", i, data );
658         else
659             printf( "    i = %d; data = <none>\n", i );
660     }
661 
662     printf( "  removing every 3rd element..." );
663     for ( i = 0; i < size; i += 3 )
664     {
665         char key[BUFSIZE];
666 
667         sprintf( key, "%d-th key", i );
668         free( ht_delete( ht, key ) );
669     }
670     printf( "done\n" );
671 
672     printf( "  stats:\n" );
673     printf( "    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
674     printf( "    %f entries per hash value in use\n", (double) ht->n / ht->nhash );
675 
676     printf( "  finding and printing each 10th data:\n" );
677     for ( i = 0; i < size; i += 10 )
678     {
679         char key[BUFSIZE];
680         char * data;
681 
682         sprintf( key, "%d-th key", i );
683         data = ht_find( ht, key );
684         if ( data != NULL )
685             printf( "    i = %d; data = \"%s\"\n", i, data );
686         else
687             printf( "    i = %d; data = <none>\n", i );
688     }
689 
690     printf( "  printing all data by calling ht_process():\n " );
691     ht_process( ht, print_string );
692 
693     printf( "\n  freeing the remaining data by calling ht_process()..." );
694     ht_process( ht, free );
695     printf( "done\n" );
696 
697     printf( "  destroying the hash table..." );
698     ht_destroy( ht );
699     printf( "done\n" );
700 
701     return 0;
702 }
703 
704 #endif                          // HT_TEST
705