1 //--------------------------------------------------------------------------
2 //
3 // File: hash.c
4 //
5 // Purpose: Hash table implementation
6 //
7 // Author: Jerry Coffin
8 //
9 // Description: Public domain code by Jerry Coffin, with improvements by
10 // HenkJan Wolthuis.
11 // Date last modified: 05-Jul-1997
12 //
13 // Revisions: 18-09-2002 -- modified by Pavel Sakov
14 //
15 //--------------------------------------------------------------------------
16
17 #include <string.h>
18 #include <stdlib.h>
19 #include <assert.h>
20 #include "hash.h"
21
22 #define INT_PER_DOUBLE 2
23
24 //* A hash table consists of an array of these buckets.
25 //
26 typedef struct ht_bucket
27 {
28 void * key;
29 void * data;
30 int id; // unique id -- just in case
31 struct ht_bucket* next;
32 } ht_bucket;
33
34 //* Hash table structure.
35 // Note that more nodes than `size' can be inserted in the table,
36 // but performance degrades as this happens.
37 //
38 struct hashtable
39 {
40 int size; // table size
41 int n; // current number of entries
42 int naccum; // number of inserted entries
43 int nhash; // number of used table elements
44 ht_keycp cp;
45 ht_keyeq eq;
46 ht_key2hash hash;
47 ht_bucket ** table;
48 };
49
50 int d1eq( void* key1, void* key2 );
51
52 // Creates a hashtable of specified size.
53 //
ht_create(int size,ht_keycp cp,ht_keyeq eq,ht_key2hash hash)54 hashtable* ht_create( int size, ht_keycp cp, ht_keyeq eq, ht_key2hash hash )
55 {
56 hashtable* table = malloc( sizeof ( hashtable ) );
57 ht_bucket** bucket;
58 int i;
59
60 assert( sizeof ( double ) == INT_PER_DOUBLE * sizeof ( int ) );
61 //
62 // (used in d1hash() and d2hash())
63 //
64
65 if ( table == NULL )
66 return NULL;
67
68 if ( size <= 0 )
69 {
70 free( table );
71 return NULL;
72 }
73
74 table->size = size;
75 table->table = malloc( sizeof ( ht_bucket* ) * (size_t) size );
76 bucket = table->table;
77
78 if ( bucket == NULL )
79 {
80 free( table );
81 return NULL;
82 }
83
84 for ( i = 0; i < size; ++i )
85 bucket[i] = NULL;
86 table->n = 0;
87 table->naccum = 0;
88 table->nhash = 0;
89 table->eq = eq;
90 table->cp = cp;
91 table->hash = hash;
92
93 return table;
94 }
95
96 // Destroys a hash table.
97 // (Take care of deallocating data by ht_process() prior to destroying the
98 // table if necessary.)
99 //
100 // @param table Hash table to be destroyed
101 //
ht_destroy(hashtable * table)102 void ht_destroy( hashtable* table )
103 {
104 int i;
105
106 if ( table == NULL )
107 return;
108
109 for ( i = 0; i < table->size; ++i )
110 {
111 ht_bucket* bucket;
112
113 for ( bucket = ( table->table )[i]; bucket != NULL; )
114 {
115 ht_bucket* prev = bucket;
116
117 free( bucket->key );
118 bucket = bucket->next;
119 free( prev );
120 }
121 }
122
123 free( table->table );
124 free( table );
125 }
126
127 // Inserts a new entry into the hash table.
128 //
129 // @param table The hash table
130 // @param key Ponter to entry's key
131 // @param data Pointer to associated data
132 // @return Pointer to the old data associated with the key, NULL if the key
133 // wasn't in the table previously
134 //
ht_insert(hashtable * table,void * key,void * data)135 void* ht_insert( hashtable* table, void* key, void* data )
136 {
137 unsigned int val = table->hash( key ) % (unsigned int) table->size;
138 ht_bucket * bucket;
139
140 //
141 // NULL means this bucket hasn't been used yet. We'll simply allocate
142 // space for our new bucket and put our data there, with the table
143 // pointing at it.
144 //
145 if ( ( table->table )[val] == NULL )
146 {
147 bucket = malloc( sizeof ( ht_bucket ) );
148 if ( bucket == NULL )
149 return NULL;
150
151 bucket->key = table->cp( key );
152 bucket->next = NULL;
153 bucket->data = data;
154 bucket->id = table->naccum;
155
156 ( table->table )[val] = bucket;
157 table->n++;
158 table->naccum++;
159 table->nhash++;
160
161 return bucket->data;
162 }
163
164 //
165 // This spot in the table is already in use. See if the current string
166 // has already been inserted, and if so, return corresponding data.
167 //
168 for ( bucket = ( table->table )[val]; bucket != NULL; bucket = bucket->next )
169 if ( table->eq( key, bucket->key ) == 1 )
170 {
171 void* old_data = bucket->data;
172
173 bucket->data = data;
174 bucket->id = table->naccum;
175 table->naccum++;
176
177 return old_data;
178 }
179
180 //
181 // This key must not be in the table yet. We'll add it to the head of
182 // the list at this spot in the hash table. Speed would be slightly
183 // improved if the list was kept sorted instead. In this case, this
184 // code would be moved into the loop above, and the insertion would take
185 // place as soon as it was determined that the present key in the list
186 // was larger than this one.
187 //
188 bucket = (ht_bucket *) malloc( sizeof ( ht_bucket ) );
189 if ( bucket == NULL )
190 return 0;
191 bucket->key = table->cp( key );
192 bucket->data = data;
193 bucket->next = ( table->table )[val];
194 bucket->id = table->naccum;
195
196 ( table->table )[val] = bucket;
197 table->n++;
198 table->naccum++;
199
200 return data;
201 }
202
203 // Returns a pointer to the data associated with a key. If the key has
204 // not been inserted in the table, returns NULL.
205 //
206 // @param table The hash table
207 // @param key The key
208 // @return The associated data or NULL
209 //
ht_find(hashtable * table,void * key)210 void* ht_find( hashtable* table, void* key )
211 {
212 unsigned int val = table->hash( key ) % (unsigned int) table->size;
213 ht_bucket * bucket;
214
215 if ( ( table->table )[val] == NULL )
216 return NULL;
217
218 for ( bucket = ( table->table )[val]; bucket != NULL; bucket = bucket->next )
219 if ( table->eq( key, bucket->key ) == 1 )
220 return bucket->data;
221
222 return NULL;
223 }
224
225 // Deletes an entry from the table. Returns a pointer to the data that
226 // was associated with the key so that the calling code can dispose it
227 // properly.
228 //
229 // @param table The hash table
230 // @param key The key
231 // @return The associated data or NULL
232 //
ht_delete(hashtable * table,void * key)233 void* ht_delete( hashtable* table, void* key )
234 {
235 unsigned int val = table->hash( key ) % (unsigned int) table->size;
236 ht_bucket * prev;
237 ht_bucket * bucket;
238 void * data;
239
240 if ( ( table->table )[val] == NULL )
241 return NULL;
242
243 //
244 // Traverse the list, keeping track of the previous node in the list.
245 // When we find the node to delete, we set the previous node's next
246 // pointer to point to the node after ourself instead. We then delete
247 // the key from the present node, and return a pointer to the data it
248 // contains.
249 //
250 for ( prev = NULL, bucket = ( table->table )[val]; bucket != NULL; prev = bucket, bucket = bucket->next )
251 {
252 if ( table->eq( key, bucket->key ) == 1 )
253 {
254 data = bucket->data;
255 if ( prev != NULL )
256 prev->next = bucket->next;
257 else
258 {
259 //
260 // If 'prev' still equals NULL, it means that we need to
261 // delete the first node in the list. This simply consists
262 // of putting our own 'next' pointer in the array holding
263 // the head of the list. We then dispose of the current
264 // node as above.
265 //
266 ( table->table )[val] = bucket->next;
267 table->nhash--;
268 }
269 free( bucket->key );
270 free( bucket );
271 table->n--;
272
273 return data;
274 }
275 }
276
277 //
278 // If we get here, it means we didn't find the item in the table. Signal
279 // this by returning NULL.
280 //
281 return NULL;
282 }
283
284 // For each entry, calls a specified function with corresponding data as a
285 // parameter.
286 //
287 // @param table The hash table
288 // @param func The action function
289 //
ht_process(hashtable * table,void (* func)(void *))290 void ht_process( hashtable* table, void ( *func )( void* ) )
291 {
292 int i;
293
294 for ( i = 0; i < table->size; ++i )
295 if ( ( table->table )[i] != NULL )
296 {
297 ht_bucket* bucket;
298
299 for ( bucket = ( table->table )[i]; bucket != NULL; bucket = bucket->next )
300 func( bucket->data );
301 }
302 }
303
304 //
305 // functions for for string keys
306 //
307
strhash(void * key)308 static unsigned int strhash( void* key )
309 {
310 char * str = (char *) key;
311 unsigned int hashvalue = 0;
312
313 while ( *str != 0 )
314 {
315 hashvalue ^= *(unsigned int *) str;
316 hashvalue <<= 1;
317 str++;
318 }
319
320 return hashvalue;
321 }
322
strcp(void * key)323 static void* strcp( void* key )
324 {
325 return (void *) strdup( (const char *) key );
326 }
327
streq(void * key1,void * key2)328 static int streq( void* key1, void* key2 )
329 {
330 return !strcmp( key1, key2 );
331 }
332
333 // functions for for double keys
334
d1hash(void * key)335 static unsigned int d1hash( void* key )
336 {
337 unsigned int* v = (unsigned int *) key;
338
339 #if INT_PER_DOUBLE == 2
340 return v[0] + v[1];
341 #else
342 #error not implemented
343 #endif
344 }
345
d1cp(void * key)346 static void* d1cp( void* key )
347 {
348 double* newkey = malloc( sizeof ( double ) );
349
350 *newkey = *(double *) key;
351
352 return newkey;
353 }
354
d1eq(void * key1,void * key2)355 int d1eq( void* key1, void* key2 )
356 {
357 return *(double *) key1 == *(double *) key2;
358 }
359
360 //
361 // functions for for double[2] keys
362 //
363
364 #include "math.h"
365
d2hash(void * key)366 static unsigned int d2hash( void* key )
367 {
368 unsigned int* v = (unsigned int *) key;
369
370 #if INT_PER_DOUBLE == 2
371 //
372 // PS: here multiplications suppose to make (a,b) and (b,a) generate
373 // different hash values
374 //
375 return v[0] + v[1] + v[2] * 3 + v[3] * 7;
376 #else
377 #error not implemented
378 #endif
379 }
380
d2cp(void * key)381 static void* d2cp( void* key )
382 {
383 double* newkey = malloc( sizeof ( double ) * 2 );
384
385 newkey[0] = ( (double *) key )[0];
386 newkey[1] = ( (double *) key )[1];
387
388 return newkey;
389 }
390
d2eq(void * key1,void * key2)391 static int d2eq( void* key1, void* key2 )
392 {
393 return ( ( (double *) key1 )[0] == ( (double *) key2 )[0] ) && ( ( (double *) key1 )[1] == ( (double *) key2 )[1] );
394 }
395
ht_create_d1(int size)396 hashtable* ht_create_d1( int size )
397 {
398 return ht_create( size, d1cp, d1eq, d1hash );
399 }
400
ht_create_d2(int size)401 hashtable* ht_create_d2( int size )
402 {
403 return ht_create( size, d2cp, d2eq, d2hash );
404 }
405
ht_create_str(int size)406 hashtable* ht_create_str( int size )
407 {
408 return ht_create( size, strcp, streq, strhash );
409 }
410
411 #ifdef HT_TEST
412
413 #include <stdio.h>
414 #include <limits.h>
415
416 #define BUFSIZE 1024
417
print_double(void * data)418 static void print_double( void* data )
419 {
420 printf( " \"%d\"", (int) *(double *) data );
421 }
422
print_string(void * data)423 static void print_string( void* data )
424 {
425 printf( " \"%s\"", (char *) data );
426 }
427
main()428 int main()
429 {
430 double points[] = {
431 922803.7855, 7372394.688, 0,
432 922849.2037, 7372307.027, 1,
433 922894.657, 7372219.306, 2,
434 922940.1475, 7372131.528, 3,
435 922985.6777, 7372043.692, 4,
436 923031.2501, 7371955.802, 5,
437 923076.8669, 7371867.857, 6,
438 923122.5307, 7371779.861, 7,
439 923168.2439, 7371691.816, 8,
440 923214.0091, 7371603.722, 9,
441 923259.8288, 7371515.583, 10,
442 922891.3958, 7372440.117, 11,
443 922936.873, 7372352.489, 12,
444 922982.3839, 7372264.804, 13,
445 923027.9308, 7372177.064, 14,
446 923073.5159, 7372089.268, 15,
447 923119.1415, 7372001.42, 16,
448 923164.8099, 7371913.521, 17,
449 923210.5233, 7371825.572, 18,
450 923256.2841, 7371737.575, 19,
451 923302.0946, 7371649.534, 20,
452 923347.9572, 7371561.45, 21,
453 922978.9747, 7372485.605, 22,
454 923024.5085, 7372398.009, 23,
455 923070.0748, 7372310.358, 24,
456 923115.6759, 7372222.654, 25,
457 923161.3136, 7372134.897, 26,
458 923206.9903, 7372047.09, 27,
459 923252.7079, 7371959.233, 28,
460 923298.4686, 7371871.33, 29,
461 923344.2745, 7371783.381, 30,
462 923390.1279, 7371695.389, 31,
463 923436.0309, 7371607.357, 32,
464 923066.5232, 7372531.148, 33,
465 923112.1115, 7372443.583, 34,
466 923157.7311, 7372355.966, 35,
467 923203.3842, 7372268.296, 36,
468 923249.0725, 7372180.577, 37,
469 923294.7981, 7372092.808, 38,
470 923340.5628, 7372004.993, 39,
471 923386.3686, 7371917.132, 40,
472 923432.2176, 7371829.229, 41,
473 923478.1116, 7371741.284, 42,
474 923524.0527, 7371653.302, 43,
475 923154.0423, 7372576.746, 44,
476 923199.6831, 7372489.211, 45,
477 923245.3541, 7372401.625, 46,
478 923291.0572, 7372313.989, 47,
479 923336.7941, 7372226.305, 48,
480 923382.5667, 7372138.574, 49,
481 923428.3766, 7372050.798, 50,
482 923474.2256, 7371962.978, 51,
483 923520.1155, 7371875.118, 52,
484 923566.0481, 7371787.218, 53,
485 923612.0252, 7371699.282, 54,
486 923241.533, 7372622.396, 55,
487 923287.2244, 7372534.889, 56,
488 923332.9449, 7372447.334, 57,
489 923378.6963, 7372359.731, 58,
490 923424.4801, 7372272.081, 59,
491 923470.2979, 7372184.385, 60,
492 923516.1513, 7372096.646, 61,
493 923562.0418, 7372008.866, 62,
494 923607.9709, 7371921.046, 63,
495 923653.9402, 7371833.188, 64,
496 923699.9514, 7371745.296, 65,
497 923328.9962, 7372668.095, 66,
498 923374.7365, 7372580.617, 67,
499 923420.5049, 7372493.091, 68,
500 923466.303, 7372405.519, 69,
501 923512.1321, 7372317.901, 70,
502 923557.9936, 7372230.24, 71,
503 923603.8889, 7372142.536, 72,
504 923649.8192, 7372054.793, 73,
505 923695.786, 7371967.011, 74,
506 923741.7905, 7371879.193, 75,
507 923787.8341, 7371791.342, 76,
508 923416.4327, 7372713.844, 77,
509 923462.2204, 7372626.393, 78,
510 923508.0353, 7372538.895, 79,
511 923553.8787, 7372451.353, 80,
512 923599.7517, 7372363.766, 81,
513 923645.6555, 7372276.137, 82,
514 923691.5914, 7372188.467, 83,
515 923737.5603, 7372100.757, 84,
516 923783.5634, 7372013.011, 85,
517 923829.6017, 7371925.231, 86,
518 923875.6763, 7371837.419, 87,
519 923503.8433, 7372759.64, 88,
520 923549.6771, 7372672.214, 89,
521 923595.5372, 7372584.744, 90,
522 923641.4246, 7372497.23, 91,
523 923687.3404, 7372409.673, 92,
524 923733.2855, 7372322.074, 93,
525 923779.2608, 7372234.436, 94,
526 923825.2672, 7372146.759, 95,
527 923871.3056, 7372059.047, 96,
528 923917.3766, 7371971.301, 97,
529 923963.4812, 7371883.524, 98,
530 923591.2288, 7372805.481, 99,
531 923637.1076, 7372718.081, 100,
532 923683.0118, 7372630.638, 101,
533 923728.9423, 7372543.151, 102,
534 923774.8998, 7372455.622, 103,
535 923820.8852, 7372368.052, 104,
536 923866.8991, 7372280.443, 105,
537 923912.9422, 7372192.797, 106,
538 923959.015, 7372105.116, 107,
539 924005.118, 7372017.402, 108,
540 924051.2518, 7371929.657, 109,
541 923678.5898, 7372851.367, 110,
542 923724.5126, 7372763.992, 111,
543 923770.46, 7372676.574, 112,
544 923816.4328, 7372589.113, 113,
545 923862.4314, 7372501.611, 114,
546 923908.4564, 7372414.069, 115,
547 923954.5083, 7372326.488, 116,
548 924000.5875, 7372238.87, 117,
549 924046.6941, 7372151.218, 118,
550 924092.8286, 7372063.533, 119,
551 924138.9911, 7371975.818, 120
552 };
553
554 int size = sizeof ( points ) / sizeof ( double ) / 3;
555 hashtable* ht;
556 int i;
557
558 //
559 // double[2] key
560 //
561
562 printf( "\n1. Testing a table with key of double[2] type\n\n" );
563
564 printf( " creating a table..." );
565 ht = ht_create_d2( size );
566 printf( "done\n" );
567
568 printf( " inserting %d values from a file...", size );
569 for ( i = 0; i < size; ++i )
570 ht_insert( ht, &points[i * 3], &points[i * 3 + 2] );
571 printf( "done\n" );
572
573 printf( " stats:\n" );
574 printf( " %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
575 printf( " %f entries per hash value in use\n", (double) ht->n / ht->nhash );
576
577 printf( " finding and printing each 10th data:\n" );
578 for ( i = 0; i < size; i += 10 )
579 {
580 double* point = &points[i * 3];
581 double* data = ht_find( ht, point );
582
583 if ( data != NULL )
584 printf( " i = %d; data = \"%d\"\n", i, (int) *data );
585 else
586 printf( " i = %d; data = <none>\n", i );
587 }
588
589 printf( " removing every 3rd element..." );
590 for ( i = 0; i < size; i += 3 )
591 {
592 double* point = &points[i * 3];
593 ht_delete( ht, point );
594 }
595 printf( "done\n" );
596
597 printf( " stats:\n" );
598 printf( " %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
599 printf( " %f entries per hash value in use\n", (double) ht->n / ht->nhash );
600
601 printf( " finding and printing each 10th data:\n" );
602 for ( i = 0; i < size; i += 10 )
603 {
604 double* point = &points[i * 3];
605 double* data = ht_find( ht, point );
606
607 if ( data != NULL )
608 printf( " i = %d; data = \"%d\"\n", i, (int) *data );
609 else
610 printf( " i = %d; data = <none>\n", i );
611 }
612
613 printf( " printing all data by calling ht_process():\n " );
614 ht_process( ht, print_double );
615
616 printf( "\n destroying the hash table..." );
617 ht_destroy( ht );
618 printf( "done\n" );
619
620 //
621 // char* key
622 //
623
624 printf( "\n2. Testing a table with key of char* type\n\n" );
625
626 printf( " creating a table..." );
627 ht = ht_create_str( size );
628 printf( "done\n" );
629
630 printf( " inserting %d elements with deep copy of each data string...", size );
631 for ( i = 0; i < size; ++i )
632 {
633 char key[BUFSIZE];
634 char str[BUFSIZE];
635 char * data;
636
637 sprintf( key, "%d-th key", i );
638 sprintf( str, "%d-th data", i );
639 data = strdup( str );
640 ht_insert( ht, key, data );
641 }
642 printf( "done\n" );
643
644 printf( " stats:\n" );
645 printf( " %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
646 printf( " %f entries per hash value in use\n", (double) ht->n / ht->nhash );
647
648 printf( " finding and printing each 10th data:\n" );
649 for ( i = 0; i < size; i += 10 )
650 {
651 char key[BUFSIZE];
652 char * data;
653
654 sprintf( key, "%d-th key", i );
655 data = ht_find( ht, key );
656 if ( data != NULL )
657 printf( " i = %d; data = \"%s\"\n", i, data );
658 else
659 printf( " i = %d; data = <none>\n", i );
660 }
661
662 printf( " removing every 3rd element..." );
663 for ( i = 0; i < size; i += 3 )
664 {
665 char key[BUFSIZE];
666
667 sprintf( key, "%d-th key", i );
668 free( ht_delete( ht, key ) );
669 }
670 printf( "done\n" );
671
672 printf( " stats:\n" );
673 printf( " %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash );
674 printf( " %f entries per hash value in use\n", (double) ht->n / ht->nhash );
675
676 printf( " finding and printing each 10th data:\n" );
677 for ( i = 0; i < size; i += 10 )
678 {
679 char key[BUFSIZE];
680 char * data;
681
682 sprintf( key, "%d-th key", i );
683 data = ht_find( ht, key );
684 if ( data != NULL )
685 printf( " i = %d; data = \"%s\"\n", i, data );
686 else
687 printf( " i = %d; data = <none>\n", i );
688 }
689
690 printf( " printing all data by calling ht_process():\n " );
691 ht_process( ht, print_string );
692
693 printf( "\n freeing the remaining data by calling ht_process()..." );
694 ht_process( ht, free );
695 printf( "done\n" );
696
697 printf( " destroying the hash table..." );
698 ht_destroy( ht );
699 printf( "done\n" );
700
701 return 0;
702 }
703
704 #endif // HT_TEST
705