1 /******************************************************************************
2  *
3  * File:           hash.c
4  *
5  * Purpose:        Hash table implementation
6  *
7  * Author:         Jerry Coffin
8  *
9  * Description:    Public domain code by Jerry Coffin, with improvements by
10  *                 HenkJan Wolthuis.
11  *                 Date last modified: 05-Jul-1997
12  *
13  * Revisions:      18-09-2002 -- modified by Pavel Sakov
14  *
15  *****************************************************************************/
16 
17 #include <string.h>
18 #include <stdlib.h>
19 #include <assert.h>
20 #include <math.h>
21 #include "hash.h"
22 
23 #define INT_PER_DOUBLE 2
24 #define BYTE_PER_INT 4
25 
26 /** A hash table consists of an array of these buckets.
27  */
28 typedef struct ht_bucket {
29     void* key;
30     void* data;
31     int id;                     /* unique id -- just in case */
32     struct ht_bucket* next;
33 } ht_bucket;
34 
35 /** Hash table structure.
36  * Note that more nodes than `size' can be inserted in the table,
37  * but performance degrades as this happens.
38  */
39 struct hashtable {
40     int size;                   /* table size */
41     int n;                      /* current number of entries */
42     int naccum;                 /* number of inserted entries */
43     int nhash;                  /* number of used table elements */
44     ht_keycp cp;
45     ht_keyeq eq;
46     ht_key2hash hash;
47     ht_bucket** table;
48 };
49 
50 /* Creates a hashtable of specified size.
51  */
ht_create(int size,ht_keycp cp,ht_keyeq eq,ht_key2hash hash)52 hashtable* ht_create(int size, ht_keycp cp, ht_keyeq eq, ht_key2hash hash)
53 {
54     hashtable* table = malloc(sizeof(hashtable));
55     ht_bucket** bucket;
56     int i;
57 
58     assert(table != NULL);
59 
60     if (size <= 0) {
61         free(table);
62         return NULL;
63     }
64 
65     table->size = size;
66     table->table = malloc(sizeof(ht_bucket*) * size);
67     assert(table->table != NULL);
68     bucket = table->table;
69 
70     if (bucket == NULL) {
71         free(table);
72         return NULL;
73     }
74 
75     for (i = 0; i < size; ++i)
76         bucket[i] = NULL;
77     table->n = 0;
78     table->naccum = 0;
79     table->nhash = 0;
80     table->eq = eq;
81     table->cp = cp;
82     table->hash = hash;
83 
84     return table;
85 }
86 
87 /* Destroys a hash table.
88  * (Take care of deallocating data by ht_process() prior to destroying the
89  * table if necessary.)
90  *
91  * @param table Hash table to be destroyed
92  */
ht_destroy(hashtable * table)93 void ht_destroy(hashtable* table)
94 {
95     int i;
96 
97     if (table == NULL)
98         return;
99 
100     for (i = 0; i < table->size; ++i) {
101         ht_bucket* bucket;
102 
103         for (bucket = (table->table)[i]; bucket != NULL;) {
104             ht_bucket* prev = bucket;
105 
106             free(bucket->key);
107             bucket = bucket->next;
108             free(prev);
109         }
110     }
111 
112     free(table->table);
113     free(table);
114 }
115 
116 /* Inserts a new entry into the hash table.
117  *
118  * @param table The hash table
119  * @param key Ponter to entry's key
120  * @param data Pointer to associated data
121  * @return Pointer to the old data associated with the key, NULL if the key
122  *         wasn't in the table previously
123  */
ht_insert(hashtable * table,void * key,void * data)124 void* ht_insert(hashtable* table, void* key, void* data)
125 {
126     unsigned int val = table->hash(key) % table->size;
127     ht_bucket* bucket;
128 
129     /*
130      * NULL means this bucket hasn't been used yet.  We'll simply allocate
131      * space for our new bucket and put our data there, with the table
132      * pointing at it.
133      */
134     if ((table->table)[val] == NULL) {
135         bucket = malloc(sizeof(ht_bucket));
136         assert(bucket != NULL);
137 
138         bucket->key = table->cp(key);
139         bucket->next = NULL;
140         bucket->data = data;
141         bucket->id = table->naccum;
142 
143         (table->table)[val] = bucket;
144         table->n++;
145         table->naccum++;
146         table->nhash++;
147 
148         return NULL;
149     }
150 
151     /*
152      * This spot in the table is already in use.  See if the current string
153      * has already been inserted, and if so, return corresponding data.
154      */
155     for (bucket = (table->table)[val]; bucket != NULL; bucket = bucket->next)
156         if (table->eq(key, bucket->key) == 1) {
157             void* old_data = bucket->data;
158 
159             bucket->data = data;
160             bucket->id = table->naccum;
161             table->naccum++;
162 
163             return old_data;
164         }
165 
166     /*
167      * This key must not be in the table yet.  We'll add it to the head of
168      * the list at this spot in the hash table.  Speed would be slightly
169      * improved if the list was kept sorted instead.  In this case, this
170      * code would be moved into the loop above, and the insertion would take
171      * place as soon as it was determined that the present key in the list
172      * was larger than this one.
173      */
174     bucket = (ht_bucket*) malloc(sizeof(ht_bucket));
175     assert(bucket != NULL);
176     bucket->key = table->cp(key);
177     bucket->data = data;
178     bucket->next = (table->table)[val];
179     bucket->id = table->naccum;
180 
181     (table->table)[val] = bucket;
182     table->n++;
183     table->naccum++;
184 
185     return NULL;
186 }
187 
188 /* Returns a pointer to the data associated with a key.  If the key has
189  * not been inserted in the table, returns NULL.
190  *
191  * @param table The hash table
192  * @param key The key
193  * @return The associated data or NULL
194  */
ht_find(hashtable * table,void * key)195 void* ht_find(hashtable* table, void* key)
196 {
197     unsigned int val = table->hash(key) % table->size;
198     ht_bucket* bucket;
199 
200     if ((table->table)[val] == NULL)
201         return NULL;
202 
203     for (bucket = (table->table)[val]; bucket != NULL; bucket = bucket->next)
204         if (table->eq(key, bucket->key) == 1)
205             return bucket->data;
206 
207     return NULL;
208 }
209 
210 /* Deletes an entry from the table.  Returns a pointer to the data that
211  * was associated with the key so that the calling code can dispose it
212  * properly.
213  *
214  * @param table The hash table
215  * @param key The key
216  * @return The associated data or NULL
217  */
ht_delete(hashtable * table,void * key)218 void* ht_delete(hashtable* table, void* key)
219 {
220     unsigned int val = table->hash(key) % table->size;
221     ht_bucket* prev;
222     ht_bucket* bucket;
223     void* data;
224 
225     if ((table->table)[val] == NULL)
226         return NULL;
227 
228     /*
229      * Traverse the list, keeping track of the previous node in the list.
230      * When we find the node to delete, we set the previous node's next
231      * pointer to point to the node after ourself instead.  We then delete
232      * the key from the present node, and return a pointer to the data it
233      * contains.
234      */
235     for (prev = NULL, bucket = (table->table)[val]; bucket != NULL; prev = bucket, bucket = bucket->next) {
236         if (table->eq(key, bucket->key) == 1) {
237             data = bucket->data;
238             if (prev != NULL)
239                 prev->next = bucket->next;
240             else {
241                 /*
242                  * If 'prev' still equals NULL, it means that we need to
243                  * delete the first node in the list. This simply consists
244                  * of putting our own 'next' pointer in the array holding
245                  * the head of the list.  We then dispose of the current
246                  * node as above.
247                  */
248                 (table->table)[val] = bucket->next;
249                 table->nhash--;
250             }
251             free(bucket->key);
252             free(bucket);
253             table->n--;
254 
255             return data;
256         }
257     }
258 
259     /*
260      * If we get here, it means we didn't find the item in the table. Signal
261      * this by returning NULL.
262      */
263     return NULL;
264 }
265 
266 /* For each entry, calls a specified function with corresponding data as a
267  * parameter.
268  *
269  * @param table The hash table
270  * @param func The action function
271  */
ht_process(hashtable * table,void (* func)(void *))272 void ht_process(hashtable* table, void (*func) (void*))
273 {
274     int i;
275 
276     for (i = 0; i < table->size; ++i)
277         if ((table->table)[i] != NULL) {
278             ht_bucket* bucket;
279 
280             for (bucket = (table->table)[i]; bucket != NULL; bucket = bucket->next)
281                 func(bucket->data);
282         }
283 }
284 
285 /*
286  * functions for for string keys
287  */
288 
strhash(void * key)289 static unsigned int strhash(void* key)
290 {
291     char* str = key;
292     unsigned int hashvalue = 0;
293 
294     while (*str != 0) {
295         hashvalue ^= *(unsigned int*) str;
296         hashvalue <<= 1;
297         str++;
298     }
299 
300     return hashvalue;
301 }
302 
strcp(void * key)303 static void* strcp(void* key)
304 {
305     return strdup(key);
306 }
307 
streq(void * key1,void * key2)308 static int streq(void* key1, void* key2)
309 {
310     return !strcmp(key1, key2);
311 }
312 
313 /* functions for for double keys */
314 
d1hash(void * key)315 static unsigned int d1hash(void* key)
316 {
317     unsigned int* v = key;
318 
319 #if INT_PER_DOUBLE == 2
320     return v[0] + v[1];
321 #else
322 #error not implemented
323 #endif
324 }
325 
d1cp(void * key)326 static void* d1cp(void* key)
327 {
328     double* newkey = malloc(sizeof(double));
329 
330     *newkey = *(double*) key;
331 
332     return newkey;
333 }
334 
d1eq(void * key1,void * key2)335 static int d1eq(void* key1, void* key2)
336 {
337     return *(double*) key1 == *(double*) key2;
338 }
339 
340 /*
341  * functions for for double[2] keys
342  */
343 
d2hash(void * key)344 static unsigned int d2hash(void* key)
345 {
346     unsigned int* v = key;
347 
348 #if INT_PER_DOUBLE == 2
349     /*
350      * PS: here multiplications suppose to make (a,b) and (b,a) generate
351      * different hash values
352      */
353     return v[0] + v[1] + v[2] * 3 + v[3] * 7;
354 #else
355 #error not implemented
356 #endif
357 }
358 
d2cp(void * key)359 static void* d2cp(void* key)
360 {
361     double* newkey = malloc(sizeof(double) * 2);
362 
363     newkey[0] = ((double*) key)[0];
364     newkey[1] = ((double*) key)[1];
365 
366     return newkey;
367 }
368 
d2eq(void * key1,void * key2)369 static int d2eq(void* key1, void* key2)
370 {
371     return (((double*) key1)[0] == ((double*) key2)[0]) && (((double*) key1)[1] == ((double*) key2)[1]);
372 }
373 
374 /*
375  * functions for for int[1] keys
376  */
377 
i1hash(void * key)378 static unsigned int i1hash(void* key)
379 {
380     return ((unsigned int*) key)[0];
381 }
382 
i1cp(void * key)383 static void* i1cp(void* key)
384 {
385     int* newkey = malloc(sizeof(int));
386 
387     newkey[0] = ((int*) key)[0];
388 
389     return newkey;
390 }
391 
i1eq(void * key1,void * key2)392 static int i1eq(void* key1, void* key2)
393 {
394     return (((int*) key1)[0] == ((int*) key2)[0]);
395 }
396 
397 /*
398  * functions for for int[2] keys
399  */
400 
i2hash(void * key)401 static unsigned int i2hash(void* key)
402 {
403 #if BYTE_PER_INT >= 4
404     unsigned int* v = key;
405 
406     return v[0] + (v[1] << 16);
407 #else
408 #error not implemented
409 #endif
410 }
411 
i2cp(void * key)412 static void* i2cp(void* key)
413 {
414     int* newkey = malloc(sizeof(int) * 2);
415 
416     newkey[0] = ((int*) key)[0];
417     newkey[1] = ((int*) key)[1];
418 
419     return newkey;
420 }
421 
i2eq(void * key1,void * key2)422 static int i2eq(void* key1, void* key2)
423 {
424     return (((int*) key1)[0] == ((int*) key2)[0]) && (((int*) key1)[1] == ((int*) key2)[1]);
425 }
426 
ht_create_d1(int size)427 hashtable* ht_create_d1(int size)
428 {
429     assert(sizeof(double) == INT_PER_DOUBLE * sizeof(int));
430     return ht_create(size, d1cp, d1eq, d1hash);
431 }
432 
ht_create_d2(int size)433 hashtable* ht_create_d2(int size)
434 {
435     assert(sizeof(double) == INT_PER_DOUBLE * sizeof(int));
436     return ht_create(size, d2cp, d2eq, d2hash);
437 }
438 
ht_create_str(int size)439 hashtable* ht_create_str(int size)
440 {
441     return ht_create(size, strcp, streq, strhash);
442 }
443 
ht_create_i1(int size)444 hashtable* ht_create_i1(int size)
445 {
446     return ht_create(size, i1cp, i1eq, i1hash);
447 }
448 
ht_create_i2(int size)449 hashtable* ht_create_i2(int size)
450 {
451     assert(sizeof(int) == BYTE_PER_INT);
452     return ht_create(size, i2cp, i2eq, i2hash);
453 }
454 
ht_getnentries(hashtable * table)455 int ht_getnentries(hashtable* table)
456 {
457     return table->n;
458 }
459 
ht_getsize(hashtable * table)460 int ht_getsize(hashtable* table)
461 {
462     return table->size;
463 }
464 
ht_getnfilled(hashtable * table)465 int ht_getnfilled(hashtable* table)
466 {
467     return table->nhash;
468 }
469 
470 #if defined(HT_TEST)
471 
472 #include <stdio.h>
473 #include <limits.h>
474 
475 #define BUFSIZE 1024
476 
print_double(void * data)477 static void print_double(void* data)
478 {
479     printf(" \"%d\"", (int)* (double*) data);
480 }
481 
print_string(void * data)482 static void print_string(void* data)
483 {
484     printf(" \"%s\"", (char*) data);
485 }
486 
main()487 int main()
488 {
489     double points[] = {
490         922803.7855, 7372394.688, 0,
491         922849.2037, 7372307.027, 1,
492         922894.657, 7372219.306, 2,
493         922940.1475, 7372131.528, 3,
494         922985.6777, 7372043.692, 4,
495         923031.2501, 7371955.802, 5,
496         923076.8669, 7371867.857, 6,
497         923122.5307, 7371779.861, 7,
498         923168.2439, 7371691.816, 8,
499         923214.0091, 7371603.722, 9,
500         923259.8288, 7371515.583, 10,
501         922891.3958, 7372440.117, 11,
502         922936.873, 7372352.489, 12,
503         922982.3839, 7372264.804, 13,
504         923027.9308, 7372177.064, 14,
505         923073.5159, 7372089.268, 15,
506         923119.1415, 7372001.42, 16,
507         923164.8099, 7371913.521, 17,
508         923210.5233, 7371825.572, 18,
509         923256.2841, 7371737.575, 19,
510         923302.0946, 7371649.534, 20,
511         923347.9572, 7371561.45, 21,
512         922978.9747, 7372485.605, 22,
513         923024.5085, 7372398.009, 23,
514         923070.0748, 7372310.358, 24,
515         923115.6759, 7372222.654, 25,
516         923161.3136, 7372134.897, 26,
517         923206.9903, 7372047.09, 27,
518         923252.7079, 7371959.233, 28,
519         923298.4686, 7371871.33, 29,
520         923344.2745, 7371783.381, 30,
521         923390.1279, 7371695.389, 31,
522         923436.0309, 7371607.357, 32,
523         923066.5232, 7372531.148, 33,
524         923112.1115, 7372443.583, 34,
525         923157.7311, 7372355.966, 35,
526         923203.3842, 7372268.296, 36,
527         923249.0725, 7372180.577, 37,
528         923294.7981, 7372092.808, 38,
529         923340.5628, 7372004.993, 39,
530         923386.3686, 7371917.132, 40,
531         923432.2176, 7371829.229, 41,
532         923478.1116, 7371741.284, 42,
533         923524.0527, 7371653.302, 43,
534         923154.0423, 7372576.746, 44,
535         923199.6831, 7372489.211, 45,
536         923245.3541, 7372401.625, 46,
537         923291.0572, 7372313.989, 47,
538         923336.7941, 7372226.305, 48,
539         923382.5667, 7372138.574, 49,
540         923428.3766, 7372050.798, 50,
541         923474.2256, 7371962.978, 51,
542         923520.1155, 7371875.118, 52,
543         923566.0481, 7371787.218, 53,
544         923612.0252, 7371699.282, 54,
545         923241.533, 7372622.396, 55,
546         923287.2244, 7372534.889, 56,
547         923332.9449, 7372447.334, 57,
548         923378.6963, 7372359.731, 58,
549         923424.4801, 7372272.081, 59,
550         923470.2979, 7372184.385, 60,
551         923516.1513, 7372096.646, 61,
552         923562.0418, 7372008.866, 62,
553         923607.9709, 7371921.046, 63,
554         923653.9402, 7371833.188, 64,
555         923699.9514, 7371745.296, 65,
556         923328.9962, 7372668.095, 66,
557         923374.7365, 7372580.617, 67,
558         923420.5049, 7372493.091, 68,
559         923466.303, 7372405.519, 69,
560         923512.1321, 7372317.901, 70,
561         923557.9936, 7372230.24, 71,
562         923603.8889, 7372142.536, 72,
563         923649.8192, 7372054.793, 73,
564         923695.786, 7371967.011, 74,
565         923741.7905, 7371879.193, 75,
566         923787.8341, 7371791.342, 76,
567         923416.4327, 7372713.844, 77,
568         923462.2204, 7372626.393, 78,
569         923508.0353, 7372538.895, 79,
570         923553.8787, 7372451.353, 80,
571         923599.7517, 7372363.766, 81,
572         923645.6555, 7372276.137, 82,
573         923691.5914, 7372188.467, 83,
574         923737.5603, 7372100.757, 84,
575         923783.5634, 7372013.011, 85,
576         923829.6017, 7371925.231, 86,
577         923875.6763, 7371837.419, 87,
578         923503.8433, 7372759.64, 88,
579         923549.6771, 7372672.214, 89,
580         923595.5372, 7372584.744, 90,
581         923641.4246, 7372497.23, 91,
582         923687.3404, 7372409.673, 92,
583         923733.2855, 7372322.074, 93,
584         923779.2608, 7372234.436, 94,
585         923825.2672, 7372146.759, 95,
586         923871.3056, 7372059.047, 96,
587         923917.3766, 7371971.301, 97,
588         923963.4812, 7371883.524, 98,
589         923591.2288, 7372805.481, 99,
590         923637.1076, 7372718.081, 100,
591         923683.0118, 7372630.638, 101,
592         923728.9423, 7372543.151, 102,
593         923774.8998, 7372455.622, 103,
594         923820.8852, 7372368.052, 104,
595         923866.8991, 7372280.443, 105,
596         923912.9422, 7372192.797, 106,
597         923959.015, 7372105.116, 107,
598         924005.118, 7372017.402, 108,
599         924051.2518, 7371929.657, 109,
600         923678.5898, 7372851.367, 110,
601         923724.5126, 7372763.992, 111,
602         923770.46, 7372676.574, 112,
603         923816.4328, 7372589.113, 113,
604         923862.4314, 7372501.611, 114,
605         923908.4564, 7372414.069, 115,
606         923954.5083, 7372326.488, 116,
607         924000.5875, 7372238.87, 117,
608         924046.6941, 7372151.218, 118,
609         924092.8286, 7372063.533, 119,
610         924138.9911, 7371975.818, 120
611     };
612 
613     int size = sizeof(points) / sizeof(double) / 3;
614     hashtable* ht;
615     int i;
616 
617     /*
618      * double[2] key
619      */
620 
621     printf("\n1. Testing a table with key of double[2] type\n\n");
622 
623     printf("  creating a table...");
624     ht = ht_create_d2(size);
625     printf("done\n");
626 
627     printf("  inserting %d values from a file...", size);
628     for (i = 0; i < size; ++i)
629         ht_insert(ht, &points[i * 3], &points[i * 3 + 2]);
630     printf("done\n");
631 
632     printf("  stats:\n");
633     printf("    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash);
634     printf("    %f entries per hash value in use\n", (double) ht->n / ht->nhash);
635 
636     printf("  finding and printing each 10th data:\n");
637     for (i = 0; i < size; i += 10) {
638         double* point = &points[i * 3];
639         double* data = ht_find(ht, point);
640 
641         if (data != NULL)
642             printf("    i = %d; data = \"%d\"\n", i, (int)* data);
643         else
644             printf("    i = %d; data = <none>\n", i);
645     }
646 
647     printf("  removing every 3rd element...");
648     for (i = 0; i < size; i += 3) {
649         double* point = &points[i * 3];
650         ht_delete(ht, point);
651     }
652     printf("done\n");
653 
654     printf("  stats:\n");
655     printf("    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash);
656     printf("    %f entries per hash value in use\n", (double) ht->n / ht->nhash);
657 
658     printf("  finding and printing each 10th data:\n");
659     for (i = 0; i < size; i += 10) {
660         double* point = &points[i * 3];
661         double* data = ht_find(ht, point);
662 
663         if (data != NULL)
664             printf("    i = %d; data = \"%d\"\n", i, (int)* data);
665         else
666             printf("    i = %d; data = <none>\n", i);
667     }
668 
669     printf("  printing all data by calling ht_process():\n ");
670     ht_process(ht, print_double);
671 
672     printf("\n  destroying the hash table...");
673     ht_destroy(ht);
674     printf("done\n");
675 
676     /*
677      * char* key
678      */
679 
680     printf("\n2. Testing a table with key of char* type\n\n");
681 
682     printf("  creating a table...");
683     ht = ht_create_str(size);
684     printf("done\n");
685 
686     printf("  inserting %d elements with deep copy of each data string...", size);
687     for (i = 0; i < size; ++i) {
688         char key[BUFSIZE];
689         char str[BUFSIZE];
690         char* data;
691 
692         sprintf(key, "%d-th key", i);
693         sprintf(str, "%d-th data", i);
694         data = strdup(str);
695         ht_insert(ht, key, data);
696     }
697     printf("done\n");
698 
699     printf("  stats:\n");
700     printf("    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash);
701     printf("    %f entries per hash value in use\n", (double) ht->n / ht->nhash);
702 
703     printf("  finding and printing each 10th data:\n");
704     for (i = 0; i < size; i += 10) {
705         char key[BUFSIZE];
706         char* data;
707 
708         sprintf(key, "%d-th key", i);
709         data = ht_find(ht, key);
710         if (data != NULL)
711             printf("    i = %d; data = \"%s\"\n", i, data);
712         else
713             printf("    i = %d; data = <none>\n", i);
714     }
715 
716     printf("  removing every 3rd element...");
717     for (i = 0; i < size; i += 3) {
718         char key[BUFSIZE];
719 
720         sprintf(key, "%d-th key", i);
721         free(ht_delete(ht, key));
722     }
723     printf("done\n");
724 
725     printf("  stats:\n");
726     printf("    %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash);
727     printf("    %f entries per hash value in use\n", (double) ht->n / ht->nhash);
728 
729     printf("  finding and printing each 10th data:\n");
730     for (i = 0; i < size; i += 10) {
731         char key[BUFSIZE];
732         char* data;
733 
734         sprintf(key, "%d-th key", i);
735         data = ht_find(ht, key);
736         if (data != NULL)
737             printf("    i = %d; data = \"%s\"\n", i, data);
738         else
739             printf("    i = %d; data = <none>\n", i);
740     }
741 
742     printf("  printing all data by calling ht_process():\n ");
743     ht_process(ht, print_string);
744 
745     printf("\n  freeing the remaining data by calling ht_process()...");
746     ht_process(ht, free);
747     printf("done\n");
748 
749     printf("  destroying the hash table...");
750     ht_destroy(ht);
751     printf("done\n");
752 
753     return 0;
754 }
755 
756 #endif                          /* HT_TEST */
757