1 /******************************************************************************
2 *
3 * File: hash.c
4 *
5 * Purpose: Hash table implementation
6 *
7 * Author: Jerry Coffin
8 *
9 * Description: Public domain code by Jerry Coffin, with improvements by
10 * HenkJan Wolthuis.
11 * Date last modified: 05-Jul-1997
12 *
13 * Revisions: 18-09-2002 -- modified by Pavel Sakov
14 *
15 *****************************************************************************/
16
17 #include <string.h>
18 #include <stdlib.h>
19 #include <assert.h>
20 #include <math.h>
21 #include "hash.h"
22
23 #define INT_PER_DOUBLE 2
24 #define BYTE_PER_INT 4
25
26 /** A hash table consists of an array of these buckets.
27 */
28 typedef struct ht_bucket {
29 void* key;
30 void* data;
31 int id; /* unique id -- just in case */
32 struct ht_bucket* next;
33 } ht_bucket;
34
35 /** Hash table structure.
36 * Note that more nodes than `size' can be inserted in the table,
37 * but performance degrades as this happens.
38 */
39 struct hashtable {
40 int size; /* table size */
41 int n; /* current number of entries */
42 int naccum; /* number of inserted entries */
43 int nhash; /* number of used table elements */
44 ht_keycp cp;
45 ht_keyeq eq;
46 ht_key2hash hash;
47 ht_bucket** table;
48 };
49
50 /* Creates a hashtable of specified size.
51 */
ht_create(int size,ht_keycp cp,ht_keyeq eq,ht_key2hash hash)52 hashtable* ht_create(int size, ht_keycp cp, ht_keyeq eq, ht_key2hash hash)
53 {
54 hashtable* table = malloc(sizeof(hashtable));
55 ht_bucket** bucket;
56 int i;
57
58 assert(table != NULL);
59
60 if (size <= 0) {
61 free(table);
62 return NULL;
63 }
64
65 table->size = size;
66 table->table = malloc(sizeof(ht_bucket*) * size);
67 assert(table->table != NULL);
68 bucket = table->table;
69
70 if (bucket == NULL) {
71 free(table);
72 return NULL;
73 }
74
75 for (i = 0; i < size; ++i)
76 bucket[i] = NULL;
77 table->n = 0;
78 table->naccum = 0;
79 table->nhash = 0;
80 table->eq = eq;
81 table->cp = cp;
82 table->hash = hash;
83
84 return table;
85 }
86
87 /* Destroys a hash table.
88 * (Take care of deallocating data by ht_process() prior to destroying the
89 * table if necessary.)
90 *
91 * @param table Hash table to be destroyed
92 */
ht_destroy(hashtable * table)93 void ht_destroy(hashtable* table)
94 {
95 int i;
96
97 if (table == NULL)
98 return;
99
100 for (i = 0; i < table->size; ++i) {
101 ht_bucket* bucket;
102
103 for (bucket = (table->table)[i]; bucket != NULL;) {
104 ht_bucket* prev = bucket;
105
106 free(bucket->key);
107 bucket = bucket->next;
108 free(prev);
109 }
110 }
111
112 free(table->table);
113 free(table);
114 }
115
116 /* Inserts a new entry into the hash table.
117 *
118 * @param table The hash table
119 * @param key Ponter to entry's key
120 * @param data Pointer to associated data
121 * @return Pointer to the old data associated with the key, NULL if the key
122 * wasn't in the table previously
123 */
ht_insert(hashtable * table,void * key,void * data)124 void* ht_insert(hashtable* table, void* key, void* data)
125 {
126 unsigned int val = table->hash(key) % table->size;
127 ht_bucket* bucket;
128
129 /*
130 * NULL means this bucket hasn't been used yet. We'll simply allocate
131 * space for our new bucket and put our data there, with the table
132 * pointing at it.
133 */
134 if ((table->table)[val] == NULL) {
135 bucket = malloc(sizeof(ht_bucket));
136 assert(bucket != NULL);
137
138 bucket->key = table->cp(key);
139 bucket->next = NULL;
140 bucket->data = data;
141 bucket->id = table->naccum;
142
143 (table->table)[val] = bucket;
144 table->n++;
145 table->naccum++;
146 table->nhash++;
147
148 return NULL;
149 }
150
151 /*
152 * This spot in the table is already in use. See if the current string
153 * has already been inserted, and if so, return corresponding data.
154 */
155 for (bucket = (table->table)[val]; bucket != NULL; bucket = bucket->next)
156 if (table->eq(key, bucket->key) == 1) {
157 void* old_data = bucket->data;
158
159 bucket->data = data;
160 bucket->id = table->naccum;
161 table->naccum++;
162
163 return old_data;
164 }
165
166 /*
167 * This key must not be in the table yet. We'll add it to the head of
168 * the list at this spot in the hash table. Speed would be slightly
169 * improved if the list was kept sorted instead. In this case, this
170 * code would be moved into the loop above, and the insertion would take
171 * place as soon as it was determined that the present key in the list
172 * was larger than this one.
173 */
174 bucket = (ht_bucket*) malloc(sizeof(ht_bucket));
175 assert(bucket != NULL);
176 bucket->key = table->cp(key);
177 bucket->data = data;
178 bucket->next = (table->table)[val];
179 bucket->id = table->naccum;
180
181 (table->table)[val] = bucket;
182 table->n++;
183 table->naccum++;
184
185 return NULL;
186 }
187
188 /* Returns a pointer to the data associated with a key. If the key has
189 * not been inserted in the table, returns NULL.
190 *
191 * @param table The hash table
192 * @param key The key
193 * @return The associated data or NULL
194 */
ht_find(hashtable * table,void * key)195 void* ht_find(hashtable* table, void* key)
196 {
197 unsigned int val = table->hash(key) % table->size;
198 ht_bucket* bucket;
199
200 if ((table->table)[val] == NULL)
201 return NULL;
202
203 for (bucket = (table->table)[val]; bucket != NULL; bucket = bucket->next)
204 if (table->eq(key, bucket->key) == 1)
205 return bucket->data;
206
207 return NULL;
208 }
209
210 /* Deletes an entry from the table. Returns a pointer to the data that
211 * was associated with the key so that the calling code can dispose it
212 * properly.
213 *
214 * @param table The hash table
215 * @param key The key
216 * @return The associated data or NULL
217 */
ht_delete(hashtable * table,void * key)218 void* ht_delete(hashtable* table, void* key)
219 {
220 unsigned int val = table->hash(key) % table->size;
221 ht_bucket* prev;
222 ht_bucket* bucket;
223 void* data;
224
225 if ((table->table)[val] == NULL)
226 return NULL;
227
228 /*
229 * Traverse the list, keeping track of the previous node in the list.
230 * When we find the node to delete, we set the previous node's next
231 * pointer to point to the node after ourself instead. We then delete
232 * the key from the present node, and return a pointer to the data it
233 * contains.
234 */
235 for (prev = NULL, bucket = (table->table)[val]; bucket != NULL; prev = bucket, bucket = bucket->next) {
236 if (table->eq(key, bucket->key) == 1) {
237 data = bucket->data;
238 if (prev != NULL)
239 prev->next = bucket->next;
240 else {
241 /*
242 * If 'prev' still equals NULL, it means that we need to
243 * delete the first node in the list. This simply consists
244 * of putting our own 'next' pointer in the array holding
245 * the head of the list. We then dispose of the current
246 * node as above.
247 */
248 (table->table)[val] = bucket->next;
249 table->nhash--;
250 }
251 free(bucket->key);
252 free(bucket);
253 table->n--;
254
255 return data;
256 }
257 }
258
259 /*
260 * If we get here, it means we didn't find the item in the table. Signal
261 * this by returning NULL.
262 */
263 return NULL;
264 }
265
266 /* For each entry, calls a specified function with corresponding data as a
267 * parameter.
268 *
269 * @param table The hash table
270 * @param func The action function
271 */
ht_process(hashtable * table,void (* func)(void *))272 void ht_process(hashtable* table, void (*func) (void*))
273 {
274 int i;
275
276 for (i = 0; i < table->size; ++i)
277 if ((table->table)[i] != NULL) {
278 ht_bucket* bucket;
279
280 for (bucket = (table->table)[i]; bucket != NULL; bucket = bucket->next)
281 func(bucket->data);
282 }
283 }
284
285 /*
286 * functions for for string keys
287 */
288
strhash(void * key)289 static unsigned int strhash(void* key)
290 {
291 char* str = key;
292 unsigned int hashvalue = 0;
293
294 while (*str != 0) {
295 hashvalue ^= *(unsigned int*) str;
296 hashvalue <<= 1;
297 str++;
298 }
299
300 return hashvalue;
301 }
302
strcp(void * key)303 static void* strcp(void* key)
304 {
305 return strdup(key);
306 }
307
streq(void * key1,void * key2)308 static int streq(void* key1, void* key2)
309 {
310 return !strcmp(key1, key2);
311 }
312
313 /* functions for for double keys */
314
d1hash(void * key)315 static unsigned int d1hash(void* key)
316 {
317 unsigned int* v = key;
318
319 #if INT_PER_DOUBLE == 2
320 return v[0] + v[1];
321 #else
322 #error not implemented
323 #endif
324 }
325
d1cp(void * key)326 static void* d1cp(void* key)
327 {
328 double* newkey = malloc(sizeof(double));
329
330 *newkey = *(double*) key;
331
332 return newkey;
333 }
334
d1eq(void * key1,void * key2)335 static int d1eq(void* key1, void* key2)
336 {
337 return *(double*) key1 == *(double*) key2;
338 }
339
340 /*
341 * functions for for double[2] keys
342 */
343
d2hash(void * key)344 static unsigned int d2hash(void* key)
345 {
346 unsigned int* v = key;
347
348 #if INT_PER_DOUBLE == 2
349 /*
350 * PS: here multiplications suppose to make (a,b) and (b,a) generate
351 * different hash values
352 */
353 return v[0] + v[1] + v[2] * 3 + v[3] * 7;
354 #else
355 #error not implemented
356 #endif
357 }
358
d2cp(void * key)359 static void* d2cp(void* key)
360 {
361 double* newkey = malloc(sizeof(double) * 2);
362
363 newkey[0] = ((double*) key)[0];
364 newkey[1] = ((double*) key)[1];
365
366 return newkey;
367 }
368
d2eq(void * key1,void * key2)369 static int d2eq(void* key1, void* key2)
370 {
371 return (((double*) key1)[0] == ((double*) key2)[0]) && (((double*) key1)[1] == ((double*) key2)[1]);
372 }
373
374 /*
375 * functions for for int[1] keys
376 */
377
i1hash(void * key)378 static unsigned int i1hash(void* key)
379 {
380 return ((unsigned int*) key)[0];
381 }
382
i1cp(void * key)383 static void* i1cp(void* key)
384 {
385 int* newkey = malloc(sizeof(int));
386
387 newkey[0] = ((int*) key)[0];
388
389 return newkey;
390 }
391
i1eq(void * key1,void * key2)392 static int i1eq(void* key1, void* key2)
393 {
394 return (((int*) key1)[0] == ((int*) key2)[0]);
395 }
396
397 /*
398 * functions for for int[2] keys
399 */
400
i2hash(void * key)401 static unsigned int i2hash(void* key)
402 {
403 #if BYTE_PER_INT >= 4
404 unsigned int* v = key;
405
406 return v[0] + (v[1] << 16);
407 #else
408 #error not implemented
409 #endif
410 }
411
i2cp(void * key)412 static void* i2cp(void* key)
413 {
414 int* newkey = malloc(sizeof(int) * 2);
415
416 newkey[0] = ((int*) key)[0];
417 newkey[1] = ((int*) key)[1];
418
419 return newkey;
420 }
421
i2eq(void * key1,void * key2)422 static int i2eq(void* key1, void* key2)
423 {
424 return (((int*) key1)[0] == ((int*) key2)[0]) && (((int*) key1)[1] == ((int*) key2)[1]);
425 }
426
ht_create_d1(int size)427 hashtable* ht_create_d1(int size)
428 {
429 assert(sizeof(double) == INT_PER_DOUBLE * sizeof(int));
430 return ht_create(size, d1cp, d1eq, d1hash);
431 }
432
ht_create_d2(int size)433 hashtable* ht_create_d2(int size)
434 {
435 assert(sizeof(double) == INT_PER_DOUBLE * sizeof(int));
436 return ht_create(size, d2cp, d2eq, d2hash);
437 }
438
ht_create_str(int size)439 hashtable* ht_create_str(int size)
440 {
441 return ht_create(size, strcp, streq, strhash);
442 }
443
ht_create_i1(int size)444 hashtable* ht_create_i1(int size)
445 {
446 return ht_create(size, i1cp, i1eq, i1hash);
447 }
448
ht_create_i2(int size)449 hashtable* ht_create_i2(int size)
450 {
451 assert(sizeof(int) == BYTE_PER_INT);
452 return ht_create(size, i2cp, i2eq, i2hash);
453 }
454
ht_getnentries(hashtable * table)455 int ht_getnentries(hashtable* table)
456 {
457 return table->n;
458 }
459
ht_getsize(hashtable * table)460 int ht_getsize(hashtable* table)
461 {
462 return table->size;
463 }
464
ht_getnfilled(hashtable * table)465 int ht_getnfilled(hashtable* table)
466 {
467 return table->nhash;
468 }
469
470 #if defined(HT_TEST)
471
472 #include <stdio.h>
473 #include <limits.h>
474
475 #define BUFSIZE 1024
476
print_double(void * data)477 static void print_double(void* data)
478 {
479 printf(" \"%d\"", (int)* (double*) data);
480 }
481
print_string(void * data)482 static void print_string(void* data)
483 {
484 printf(" \"%s\"", (char*) data);
485 }
486
main()487 int main()
488 {
489 double points[] = {
490 922803.7855, 7372394.688, 0,
491 922849.2037, 7372307.027, 1,
492 922894.657, 7372219.306, 2,
493 922940.1475, 7372131.528, 3,
494 922985.6777, 7372043.692, 4,
495 923031.2501, 7371955.802, 5,
496 923076.8669, 7371867.857, 6,
497 923122.5307, 7371779.861, 7,
498 923168.2439, 7371691.816, 8,
499 923214.0091, 7371603.722, 9,
500 923259.8288, 7371515.583, 10,
501 922891.3958, 7372440.117, 11,
502 922936.873, 7372352.489, 12,
503 922982.3839, 7372264.804, 13,
504 923027.9308, 7372177.064, 14,
505 923073.5159, 7372089.268, 15,
506 923119.1415, 7372001.42, 16,
507 923164.8099, 7371913.521, 17,
508 923210.5233, 7371825.572, 18,
509 923256.2841, 7371737.575, 19,
510 923302.0946, 7371649.534, 20,
511 923347.9572, 7371561.45, 21,
512 922978.9747, 7372485.605, 22,
513 923024.5085, 7372398.009, 23,
514 923070.0748, 7372310.358, 24,
515 923115.6759, 7372222.654, 25,
516 923161.3136, 7372134.897, 26,
517 923206.9903, 7372047.09, 27,
518 923252.7079, 7371959.233, 28,
519 923298.4686, 7371871.33, 29,
520 923344.2745, 7371783.381, 30,
521 923390.1279, 7371695.389, 31,
522 923436.0309, 7371607.357, 32,
523 923066.5232, 7372531.148, 33,
524 923112.1115, 7372443.583, 34,
525 923157.7311, 7372355.966, 35,
526 923203.3842, 7372268.296, 36,
527 923249.0725, 7372180.577, 37,
528 923294.7981, 7372092.808, 38,
529 923340.5628, 7372004.993, 39,
530 923386.3686, 7371917.132, 40,
531 923432.2176, 7371829.229, 41,
532 923478.1116, 7371741.284, 42,
533 923524.0527, 7371653.302, 43,
534 923154.0423, 7372576.746, 44,
535 923199.6831, 7372489.211, 45,
536 923245.3541, 7372401.625, 46,
537 923291.0572, 7372313.989, 47,
538 923336.7941, 7372226.305, 48,
539 923382.5667, 7372138.574, 49,
540 923428.3766, 7372050.798, 50,
541 923474.2256, 7371962.978, 51,
542 923520.1155, 7371875.118, 52,
543 923566.0481, 7371787.218, 53,
544 923612.0252, 7371699.282, 54,
545 923241.533, 7372622.396, 55,
546 923287.2244, 7372534.889, 56,
547 923332.9449, 7372447.334, 57,
548 923378.6963, 7372359.731, 58,
549 923424.4801, 7372272.081, 59,
550 923470.2979, 7372184.385, 60,
551 923516.1513, 7372096.646, 61,
552 923562.0418, 7372008.866, 62,
553 923607.9709, 7371921.046, 63,
554 923653.9402, 7371833.188, 64,
555 923699.9514, 7371745.296, 65,
556 923328.9962, 7372668.095, 66,
557 923374.7365, 7372580.617, 67,
558 923420.5049, 7372493.091, 68,
559 923466.303, 7372405.519, 69,
560 923512.1321, 7372317.901, 70,
561 923557.9936, 7372230.24, 71,
562 923603.8889, 7372142.536, 72,
563 923649.8192, 7372054.793, 73,
564 923695.786, 7371967.011, 74,
565 923741.7905, 7371879.193, 75,
566 923787.8341, 7371791.342, 76,
567 923416.4327, 7372713.844, 77,
568 923462.2204, 7372626.393, 78,
569 923508.0353, 7372538.895, 79,
570 923553.8787, 7372451.353, 80,
571 923599.7517, 7372363.766, 81,
572 923645.6555, 7372276.137, 82,
573 923691.5914, 7372188.467, 83,
574 923737.5603, 7372100.757, 84,
575 923783.5634, 7372013.011, 85,
576 923829.6017, 7371925.231, 86,
577 923875.6763, 7371837.419, 87,
578 923503.8433, 7372759.64, 88,
579 923549.6771, 7372672.214, 89,
580 923595.5372, 7372584.744, 90,
581 923641.4246, 7372497.23, 91,
582 923687.3404, 7372409.673, 92,
583 923733.2855, 7372322.074, 93,
584 923779.2608, 7372234.436, 94,
585 923825.2672, 7372146.759, 95,
586 923871.3056, 7372059.047, 96,
587 923917.3766, 7371971.301, 97,
588 923963.4812, 7371883.524, 98,
589 923591.2288, 7372805.481, 99,
590 923637.1076, 7372718.081, 100,
591 923683.0118, 7372630.638, 101,
592 923728.9423, 7372543.151, 102,
593 923774.8998, 7372455.622, 103,
594 923820.8852, 7372368.052, 104,
595 923866.8991, 7372280.443, 105,
596 923912.9422, 7372192.797, 106,
597 923959.015, 7372105.116, 107,
598 924005.118, 7372017.402, 108,
599 924051.2518, 7371929.657, 109,
600 923678.5898, 7372851.367, 110,
601 923724.5126, 7372763.992, 111,
602 923770.46, 7372676.574, 112,
603 923816.4328, 7372589.113, 113,
604 923862.4314, 7372501.611, 114,
605 923908.4564, 7372414.069, 115,
606 923954.5083, 7372326.488, 116,
607 924000.5875, 7372238.87, 117,
608 924046.6941, 7372151.218, 118,
609 924092.8286, 7372063.533, 119,
610 924138.9911, 7371975.818, 120
611 };
612
613 int size = sizeof(points) / sizeof(double) / 3;
614 hashtable* ht;
615 int i;
616
617 /*
618 * double[2] key
619 */
620
621 printf("\n1. Testing a table with key of double[2] type\n\n");
622
623 printf(" creating a table...");
624 ht = ht_create_d2(size);
625 printf("done\n");
626
627 printf(" inserting %d values from a file...", size);
628 for (i = 0; i < size; ++i)
629 ht_insert(ht, &points[i * 3], &points[i * 3 + 2]);
630 printf("done\n");
631
632 printf(" stats:\n");
633 printf(" %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash);
634 printf(" %f entries per hash value in use\n", (double) ht->n / ht->nhash);
635
636 printf(" finding and printing each 10th data:\n");
637 for (i = 0; i < size; i += 10) {
638 double* point = &points[i * 3];
639 double* data = ht_find(ht, point);
640
641 if (data != NULL)
642 printf(" i = %d; data = \"%d\"\n", i, (int)* data);
643 else
644 printf(" i = %d; data = <none>\n", i);
645 }
646
647 printf(" removing every 3rd element...");
648 for (i = 0; i < size; i += 3) {
649 double* point = &points[i * 3];
650 ht_delete(ht, point);
651 }
652 printf("done\n");
653
654 printf(" stats:\n");
655 printf(" %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash);
656 printf(" %f entries per hash value in use\n", (double) ht->n / ht->nhash);
657
658 printf(" finding and printing each 10th data:\n");
659 for (i = 0; i < size; i += 10) {
660 double* point = &points[i * 3];
661 double* data = ht_find(ht, point);
662
663 if (data != NULL)
664 printf(" i = %d; data = \"%d\"\n", i, (int)* data);
665 else
666 printf(" i = %d; data = <none>\n", i);
667 }
668
669 printf(" printing all data by calling ht_process():\n ");
670 ht_process(ht, print_double);
671
672 printf("\n destroying the hash table...");
673 ht_destroy(ht);
674 printf("done\n");
675
676 /*
677 * char* key
678 */
679
680 printf("\n2. Testing a table with key of char* type\n\n");
681
682 printf(" creating a table...");
683 ht = ht_create_str(size);
684 printf("done\n");
685
686 printf(" inserting %d elements with deep copy of each data string...", size);
687 for (i = 0; i < size; ++i) {
688 char key[BUFSIZE];
689 char str[BUFSIZE];
690 char* data;
691
692 sprintf(key, "%d-th key", i);
693 sprintf(str, "%d-th data", i);
694 data = strdup(str);
695 ht_insert(ht, key, data);
696 }
697 printf("done\n");
698
699 printf(" stats:\n");
700 printf(" %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash);
701 printf(" %f entries per hash value in use\n", (double) ht->n / ht->nhash);
702
703 printf(" finding and printing each 10th data:\n");
704 for (i = 0; i < size; i += 10) {
705 char key[BUFSIZE];
706 char* data;
707
708 sprintf(key, "%d-th key", i);
709 data = ht_find(ht, key);
710 if (data != NULL)
711 printf(" i = %d; data = \"%s\"\n", i, data);
712 else
713 printf(" i = %d; data = <none>\n", i);
714 }
715
716 printf(" removing every 3rd element...");
717 for (i = 0; i < size; i += 3) {
718 char key[BUFSIZE];
719
720 sprintf(key, "%d-th key", i);
721 free(ht_delete(ht, key));
722 }
723 printf("done\n");
724
725 printf(" stats:\n");
726 printf(" %d entries, %d table elements, %d filled elements\n", ht->n, ht->size, ht->nhash);
727 printf(" %f entries per hash value in use\n", (double) ht->n / ht->nhash);
728
729 printf(" finding and printing each 10th data:\n");
730 for (i = 0; i < size; i += 10) {
731 char key[BUFSIZE];
732 char* data;
733
734 sprintf(key, "%d-th key", i);
735 data = ht_find(ht, key);
736 if (data != NULL)
737 printf(" i = %d; data = \"%s\"\n", i, data);
738 else
739 printf(" i = %d; data = <none>\n", i);
740 }
741
742 printf(" printing all data by calling ht_process():\n ");
743 ht_process(ht, print_string);
744
745 printf("\n freeing the remaining data by calling ht_process()...");
746 ht_process(ht, free);
747 printf("done\n");
748
749 printf(" destroying the hash table...");
750 ht_destroy(ht);
751 printf("done\n");
752
753 return 0;
754 }
755
756 #endif /* HT_TEST */
757